# Source code for peptdeep.rescore.fdr
import numba
import numpy as np
import pandas as pd
# [docs]
@numba.njit
def fdr_to_q_values(fdr_values: np.ndarray) -> np.ndarray:
    """Convert FDR values to q-values.

    The q-value at position ``i`` is the smallest FDR found at position
    ``i`` or any later position, so the result never decreases from the
    first element to the last.

    Parameters
    ----------
    fdr_values : np.ndarray
        FDR values, ordered by descending `score`.

    Returns
    -------
    np.ndarray
        q-values with the same shape and dtype as `fdr_values`.
        An empty input yields an empty output.
    """
    q_values = np.zeros_like(fdr_values)
    n_values = len(fdr_values)
    # np.max cannot reduce a zero-size array, so hand back the empty
    # result before touching it.
    if n_values == 0:
        return q_values
    # Start from the largest FDR and walk from the worst score to the
    # best one, carrying the smallest FDR seen so far.
    min_q_value = np.max(fdr_values)
    for i in range(n_values - 1, -1, -1):
        fdr = fdr_values[i]
        if fdr < min_q_value:
            min_q_value = fdr
        q_values[i] = min_q_value
    return q_values
# [docs]
def calc_fdr(
    df: pd.DataFrame, score_column: str, decoy_column: str = "decoy"
) -> pd.DataFrame:
    """Calculate q-values (stored in the 'fdr' column) for a dataframe.

    Rows are ranked by descending score. At each rank the FDR is the
    cumulative decoy count divided by the cumulative target count, and
    the FDRs are then converted to q-values.

    Parameters
    ----------
    df : pd.DataFrame
        PSM dataframe to calculate FDRs for. It is not modified.
    score_column : str
        Score column; rows are sorted by it in descending order.
    decoy_column : str, optional
        Decoy indicator column, 1=decoy and 0=target (the column is
        summed to count decoys). Defaults to 'decoy'.

    Returns
    -------
    pd.DataFrame
        A sorted copy of `df` with the 'fdr' column added. Its index
        holds the original row positions, in sorted order.
    """
    # Ties on score are broken by the decoy flag, also descending, so a
    # decoy is ranked ahead of a target with the same score.
    df = df.reset_index(drop=True).sort_values(
        [score_column, decoy_column], ascending=False
    )
    decoy_flags = df[decoy_column].values
    decoy_cumsum = np.cumsum(decoy_flags)
    target_cumsum = np.cumsum(1 - decoy_flags)
    # While no target has been seen yet the denominator is zero; the
    # resulting inf is expected and replaced by a smaller q-value below,
    # so the floating-point warning is silenced for this division only.
    with np.errstate(divide="ignore", invalid="ignore"):
        fdr_values = decoy_cumsum / target_cumsum
    if len(fdr_values) == 0:
        # Nothing to rank in an empty frame; skip the q-value pass.
        df["fdr"] = fdr_values
    else:
        df["fdr"] = fdr_to_q_values(fdr_values)
    return df
# Alias: `calc_fdr_for_df` is bound to the same function object as `calc_fdr`.
calc_fdr_for_df = calc_fdr
# [docs]
@numba.njit
def fdr_from_ref(
    sorted_scores: np.ndarray, ref_scores: np.ndarray, ref_fdr_values: np.ndarray
) -> np.ndarray:
    """Look up FDR values for scores from a reference score/FDR table.

    It is used to extend peptide-level or sequence-level FDR (reference)
    to each PSM, as PSMs are more useful for quantification.

    Both score arrays are walked once from the highest score down. Each
    score receives the FDR of the first reference entry whose score is
    not greater than it; scores below every reference score receive the
    last reference FDR.

    Parameters
    ----------
    sorted_scores : np.ndarray
        The scores to calculate FDRs for,
        they must be sorted in descending order.
    ref_scores : np.ndarray
        Reference scores that were used to calculate `ref_fdr_values`,
        also sorted in descending order.
    ref_fdr_values : np.ndarray
        FDR values corresponding to `ref_scores`.

    Returns
    -------
    np.ndarray
        FDR values corresponding to `sorted_scores`, with the dtype of
        `ref_fdr_values`.

    Raises
    ------
    ValueError
        If `sorted_scores` is not empty but `ref_fdr_values` is.
    """
    n_scores = len(sorted_scores)
    n_refs = len(ref_scores)
    # The result holds FDRs, so it takes the dtype of the reference FDRs;
    # copying the score dtype would truncate FDRs for integer scores.
    q_values = np.zeros(n_scores, dtype=ref_fdr_values.dtype)
    if n_scores == 0:
        return q_values
    if len(ref_fdr_values) == 0:
        # Without this check the tail fill below would index an empty array.
        raise ValueError("ref_fdr_values must not be empty")
    i, j = 0, 0
    while i < n_scores and j < n_refs:
        if sorted_scores[i] >= ref_scores[j]:
            q_values[i] = ref_fdr_values[j]
            i += 1
        else:
            j += 1
    # Remaining scores are lower than every reference score.
    last_ref_fdr = ref_fdr_values[-1]
    while i < n_scores:
        q_values[i] = last_ref_fdr
        i += 1
    return q_values
# [docs]
def calc_fdr_from_ref(
    df: pd.DataFrame,
    ref_scores: np.ndarray,
    ref_fdr_values: np.ndarray,
    score_column: str,
    decoy_column: str = "decoy",
) -> pd.DataFrame:
    """Calculate FDR values for a PSM dataframe from the given reference
    scores and fdr_values. It is used to extend peptide-level or
    sequence-level FDR (reference) to each PSM, as PSMs are more useful
    for quantification.

    Parameters
    ----------
    df : pd.DataFrame
        PSM dataframe. It is not modified.
    ref_scores : np.ndarray
        Reference scores that were used to calculate `ref_fdr_values`.
    ref_fdr_values : np.ndarray
        FDR values corresponding to `ref_scores`. Both reference arrays
        are reordered here by ascending FDR before the lookup.
    score_column : str
        Score column in the dataframe; rows are sorted by it in
        descending order and its values are used for the lookup.
    decoy_column : str, optional
        Decoy indicator column in the dataframe. In this function it is
        only the secondary (descending) sort key. Defaults to 'decoy'.

    Returns
    -------
    pd.DataFrame
        A sorted copy of `df` with the 'fdr' column added.
    """
    df = df.reset_index(drop=True).sort_values(
        [score_column, decoy_column], ascending=False
    )
    # Accept any array-like reference input; fancy indexing below needs arrays.
    ref_scores = np.asarray(ref_scores)
    ref_fdr_values = np.asarray(ref_fdr_values)
    fdr_order = np.argsort(ref_fdr_values)
    # Read the scores through `score_column` rather than a fixed `score`
    # attribute, so the column named by the caller is the one looked up.
    df["fdr"] = fdr_from_ref(
        df[score_column].values,
        ref_scores[fdr_order],
        ref_fdr_values[fdr_order],
    )
    return df
# Alias: `calc_fdr_from_ref_for_df` is bound to the same function object as
# `calc_fdr_from_ref`.
calc_fdr_from_ref_for_df = calc_fdr_from_ref