from alphabase.protein.fasta import SpecLibFasta
from peptdeep.spec_lib.predict_lib import PredictSpecLib
from peptdeep.pretrained_models import ModelManager
[docs]
class PredictSpecLibFasta(SpecLibFasta, PredictSpecLib):
"""
Predicted spec lib from fasta files or other peptide files.
"""
[docs]
def __init__(
self,
model_manager: ModelManager = None,
*,
charged_frag_types: list = ["b_z1", "b_z2", "y_z1", "y_z2"],
protease: str = "trypsin",
max_missed_cleavages: int = 2,
peptide_length_min: int = 7,
peptide_length_max: int = 35,
precursor_charge_min: int = 2,
precursor_charge_max: int = 4,
precursor_mz_min: float = 400.0,
precursor_mz_max: float = 1800.0,
var_mods: list = ["Acetyl@Protein_N-term", "Oxidation@M"],
min_var_mod_num: int = 0,
max_var_mod_num: int = 2,
fix_mods: list = ["Carbamidomethyl@C"],
labeling_channels: dict = None,
special_mods: list = [],
min_special_mod_num: int = 0,
max_special_mod_num: int = 1,
special_mods_cannot_modify_pep_n_term: bool = False,
special_mods_cannot_modify_pep_c_term: bool = False,
decoy: str = None, # or pseudo_reverse or diann
include_contaminants: bool = False,
I_to_L=False,
generate_precursor_isotope: bool = False,
rt_to_irt: bool = False,
):
"""
Parameters
----------
model_manager : ModelManager, optional
ModelManager of MS2/RT/CCS... models, by default None
charged_frag_types : list, optional
Fragment types with charge,
by default [ 'b_z1','b_z2','y_z1', 'y_z2' ]
protease : str, optional
Could be pre-defined protease name defined in :data:`protease_dict`,
or a regular expression.
By default 'trypsin'
max_missed_cleavages : int, optional
Maximal missed cleavages, by default 2
peptide_length_min : int, optional
Minimal cleaved peptide length, by default 7
peptide_length_max : int, optional
Maximal cleaved peptide length, by default 35
precursor_charge_min : int, optional
Minimal precursor charge, by default 2
precursor_charge_max : int, optional
Maximal precursor charge, by default 4
precursor_mz_min : float, optional
Minimal precursor mz, by default 200.0
precursor_mz_max : float, optional
Maximal precursor mz, by default 2000.0
var_mods : list, optional
list of variable modifications,
by default ['Acetyl@Protein_N-term','Oxidation@M']
max_var_mod_num : int, optional
Minimal number of variable modifications on a peptide sequence,
by default 0
max_var_mod_num : int, optional
Maximal number of variable modifications on a peptide sequence,
by default 2
fix_mods : list, optional
list of fixed modifications, by default ['Carbamidomethyl@C']
labeling_channels : dict, optional
Add isotope labeling with different channels,
see :meth:`add_peptide_labeling()`.
Defaults to None
special_mods : list, optional
Special modifications.
It is useful for modificaitons like Phospho which may largely
explode the number of candidate modified peptides.
The number of special_mods per peptide
is controlled by `max_append_mod_num`.
Defaults to [].
min_special_mod_num : int, optional
Control the min number of special_mods per peptide, by default 0.
max_special_mod_num : int, optional
Control the max number of special_mods per peptide, by default 1.
special_mods_cannot_modify_pep_c_term : bool, optional
Some modifications cannot modify the peptide C-term,
this will be useful for GlyGly@K as if C-term is di-Glyed,
it cannot be cleaved/digested.
Defaults to False.
special_mods_cannot_modify_pep_n_term : bool, optional
Similar to `special_mods_cannot_modify_pep_c_term`, but at_N-term.
Defaults to False.
decoy : str, optional
Decoy type, see `alphabase.spectral_library.decoy_library`,
by default None
include_contaminants : bool, optional
If include contaminants.fasta, by default False
generate_precursor_isotope : bool, optional
If :meth:`peptdeep.spec_lib.predict_lib.PredictSpecLib.predict_all()`
includes :meth:`peptdeep.spec_lib.predict_lib.PredictSpecLib.calc_precursor_isotope()`.
Defaults to False
rt_to_irt : bool, optional
If convert predicted RT to iRT values, by default False
"""
SpecLibFasta.__init__(
self,
charged_frag_types=charged_frag_types,
protease=protease,
max_missed_cleavages=max_missed_cleavages,
peptide_length_min=peptide_length_min,
peptide_length_max=peptide_length_max,
precursor_charge_min=precursor_charge_min,
precursor_charge_max=precursor_charge_max,
precursor_mz_min=precursor_mz_min,
precursor_mz_max=precursor_mz_max,
var_mods=var_mods,
min_var_mod_num=min_var_mod_num,
max_var_mod_num=max_var_mod_num,
fix_mods=fix_mods,
labeling_channels=labeling_channels,
special_mods=special_mods,
min_special_mod_num=min_special_mod_num,
max_special_mod_num=max_special_mod_num,
special_mods_cannot_modify_pep_n_term=special_mods_cannot_modify_pep_n_term,
special_mods_cannot_modify_pep_c_term=special_mods_cannot_modify_pep_c_term,
decoy=decoy,
include_contaminants=include_contaminants,
I_to_L=I_to_L,
)
PredictSpecLib.__init__(
self,
model_manager=model_manager,
charged_frag_types=self.charged_frag_types,
precursor_mz_min=self.min_precursor_mz,
precursor_mz_max=self.max_precursor_mz,
decoy=self.decoy,
generate_precursor_isotope=generate_precursor_isotope,
rt_to_irt=rt_to_irt,
)
if self.model_manager is None:
if model_manager is None:
self.model_manager = ModelManager()
self.model_manager.reset_by_global_settings()
else:
print(
"Oops, `PredictSpecLibFasta.model_manager` is None, while it should not happen"
)
self.model_manager = model_manager
[docs]
def add_charge(self):
if self.model_manager.charge_model is None:
super().add_charge()
else:
print(f"Predicting charge states for {len(self.precursor_df)} peptides ...")
if self.model_manager.use_predicted_charge_in_speclib:
self._precursor_df = (
self.model_manager.charge_model.predict_and_clip_charges(
self.precursor_df,
min_precursor_charge=self.min_precursor_charge,
max_precursor_charge=self.max_precursor_charge,
charge_prob_cutoff=self.model_manager.charge_prob_cutoff,
)
)
else:
self._precursor_df = (
self.model_manager.charge_model.predict_charges_as_prob(
self.precursor_df,
min_precursor_charge=self.min_precursor_charge,
max_precursor_charge=self.max_precursor_charge,
)
)