Source code for peptdeep.model.ccs

import torch
import pandas as pd
import typing


from alphabase.peptide.mobility import ccs_to_mobility_for_df, mobility_to_ccs_for_df

import peptdeep.model.base as model_base
from peptdeep.utils import evaluate_linear_regression


class Model_CCS_Bert(torch.nn.Module):
    """
    Transformer model for CCS prediction.

    The amino-acid/modification encoding produced by
    ``model_base.AATransformerEncoding`` is concatenated with two charge
    channels, run through ``model_base.Hidden_HFace_Transformer``, reduced by
    ``model_base.SeqAttentionSum`` and projected to one value by a linear
    layer.

    Parameters
    ----------
    dropout : float
        Probability used for the shared ``torch.nn.Dropout`` layer and
        forwarded to the transformer stack.
    nlayers : int
        Forwarded to ``Hidden_HFace_Transformer`` as ``nlayers``.
    hidden : int
        Feature width of the transformer input. The sequence encoder is built
        with ``hidden - 2`` because two charge channels are appended in
        :meth:`forward`.
    output_attentions : bool
        If True, :meth:`forward` stores ``hidden_nn(...)[1]`` in
        ``self.attentions``.
    **kwargs
        Accepted for call-site compatibility; not used by this class.

    Attributes
    ----------
    attentions
        ``None`` until a forward pass is run with ``output_attentions``
        enabled; afterwards the second element returned by ``hidden_nn``.
    """

    def __init__(
        self,
        dropout=0.1,
        nlayers=4,
        hidden=128,
        output_attentions=False,
        **kwargs,
    ):
        super().__init__()
        self.dropout = torch.nn.Dropout(dropout)

        # Two feature slots are left free for the charge channels that
        # forward() concatenates onto the encoding.
        self.input_nn = model_base.AATransformerEncoding(hidden - 2)

        self._output_attentions = output_attentions
        # Define the attribute up front so reading `model.attentions` before
        # the first forward pass yields None instead of raising
        # AttributeError.
        self.attentions = None

        self.hidden_nn = model_base.Hidden_HFace_Transformer(
            hidden,
            nlayers=nlayers,
            dropout=dropout,
            output_attentions=output_attentions,
        )

        # The same Dropout instance is reused inside the output head.
        self.output_nn = torch.nn.Sequential(
            model_base.SeqAttentionSum(hidden),
            torch.nn.PReLU(),
            self.dropout,
            torch.nn.Linear(hidden, 1),
        )

    @property
    def output_attentions(self):
        """Whether forward() keeps the attention output of ``hidden_nn``."""
        return self._output_attentions

    @output_attentions.setter
    def output_attentions(self, val: bool):
        # Keep the wrapped transformer in sync with this model's flag.
        self._output_attentions = val
        self.hidden_nn.output_attentions = val

    def forward(
        self,
        aa_indices,
        mod_x,
        charges: torch.Tensor,
    ):
        """
        Run the model on one batch.

        Parameters
        ----------
        aa_indices
            Amino-acid index features passed to ``input_nn``.
        mod_x
            Modification features passed to ``input_nn``.
        charges : torch.Tensor
            Charge features. NOTE(review): the repeat below only lines up with
            the encoding if this is a 2-D tensor with a trailing dimension of
            size 1, which is what
            ``AlphaCCSModel._get_features_from_batch_df`` produces — confirm
            for other callers.

        Returns
        -------
        torch.Tensor
            Output of ``output_nn`` with dimension 1 squeezed.
        """
        x = self.dropout(self.input_nn(aa_indices, mod_x))

        # Tile the charge along the sequence axis and duplicate it into two
        # channels before appending it to the feature axis.
        charges = charges.unsqueeze(1).repeat(1, x.size(1), 2)
        x = torch.cat((x, charges), 2)

        hidden_x = self.hidden_nn(x)
        self.attentions = hidden_x[1] if self.output_attentions else None

        # Scaled skip connection from the transformer input.
        x = self.dropout(hidden_x[0] + x * 0.2)

        return self.output_nn(x).squeeze(1)
class Model_CCS_LSTM(torch.nn.Module):
    """
    LSTM model for CCS prediction.

    Encodes the inputs with
    ``model_base.Encoder_26AA_Mod_Charge_CNN_LSTM_AttnSum`` and decodes the
    result, with the charge appended, through ``model_base.Decoder_Linear``.

    Parameters
    ----------
    dropout : float
        Probability of the ``torch.nn.Dropout`` applied to the encoder output.
    """

    def __init__(self, dropout=0.1):
        super().__init__()
        self.dropout = torch.nn.Dropout(dropout)

        hidden_dim = 256
        self.ccs_encoder = model_base.Encoder_26AA_Mod_Charge_CNN_LSTM_AttnSum(
            hidden_dim
        )
        # One extra input feature: the charge is concatenated in forward().
        self.ccs_decoder = model_base.Decoder_Linear(hidden_dim + 1, 1)

    def forward(
        self,
        aa_indices,
        mod_x,
        charges,
    ):
        """
        Run the model on one batch.

        Parameters
        ----------
        aa_indices
            Amino-acid index features passed to the encoder.
        mod_x
            Modification features passed to the encoder.
        charges
            Charge features; passed to the encoder and also concatenated to
            its output along dimension 1.

        Returns
        -------
        torch.Tensor
            Decoder output with dimension 1 squeezed.
        """
        encoded = self.dropout(self.ccs_encoder(aa_indices, mod_x, charges))
        decoder_input = torch.cat((encoded, charges), 1)
        prediction = self.ccs_decoder(decoder_input)
        return prediction.squeeze(1)
def ccs_to_mobility_pred_df(precursor_df: pd.DataFrame) -> pd.DataFrame:
    """
    Add a 'mobility_pred' column to `precursor_df` in place.

    The column is the result of ``ccs_to_mobility_for_df`` called on the
    'ccs_pred' column.

    Parameters
    ----------
    precursor_df : pd.DataFrame
        Dataframe holding a 'ccs_pred' column; it is modified in place.

    Returns
    -------
    pd.DataFrame
        The same `precursor_df` object.
    """
    mobility_values = ccs_to_mobility_for_df(precursor_df, "ccs_pred")
    precursor_df["mobility_pred"] = mobility_values
    return precursor_df
def mobility_to_ccs_df_(precursor_df: pd.DataFrame) -> pd.DataFrame:
    """
    Add a 'ccs' column to `precursor_df` in place.

    The column is the result of ``mobility_to_ccs_for_df`` called on the
    'mobility' column.

    Parameters
    ----------
    precursor_df : pd.DataFrame
        Dataframe holding a 'mobility' column; it is modified in place.

    Returns
    -------
    pd.DataFrame
        The same `precursor_df` object.
    """
    ccs_values = mobility_to_ccs_for_df(precursor_df, "mobility")
    precursor_df["ccs"] = ccs_values
    return precursor_df
class AlphaCCSModel(model_base.ModelInterface):
    """
    `ModelInterface` for `Model_CCS_LSTM` or `Model_CCS_Bert`.

    Parameters
    ----------
    dropout : float
        Forwarded to ``self.build`` as the ``dropout`` keyword.
    model_class
        Model class handed to ``self.build``; defaults to `Model_CCS_LSTM`.
    device : str
        Forwarded to ``ModelInterface.__init__``.
    **kwargs
        Extra keyword arguments forwarded to ``self.build``.
    """

    def __init__(
        self,
        dropout=0.1,
        model_class: torch.nn.Module = Model_CCS_LSTM,
        device: str = "gpu",
        **kwargs,
    ):
        super().__init__(device=device)

        self.model: Model_CCS_LSTM = None
        self.build(model_class, dropout=dropout, **kwargs)

        # Multiplier applied to the raw charge values in
        # _get_features_from_batch_df.
        self.charge_factor = 0.1

        self.target_column_to_predict = "ccs_pred"
        self.target_column_to_train = "ccs"

    def test(
        self,
        precursor_df: pd.DataFrame,
        *,
        x: typing.Literal["ccs_pred", "mobility_pred"] = "ccs_pred",
        y: typing.Literal["ccs", "mobility"] = "ccs",
        batch_size: int = 1024,
    ):
        """
        Predict on `precursor_df` and evaluate a linear regression of the
        result.

        Parameters
        ----------
        precursor_df : pd.DataFrame
            Dataframe passed to ``self.predict``.
        x : str
            Column name passed to ``evaluate_linear_regression`` as ``x``.
        y : str
            Column name passed to ``evaluate_linear_regression`` as ``y``.
        batch_size : int
            Forwarded to ``self.predict``.

        Returns
        -------
        The return value of ``evaluate_linear_regression``.
        """
        predicted_df = self.predict(precursor_df, batch_size=batch_size)
        return evaluate_linear_regression(predicted_df, x=x, y=y)

    def _get_features_from_batch_df(
        self,
        batch_df: pd.DataFrame,
    ):
        """
        Build the model inputs for one batch.

        Returns
        -------
        tuple
            ``(aa_indices, mod_x, charges)`` where ``charges`` is the 'charge'
            column converted by ``self._as_tensor``, given a trailing
            dimension of size 1 and scaled by ``self.charge_factor``.
        """
        aa_indices = self._get_26aa_indice_features(batch_df)
        mod_x = self._get_mod_features(batch_df)

        raw_charges = self._as_tensor(batch_df["charge"].values)
        charges = raw_charges.unsqueeze(1) * self.charge_factor

        return aa_indices, mod_x, charges

    def ccs_to_mobility_pred(self, precursor_df: pd.DataFrame) -> pd.DataFrame:
        """
        Add 'mobility_pred' to `precursor_df` in place.

        Thin wrapper around the module-level ``ccs_to_mobility_pred_df``.
        """
        return ccs_to_mobility_pred_df(precursor_df)