# Source code for peptdeep.model.ccs

import torch
import pandas as pd
import typing


from alphabase.peptide.mobility import ccs_to_mobility_for_df, mobility_to_ccs_for_df

import peptdeep.model.base as model_base
from peptdeep.utils import evaluate_linear_regression



[docs]
class Model_CCS_Bert(torch.nn.Module):
    """
    Transformer model for CCS prediction.

    The amino acid / modification encoder is built with ``hidden - 2``
    features; the charge tensor is repeated along the sequence and feature
    axes and concatenated to it before entering the transformer.
    """

    def __init__(
        self,
        dropout=0.1,
        nlayers=4,
        hidden=128,
        output_attentions=False,
        **kwargs,
    ):
        """
        Parameters
        ----------
        dropout : float, optional
            Probability of the shared dropout layer; also passed to the
            transformer. Defaults to 0.1.
        nlayers : int, optional
            ``nlayers`` passed to the transformer. Defaults to 4.
        hidden : int, optional
            Feature size given to the transformer and the output head.
            Defaults to 128.
        output_attentions : bool, optional
            If True, `forward` stores the second element of the
            transformer output in ``self.attentions``. Defaults to False.
        **kwargs
            Accepted and ignored.
        """
        super().__init__()

        # Filled in by `forward`; defined here so that reading it before the
        # first forward pass yields None instead of raising AttributeError.
        self.attentions = None

        self.dropout = torch.nn.Dropout(dropout)

        # Two features are left free for the charge columns added in `forward`.
        self.input_nn = model_base.AATransformerEncoding(hidden - 2)

        self._output_attentions = output_attentions

        self.hidden_nn = model_base.Hidden_HFace_Transformer(
            hidden,
            nlayers=nlayers,
            dropout=dropout,
            output_attentions=output_attentions,
        )

        # The same dropout module instance is reused inside the output head.
        self.output_nn = torch.nn.Sequential(
            model_base.SeqAttentionSum(hidden),
            torch.nn.PReLU(),
            self.dropout,
            torch.nn.Linear(hidden, 1),
        )

    @property
    def output_attentions(self):
        """Whether `forward` keeps the transformer attentions."""
        return self._output_attentions

    @output_attentions.setter
    def output_attentions(self, val: bool):
        # Keep the flag of the wrapped transformer in sync with this model.
        self._output_attentions = val
        self.hidden_nn.output_attentions = val

    def forward(
        self,
        aa_indices,
        mod_x,
        charges: torch.Tensor,
    ):
        """
        Predict one value per input row.

        Parameters
        ----------
        aa_indices, mod_x
            Inputs passed unchanged to the input encoder.
        charges : torch.Tensor
            Charge features. Assumed to be of shape ``(batch, 1)`` so that
            the repeated charges supply the two features the encoder
            leaves free -- TODO confirm against callers.

        Returns
        -------
        torch.Tensor
            Output of the output head with dimension 1 squeezed.
        """
        x = self.dropout(self.input_nn(aa_indices, mod_x))
        # Repeat the charge for every sequence position and twice along the
        # feature axis, then append it to the encoded sequence.
        charges = charges.unsqueeze(1).repeat(1, x.size(1), 2)
        x = torch.cat((x, charges), 2)

        hidden_x = self.hidden_nn(x)
        if self.output_attentions:
            self.attentions = hidden_x[1]
        else:
            self.attentions = None
        # Transformer output plus a 0.2-scaled copy of its input.
        x = self.dropout(hidden_x[0] + x * 0.2)

        return self.output_nn(x).squeeze(1)





[docs]
class Model_CCS_LSTM(torch.nn.Module):
    """LSTM model for CCS prediction"""

    def __init__(self, dropout=0.1):
        """
        Parameters
        ----------
        dropout : float, optional
            Probability of the dropout applied to the encoder output.
            Defaults to 0.1.
        """
        super().__init__()

        self.dropout = torch.nn.Dropout(dropout)

        hidden = 256

        self.ccs_encoder = model_base.Encoder_26AA_Mod_Charge_CNN_LSTM_AttnSum(hidden)

        # `forward` appends the charge tensor to the encoder output along
        # dim 1, hence the extra input feature of the decoder.
        self.ccs_decoder = model_base.Decoder_Linear(hidden + 1, 1)

    def forward(
        self,
        aa_indices,
        mod_x,
        charges,
    ):
        """
        Predict one value per input row.

        Parameters
        ----------
        aa_indices, mod_x
            Inputs passed unchanged to the encoder.
        charges : torch.Tensor
            Charge features, passed to the encoder and also concatenated
            to its output along dim 1. Assumed to be of shape
            ``(batch, 1)`` to match the decoder input size -- TODO confirm.

        Returns
        -------
        torch.Tensor
            Decoder output with dimension 1 squeezed.
        """
        x = self.ccs_encoder(aa_indices, mod_x, charges)
        x = self.dropout(x)
        x = torch.cat((x, charges), 1)
        return self.ccs_decoder(x).squeeze(1)





[docs]
def ccs_to_mobility_pred_df(precursor_df: pd.DataFrame) -> pd.DataFrame:
    """
    Add a 'mobility_pred' column to `precursor_df` in place.

    The column holds the result of `ccs_to_mobility_for_df` applied to the
    'ccs_pred' column. The same (modified) DataFrame is returned.
    """
    predicted_mobility = ccs_to_mobility_for_df(precursor_df, "ccs_pred")
    precursor_df["mobility_pred"] = predicted_mobility
    return precursor_df




[docs]
def mobility_to_ccs_df_(precursor_df: pd.DataFrame) -> pd.DataFrame:
    """
    Add a 'ccs' column to `precursor_df` in place.

    The column holds the result of `mobility_to_ccs_for_df` applied to the
    'mobility' column. The same (modified) DataFrame is returned.
    """
    ccs_values = mobility_to_ccs_for_df(precursor_df, "mobility")
    precursor_df["ccs"] = ccs_values
    return precursor_df




[docs]
class AlphaCCSModel(model_base.ModelInterface):
    """
    `ModelInterface` for `Model_CCS_LSTM` or `Model_CCS_Bert`
    """

    def __init__(
        self,
        dropout=0.1,
        model_class: typing.Type[torch.nn.Module] = Model_CCS_LSTM,
        device: str = "gpu",
        **kwargs,
    ):
        """
        Parameters
        ----------
        dropout : float, optional
            Passed to `build` as the ``dropout`` keyword. Defaults to 0.1.
        model_class : type of torch.nn.Module, optional
            Model class (not an instance) passed to `build`.
            Defaults to `Model_CCS_LSTM`.
        device : str, optional
            Passed to the `ModelInterface` constructor. Defaults to "gpu".
        **kwargs
            Extra keyword arguments passed to `build`.
        """
        super().__init__(device=device)
        self.model: typing.Union[Model_CCS_LSTM, Model_CCS_Bert] = None
        self.build(model_class, dropout=dropout, **kwargs)
        # Charges are multiplied by this factor before entering the model.
        self.charge_factor = 0.1

        self.target_column_to_predict = "ccs_pred"
        self.target_column_to_train = "ccs"

    def test(
        self,
        precursor_df: pd.DataFrame,
        *,
        x: typing.Literal["ccs_pred", "mobility_pred"] = "ccs_pred",
        y: typing.Literal["ccs", "mobility"] = "ccs",
        batch_size: int = 1024,
    ):
        """
        Run `predict` on `precursor_df` and evaluate the linear regression
        between column `x` and column `y` of the result.

        NOTE(review): only ``self.predict`` is called here; when
        ``x="mobility_pred"`` that column presumably has to exist already
        (e.g. via `ccs_to_mobility_pred`) -- confirm against `predict`.

        Parameters
        ----------
        precursor_df : pd.DataFrame
            DataFrame passed to `predict`.
        x : str, optional
            Name of the predicted column. Defaults to "ccs_pred".
        y : str, optional
            Name of the reference column. Defaults to "ccs".
        batch_size : int, optional
            Passed to `predict`. Defaults to 1024.

        Returns
        -------
        The return value of `evaluate_linear_regression`.
        """
        return evaluate_linear_regression(
            self.predict(precursor_df, batch_size=batch_size), x=x, y=y
        )

    def _get_features_from_batch_df(
        self,
        batch_df: pd.DataFrame,
    ):
        """
        Build the model inputs for one batch.

        Returns
        -------
        tuple
            ``(aa_indices, mod_x, charges)``, where ``charges`` is the
            'charge' column as a tensor with an added dimension 1, scaled
            by ``self.charge_factor``.
        """
        aa_indices = self._get_26aa_indice_features(batch_df)

        mod_x = self._get_mod_features(batch_df)

        charges = (
            self._as_tensor(batch_df["charge"].values).unsqueeze(1) * self.charge_factor
        )

        return aa_indices, mod_x, charges

    def ccs_to_mobility_pred(self, precursor_df: pd.DataFrame) -> pd.DataFrame:
        """Add 'mobility_pred' into `precursor_df` via `ccs_to_mobility_pred_df`."""
        return ccs_to_mobility_pred_df(precursor_df)