Building your own models for CCS prediction
[ ]:
# IPython autoreload in mode 2: re-import edited modules before each cell runs,
# so changes to the package source are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
[ ]:
from peptdeep.model.featurize import (
get_batch_aa_indices,
get_batch_mod_feature
)
from peptdeep.settings import model_const
import peptdeep.model.model_interface as model_base
import peptdeep.model.building_block as building_block
# Number of entries in model_const['mod_elements'].
# The classes defined below do not read this value.
mod_feature_size = len(model_const['mod_elements'])
import torch
import pandas as pd
[ ]:
class CCS_LSTM_Module(torch.nn.Module):
    """Network that regresses a single CCS value per peptide.

    The inputs are encoded by
    ``building_block.Encoder_26AA_Mod_Charge_CNN_LSTM_AttnSum``; dropout is
    applied to the encoder output, the charge tensor is concatenated to it
    as one extra column, and ``building_block.Decoder_Linear`` maps the
    ``hidden + 1`` features to one output.

    Parameters
    ----------
    dropout : float, optional
        Probability passed to ``torch.nn.Dropout``. Defaults to 0.1.

    hidden : int, optional
        Size passed to the encoder; the decoder input size is
        ``hidden + 1``. Defaults to 128, the value that was previously
        hard-coded.
    """

    def __init__(self,
        dropout=0.1,
        hidden=128,
    ):
        super().__init__()
        # Attribute names are kept unchanged so existing state dicts
        # still match this module.
        self.dropout = torch.nn.Dropout(dropout)
        self.ccs_encoder = (
            building_block.Encoder_26AA_Mod_Charge_CNN_LSTM_AttnSum(
                hidden
            )
        )
        # One extra input feature for the charge appended in `forward`.
        self.ccs_decoder = building_block.Decoder_Linear(
            hidden + 1, 1
        )

    def forward(self,
        aa_indices,
        mod_x,
        charges,
    ):
        """Predict CCS values for one batch.

        Parameters
        ----------
        aa_indices : torch.Tensor
            Amino-acid indices, forwarded unchanged to the encoder.

        mod_x : torch.Tensor
            Modification features, forwarded unchanged to the encoder.

        charges : torch.Tensor
            Charge feature. It is concatenated to the encoder output along
            dim 1, so it must have a matching batch dimension and a dim 1
            (presumably shape ``(batch, 1)`` -- confirm against the caller).

        Returns
        -------
        torch.Tensor
            Decoder output with dim 1 squeezed away.
        """
        encoded = self.ccs_encoder(aa_indices, mod_x, charges)
        encoded = self.dropout(encoded)
        # The charge reaches the decoder directly as an additional feature,
        # on top of whatever the encoder already did with it.
        decoder_input = torch.cat((encoded, charges), 1)
        return self.ccs_decoder(decoder_input).squeeze(1)
[ ]:
class CCS_ModelInterface(model_base.ModelInterface):
    """Training/prediction interface for a CCS network.

    Builds ``model_class`` through the base-class ``build`` method, uses
    ``torch.nn.L1Loss`` as the loss function, trains on the ``'ccs'``
    column and writes predictions to the ``'ccs_pred'`` column.

    Parameters
    ----------
    model_class : torch.nn.Module subclass, optional
        The network class (not an instance) handed to ``self.build``.
        Defaults to ``CCS_LSTM_Module``.

    dropout : float, optional
        Forwarded to ``self.build`` as the ``dropout`` keyword.
        Defaults to 0.1.
    """

    def __init__(self,
        model_class:torch.nn.Module=CCS_LSTM_Module,
        dropout=0.1,
    ):
        super().__init__()
        self.build(
            model_class,
            dropout=dropout,
        )
        self.loss_func = torch.nn.L1Loss()
        self._target_column_to_train = 'ccs'
        self._target_column_to_predict = 'ccs_pred'

    def _prepare_predict_data_df(self,
        precursor_df:pd.DataFrame,
    ):
        """Add a zero-filled prediction column and remember the frame.

        The column named by ``self._target_column_to_predict`` is created
        (or overwritten) on ``precursor_df`` itself, not on a copy, and the
        same object is stored as ``self.predict_df``.
        """
        precursor_df[self._target_column_to_predict] = 0.
        self.predict_df = precursor_df

    def _get_features_from_batch_df(self,
        batch_df: pd.DataFrame,
    ):
        """Turn one batch of rows into the model's input tensors.

        Parameters
        ----------
        batch_df : pd.DataFrame
            Must contain the ``'sequence'`` and ``'charge'`` columns read
            here, plus whatever ``get_batch_mod_feature`` reads from it.

        Returns
        -------
        tuple of torch.Tensor
            ``(aa_indices, mod_x, charges)``, in the order expected by the
            model's ``forward``.
        """
        # Sequences are converted to a NumPy unicode array before being
        # passed to the featurizer.
        sequences = batch_df['sequence'].values.astype('U')
        aa_indices = torch.LongTensor(get_batch_aa_indices(sequences))

        mod_x = torch.Tensor(get_batch_mod_feature(batch_df))

        # Charge becomes a column (extra dim 1) and is scaled by 0.1
        # (presumably to keep the feature small -- confirm if this matters).
        charges = torch.Tensor(
            batch_df['charge'].values
        ).unsqueeze(1)*0.1

        return aa_indices, mod_x, charges

    def _get_targets_from_batch_df(self,
        batch_df: pd.DataFrame,
    ) -> torch.Tensor:
        """Return the training targets of one batch as a tensor.

        The column is looked up through ``self._target_column_to_train``
        (``'ccs'`` unless changed) rather than a hard-coded name, so the
        training side follows the attribute the same way the prediction
        side follows ``self._target_column_to_predict``.
        """
        return torch.Tensor(
            batch_df[self._target_column_to_train].values
        )
Testing the CCS model
Prepare training data
[ ]:
from peptdeep.model.rt import irt_pep

# Synthetic ("virtual") CCS targets for the demo: row i gets the value float(i).
irt_pep['ccs'] = pd.RangeIndex(len(irt_pep)).values.astype(float)
# Every peptide is assigned charge 2.
irt_pep['charge'] = 2
# Both columns are added to the imported frame in place; display the result.
irt_pep
| | sequence | pep_name | irt | mods | mod_sites | nAA | ccs | charge |
|---|---|---|---|---|---|---|---|---|
| 0 | LGGNEQVTR | RT-pep a | -24.92 | | | 9 | 0.0 | 2 |
| 1 | GAGSSEPVTGLDAK | RT-pep b | 0.00 | | | 14 | 1.0 | 2 |
| 2 | VEATFGVDESNAK | RT-pep c | 12.39 | | | 13 | 2.0 | 2 |
| 3 | YILAGVENSK | RT-pep d | 19.79 | | | 10 | 3.0 | 2 |
| 4 | TPVISGGPYEYR | RT-pep e | 28.71 | | | 12 | 4.0 | 2 |
| 5 | TPVITGAPYEYR | RT-pep f | 33.38 | | | 12 | 5.0 | 2 |
| 6 | DGLDAASYYAPVR | RT-pep g | 42.26 | | | 13 | 6.0 | 2 |
| 7 | ADVTPADFSEWSK | RT-pep h | 54.62 | | | 13 | 7.0 | 2 |
| 8 | GTFIIDPGGVIR | RT-pep i | 70.52 | | | 12 | 8.0 | 2 |
| 9 | GTFIIDPAAVIR | RT-pep k | 87.23 | | | 12 | 9.0 | 2 |
| 10 | LFLQFGAQGSPFLK | RT-pep l | 100.00 | | | 14 | 10.0 | 2 |
[ ]:
# Build the interface with its defaults: CCS_LSTM_Module as the network, dropout=0.1.
ccs_model = CCS_ModelInterface()
Device `gpu` is not available, set to `cpu`
Test the untrained model
Test if training works
[ ]:
# Train on the iRT peptides and their synthetic 'ccs' targets for 100 epochs, with verbose output off.
ccs_model.train(irt_pep, epoch=100, verbose=False)
Test if the model fits the virtual ccs values
[ ]:
# Predict on the same peptides used for training; the displayed frame carries
# the predictions in the 'ccs_pred' column next to the 'ccs' targets.
ccs_model.predict(irt_pep)
| | sequence | pep_name | irt | mods | mod_sites | nAA | ccs | charge | ccs_pred |
|---|---|---|---|---|---|---|---|---|---|
| 0 | LGGNEQVTR | RT-pep a | -24.92 | | | 9 | 0.0 | 2 | 0.000000 |
| 1 | GAGSSEPVTGLDAK | RT-pep b | 0.00 | | | 14 | 1.0 | 2 | 1.068118 |
| 2 | VEATFGVDESNAK | RT-pep c | 12.39 | | | 13 | 2.0 | 2 | 2.132658 |
| 3 | YILAGVENSK | RT-pep d | 19.79 | | | 10 | 3.0 | 2 | 3.344304 |
| 4 | TPVISGGPYEYR | RT-pep e | 28.71 | | | 12 | 4.0 | 2 | 4.412459 |
| 5 | TPVITGAPYEYR | RT-pep f | 33.38 | | | 12 | 5.0 | 2 | 5.313079 |
| 6 | DGLDAASYYAPVR | RT-pep g | 42.26 | | | 13 | 6.0 | 2 | 7.875914 |
| 7 | ADVTPADFSEWSK | RT-pep h | 54.62 | | | 13 | 7.0 | 2 | 7.989055 |
| 8 | GTFIIDPGGVIR | RT-pep i | 70.52 | | | 12 | 8.0 | 2 | 8.029768 |
| 9 | GTFIIDPAAVIR | RT-pep k | 87.23 | | | 12 | 9.0 | 2 | 8.443041 |
| 10 | LFLQFGAQGSPFLK | RT-pep l | 100.00 | | | 14 | 10.0 | 2 | 8.817308 |