Building your own models for RT prediction¶
[ ]:
%reload_ext autoreload
%autoreload 2
[ ]:
from peptdeep.model.featurize import (
get_batch_aa_indices,
get_batch_mod_feature
)
from peptdeep.settings import model_const
import peptdeep.model.model_interface as model_base
import peptdeep.model.building_block as building_block
# Number of entries in the `mod_elements` setting of `model_const`
# (presumably the per-residue width of the modification feature vector -- TODO confirm).
mod_feature_size = len(model_const['mod_elements'])
import torch
import pandas as pd
[ ]:
class RT_LSTM_Module(torch.nn.Module):
    """Retention-time (RT) regression network built from peptdeep building blocks.

    The network chains three stages: an
    ``Encoder_26AA_Mod_CNN_LSTM_AttnSum`` encoder that consumes the amino-acid
    indices together with the modification features, a dropout layer, and a
    ``Decoder_Linear`` head constructed with an output size argument of 1.

    Parameters
    ----------
    dropout : float, optional
        Dropout probability applied to the encoder output. Defaults to 0.2.
    hidden : int, optional
        Width passed to both the encoder and the decoder. Defaults to 128,
        the value that was previously hard-coded.
    """

    def __init__(self,
        dropout=0.2,
        hidden=128,
    ):
        super().__init__()
        # Submodules are created in the same order and under the same
        # attribute names as before, so `state_dict` keys are unchanged.
        self.dropout = torch.nn.Dropout(dropout)
        self.rt_encoder = building_block.Encoder_26AA_Mod_CNN_LSTM_AttnSum(
            hidden
        )
        self.rt_decoder = building_block.Decoder_Linear(
            hidden,
            1
        )

    def forward(self,
        aa_indices,
        mod_x,
    ):
        """Run encoder -> dropout -> decoder and return the squeezed output.

        Parameters
        ----------
        aa_indices : torch.Tensor
            Amino-acid index tensor, passed to the encoder as its first input.
        mod_x : torch.Tensor
            Modification feature tensor, passed to the encoder as its second
            input.

        Returns
        -------
        torch.Tensor
            Decoder output with dimension 1 removed via ``squeeze(1)``
            (presumably one RT value per peptide in the batch -- this relies
            on the decoder emitting a size-1 axis at position 1).
        """
        x = self.rt_encoder(aa_indices, mod_x)
        x = self.dropout(x)
        return self.rt_decoder(x).squeeze(1)
[ ]:
class RT_Transformer_Module(torch.nn.Module):
    """Transformer-based retention-time (RT) regression network.

    Same three-stage layout as the LSTM variant, but the encoder is
    ``Encoder_AA_Mod_Transformer_AttnSum``: encoder -> dropout ->
    ``Decoder_Linear`` head constructed with an output size argument of 1.

    Parameters
    ----------
    dropout : float, optional
        Dropout probability applied to the encoder output. Defaults to 0.2.
    hidden : int, optional
        Width passed to both the encoder and the decoder. Defaults to 128,
        the value that was previously hard-coded.
        NOTE(review): the transformer encoder may constrain valid widths
        (e.g. divisibility by the number of attention heads) -- confirm
        before choosing a non-default value.
    """

    def __init__(self,
        dropout=0.2,
        hidden=128,
    ):
        super().__init__()
        # Creation order and attribute names are kept as in the original
        # so `state_dict` keys are unchanged.
        self.dropout = torch.nn.Dropout(dropout)
        self.encoder = building_block.Encoder_AA_Mod_Transformer_AttnSum(
            hidden
        )
        self.decoder = building_block.Decoder_Linear(
            hidden, 1
        )

    def forward(self,
        aa_indices,
        mod_x,
    ):
        """Run encoder -> dropout -> decoder and return the squeezed output.

        Parameters
        ----------
        aa_indices : torch.Tensor
            Amino-acid index tensor, passed to the encoder as its first input.
        mod_x : torch.Tensor
            Modification feature tensor, passed to the encoder as its second
            input.

        Returns
        -------
        torch.Tensor
            Decoder output with dimension 1 removed via ``squeeze(1)``
            (presumably one RT value per peptide in the batch).
        """
        x = self.encoder(aa_indices, mod_x)
        x = self.dropout(x)
        return self.decoder(x).squeeze(1)
[ ]:
class RT_ModelInterface(model_base.ModelInterface):
    """Model interface that couples an RT network with DataFrame featurization.

    The constructor builds the network via ``self.build``, selects an L1 loss,
    and names the DataFrame columns used for training targets (``rt_norm``)
    and for storing predictions (``rt_pred``).

    Parameters
    ----------
    model_class : class, optional
        The ``torch.nn.Module`` subclass to build (the class itself, not an
        instance). Defaults to ``RT_LSTM_Module``.
    dropout : float, optional
        Forwarded to ``self.build`` as the ``dropout`` keyword. Defaults to 0.1.
    """

    def __init__(self,
        model_class:torch.nn.Module=RT_LSTM_Module,
        dropout=0.1,
    ):
        super().__init__()
        self.build(
            model_class,
            dropout=dropout,
        )
        self.loss_func = torch.nn.L1Loss()
        self.target_column_to_train = 'rt_norm'
        self.target_column_to_predict = 'rt_pred'

    def _get_features_from_batch_df(self,
        batch_df: pd.DataFrame,
    ):
        """Convert one batch of peptides into the two network input tensors.

        Parameters
        ----------
        batch_df : pd.DataFrame
            Batch of peptides. The ``sequence`` column is read here; the whole
            frame is handed to ``get_batch_mod_feature`` (presumably it reads
            the ``mods``/``mod_sites``/``nAA`` columns -- TODO confirm).

        Returns
        -------
        tuple
            ``(aa_indices, mod_x)``: a ``torch.LongTensor`` of amino-acid
            indices and a ``torch.Tensor`` of modification features.
        """
        # Sequences are cast to a NumPy unicode array before index lookup.
        aa_indices = torch.LongTensor(
            get_batch_aa_indices(
                batch_df['sequence'].values.astype('U')
            )
        )
        mod_x = torch.Tensor(
            get_batch_mod_feature(
                batch_df
            )
        )
        return aa_indices, mod_x

    def _get_targets_from_batch_df(self,
        batch_df: pd.DataFrame,
    ) -> torch.Tensor:
        """Return the training targets of one batch as a tensor.

        The column is taken from ``self.target_column_to_train`` (set to
        ``'rt_norm'`` in ``__init__``) rather than a hard-coded name, so
        changing that attribute changes the values trained on.
        """
        return torch.Tensor(batch_df[self.target_column_to_train].values)
Testing the RT model¶
Prepare training data¶
[ ]:
from peptdeep.model.rt import irt_pep
# Min-max scale the `irt` column to the range [0, 1] and store it as `rt_norm`,
# the column that RT_ModelInterface uses as its training target.
# Note: this adds the column to the imported `irt_pep` DataFrame in place.
irt_pep['rt_norm'] = (irt_pep.irt - irt_pep.irt.min())/(irt_pep.irt.max()-irt_pep.irt.min())
irt_pep
| sequence | pep_name | irt | mods | mod_sites | nAA | rt_norm | |
|---|---|---|---|---|---|---|---|
| 0 | LGGNEQVTR | RT-pep a | -24.92 | 9 | 0.000000 | ||
| 1 | GAGSSEPVTGLDAK | RT-pep b | 0.00 | 14 | 0.199488 | ||
| 2 | VEATFGVDESNAK | RT-pep c | 12.39 | 13 | 0.298671 | ||
| 3 | YILAGVENSK | RT-pep d | 19.79 | 10 | 0.357909 | ||
| 4 | TPVISGGPYEYR | RT-pep e | 28.71 | 12 | 0.429315 | ||
| 5 | TPVITGAPYEYR | RT-pep f | 33.38 | 12 | 0.466699 | ||
| 6 | DGLDAASYYAPVR | RT-pep g | 42.26 | 13 | 0.537784 | ||
| 7 | ADVTPADFSEWSK | RT-pep h | 54.62 | 13 | 0.636728 | ||
| 8 | GTFIIDPGGVIR | RT-pep i | 70.52 | 12 | 0.764009 | ||
| 9 | GTFIIDPAAVIR | RT-pep k | 87.23 | 12 | 0.897775 | ||
| 10 | LFLQFGAQGSPFLK | RT-pep l | 100.00 | 14 | 1.000000 |
[ ]:
# Build the model interface around the LSTM-based RT network.
rt_model = RT_ModelInterface(model_class=RT_LSTM_Module)
Device `gpu` is not available, set to `cpu`
Test the untrained model¶
[ ]:
# Predict with the still-untrained model; the output below shows the
# predictions in the `rt_pred` column.
rt_model.predict(irt_pep)
| sequence | pep_name | irt | mods | mod_sites | nAA | rt_norm | rt_pred | |
|---|---|---|---|---|---|---|---|---|
| 0 | LGGNEQVTR | RT-pep a | -24.92 | 9 | 0.000000 | 0.0 | ||
| 1 | GAGSSEPVTGLDAK | RT-pep b | 0.00 | 14 | 0.199488 | 0.0 | ||
| 2 | VEATFGVDESNAK | RT-pep c | 12.39 | 13 | 0.298671 | 0.0 | ||
| 3 | YILAGVENSK | RT-pep d | 19.79 | 10 | 0.357909 | 0.0 | ||
| 4 | TPVISGGPYEYR | RT-pep e | 28.71 | 12 | 0.429315 | 0.0 | ||
| 5 | TPVITGAPYEYR | RT-pep f | 33.38 | 12 | 0.466699 | 0.0 | ||
| 6 | DGLDAASYYAPVR | RT-pep g | 42.26 | 13 | 0.537784 | 0.0 | ||
| 7 | ADVTPADFSEWSK | RT-pep h | 54.62 | 13 | 0.636728 | 0.0 | ||
| 8 | GTFIIDPGGVIR | RT-pep i | 70.52 | 12 | 0.764009 | 0.0 | ||
| 9 | GTFIIDPAAVIR | RT-pep k | 87.23 | 12 | 0.897775 | 0.0 | ||
| 10 | LFLQFGAQGSPFLK | RT-pep l | 100.00 | 14 | 1.000000 | 0.0 |
Test if training works¶
[ ]:
# Train on the iRT peptides for 100 epochs
# (verbose=False presumably suppresses per-epoch logging -- TODO confirm).
rt_model.train(irt_pep, epoch=100, verbose=False)
Test if the model fits the irt_pep data¶
[ ]:
# Predict again after training to compare `rt_pred` against `rt_norm`.
rt_model.predict(irt_pep)
| sequence | pep_name | irt | mods | mod_sites | nAA | rt_norm | rt_pred | |
|---|---|---|---|---|---|---|---|---|
| 0 | LGGNEQVTR | RT-pep a | -24.92 | 9 | 0.000000 | 0.000000 | ||
| 1 | GAGSSEPVTGLDAK | RT-pep b | 0.00 | 14 | 0.199488 | 0.209159 | ||
| 2 | VEATFGVDESNAK | RT-pep c | 12.39 | 13 | 0.298671 | 0.293867 | ||
| 3 | YILAGVENSK | RT-pep d | 19.79 | 10 | 0.357909 | 0.349884 | ||
| 4 | TPVISGGPYEYR | RT-pep e | 28.71 | 12 | 0.429315 | 0.416145 | ||
| 5 | TPVITGAPYEYR | RT-pep f | 33.38 | 12 | 0.466699 | 0.462958 | ||
| 6 | DGLDAASYYAPVR | RT-pep g | 42.26 | 13 | 0.537784 | 0.540334 | ||
| 7 | ADVTPADFSEWSK | RT-pep h | 54.62 | 13 | 0.636728 | 0.638801 | ||
| 8 | GTFIIDPGGVIR | RT-pep i | 70.52 | 12 | 0.764009 | 0.725222 | ||
| 9 | GTFIIDPAAVIR | RT-pep k | 87.23 | 12 | 0.897775 | 0.882472 | ||
| 10 | LFLQFGAQGSPFLK | RT-pep l | 100.00 | 14 | 1.000000 | 0.962103 |
Get number of model parameters¶
[ ]:
# Report the number of parameters of the LSTM-based model.
rt_model.get_parameter_num()
232448
Switching the model to a Transformer is easy.¶
Users can plug in other nn.Module classes without redesigning the AA/PTM feature-extraction parts.¶
[ ]:
# Same interface, different network: only `model_class` changes.
rt_model = RT_ModelInterface(model_class=RT_Transformer_Module)
# Train for 50 epochs (warmup_epoch=20 presumably ramps the learning rate
# over the first 20 epochs -- TODO confirm against ModelInterface.train).
rt_model.train(irt_pep, epoch=50, warmup_epoch=20)
rt_model.predict(irt_pep)
Device `gpu` is not available, set to `cpu`
| sequence | pep_name | irt | mods | mod_sites | nAA | rt_norm | rt_pred | |
|---|---|---|---|---|---|---|---|---|
| 0 | LGGNEQVTR | RT-pep a | -24.92 | 9 | 0.000000 | 0.007334 | ||
| 1 | GAGSSEPVTGLDAK | RT-pep b | 0.00 | 14 | 0.199488 | 0.209777 | ||
| 2 | VEATFGVDESNAK | RT-pep c | 12.39 | 13 | 0.298671 | 0.350849 | ||
| 3 | YILAGVENSK | RT-pep d | 19.79 | 10 | 0.357909 | 0.388612 | ||
| 4 | TPVISGGPYEYR | RT-pep e | 28.71 | 12 | 0.429315 | 0.483431 | ||
| 5 | TPVITGAPYEYR | RT-pep f | 33.38 | 12 | 0.466699 | 0.506625 | ||
| 6 | DGLDAASYYAPVR | RT-pep g | 42.26 | 13 | 0.537784 | 0.578891 | ||
| 7 | ADVTPADFSEWSK | RT-pep h | 54.62 | 13 | 0.636728 | 0.619564 | ||
| 8 | GTFIIDPGGVIR | RT-pep i | 70.52 | 12 | 0.764009 | 0.818625 | ||
| 9 | GTFIIDPAAVIR | RT-pep k | 87.23 | 12 | 0.897775 | 0.936355 | ||
| 10 | LFLQFGAQGSPFLK | RT-pep l | 100.00 | 14 | 1.000000 | 1.094726 |
[ ]:
# Report the number of parameters of the Transformer-based model.
rt_model.get_parameter_num()
817104
[ ]: