{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Translate spectral libraries\n", "\n", "AlphaBase/PeptDeep to TSV (DiaNN/Spectronaut/OpenSWATH)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "vscode": { "languageId": "python" } }, "outputs": [], "source": [ "#%pip install peptdeep" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "vscode": { "languageId": "python" } }, "outputs": [], "source": [ "%reload_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "vscode": { "languageId": "python" } }, "outputs": [], "source": [ "# Input spectral library (HDF) to translate; edit this path for your own library.\n", "alphapeptdeep_hdf = r'y:\\User\\Feng\\speclib\\human_swissprot.speclib.hdf'\n", "# Written into the output file name as '_frags=<top_k_frag>'.\n", "top_k_frag = 16" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Do not change below" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "vscode": { "languageId": "python" } }, "outputs": [ { "data": { "text/plain": [ "'y:\\\\User\\\\Feng\\\\speclib\\\\human_swissprot_frags=16.speclib.tsv'" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import os\n", "\n", "# Thresholds defined for later cells (their use is not visible in this part\n", "# of the notebook - TODO confirm how they are applied).\n", "frag_inten = 0.001\n", "\n", "\n", "min_frag_mz = 200\n", "min_frag_nAA = 0\n", "\n", "# Derive the TSV path next to the input library. Only strip '.speclib.hdf' when\n", "# the path really ends with it; blind slicing by the suffix length would chop\n", "# arbitrary characters off a differently named file (e.g. 'my_lib.hdf').\n", "speclib_suffix = '.speclib.hdf'\n", "if alphapeptdeep_hdf.lower().endswith(speclib_suffix):\n", "    output_prefix = alphapeptdeep_hdf[:-len(speclib_suffix)]\n", "else:\n", "    # Fall back to dropping just the last extension.\n", "    output_prefix = os.path.splitext(alphapeptdeep_hdf)[0]\n", "output_diann_tsv = f\"{output_prefix}_frags={top_k_frag}.speclib.tsv\"\n", "output_diann_tsv" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "vscode": { "languageId": "python" } }, "outputs": [], "source": [ "from peptdeep.protein.fasta import PredictSpecLibFasta\n", "\n", "fasta_lib = PredictSpecLibFasta(\n", " None, \n", " decoy=None\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "vscode": { "languageId": "python" } }, "outputs": [], "source": [ "fasta_lib.load_hdf(alphapeptdeep_hdf, load_mod_seq=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "vscode": { "languageId": "python" } }, "outputs": [ { "data": { "text/html": [ "
| \n", " | description | \n", "full_name | \n", "gene_name | \n", "protein_id | \n", "sequence | \n", "
|---|---|---|---|---|---|
| 0 | \n", "sp|Q9H9K5|MER34_HUMAN Endogenous retroviral en... | \n", "sp|Q9H9K5|MER34_HUMAN | \n", "ERVMER34-1 | \n", "Q9H9K5 | \n", "MGSLSNYALLQLTLTAFLTILVQPQHLLAPVFRTLSILTNQSNCWL... | \n", "
| 1 | \n", "sp|P04439|HLAA_HUMAN HLA class I histocompatib... | \n", "sp|P04439|HLAA_HUMAN | \n", "HLA-A | \n", "P04439 | \n", "MAVMAPRTLLLLLSGALALTQTWAGSHSMRYFFTSVSRPGRGEPRF... | \n", "
| 2 | \n", "sp|P01911|DRB1_HUMAN HLA class II histocompati... | \n", "sp|P01911|DRB1_HUMAN | \n", "HLA-DRB1 | \n", "P01911 | \n", "MVCLKLPGGSCMTALTVTLMVLSSPLALSGDTRPRFLWQPKRECHF... | \n", "
| 3 | \n", "sp|P01889|HLAB_HUMAN HLA class I histocompatib... | \n", "sp|P01889|HLAB_HUMAN | \n", "HLA-B | \n", "P01889 | \n", "MLVMAPRTVLLLLSAALALTETWAGSHSMRYFYTSVSRPGRGEPRF... | \n", "
| 4 | \n", "sp|P31689|DNJA1_HUMAN DnaJ homolog subfamily A... | \n", "sp|P31689|DNJA1_HUMAN | \n", "DNAJA1 | \n", "P31689 | \n", "MVKETTYYDVLGVKPNATQEELKKAYRKLALKYHPDKNPNEGEKFK... | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 20391 | \n", "sp|Q8WVZ7|RN133_HUMAN E3 ubiquitin-protein lig... | \n", "sp|Q8WVZ7|RN133_HUMAN | \n", "RNF133 | \n", "Q8WVZ7 | \n", "MHLLKVGTWRNNTASSWLMKFSVLWLVSQNCCRASVVWMAYMNISF... | \n", "
| 20392 | \n", "sp|P05387|RLA2_HUMAN 60S acidic ribosomal prot... | \n", "sp|P05387|RLA2_HUMAN | \n", "RPLP2 | \n", "P05387 | \n", "MRYVASYLLAALGGNSSPSAKDIKKILDSVGIEADDDRLNKVISEL... | \n", "
| 20393 | \n", "sp|P51991|ROA3_HUMAN Heterogeneous nuclear rib... | \n", "sp|P51991|ROA3_HUMAN | \n", "HNRNPA3 | \n", "P51991 | \n", "MEVKPPPGRPQPDSGRRRRRRGEEGHDPKEPEQLRKLFIGGLSFET... | \n", "
| 20394 | \n", "sp|Q9BZX4|ROP1B_HUMAN Ropporin-1B OS=Homo sapi... | \n", "sp|Q9BZX4|ROP1B_HUMAN | \n", "ROPN1B | \n", "Q9BZX4 | \n", "MAQTDKPTCIPPELPKMLKEFAKAAIRAQPQDLIQWGADYFEALSR... | \n", "
| 20395 | \n", "sp|P34096|RNAS4_HUMAN Ribonuclease 4 OS=Homo s... | \n", "sp|P34096|RNAS4_HUMAN | \n", "RNASE4 | \n", "P34096 | \n", "MALQRTHSLLLLLLLTLLGLGLVQPSYGQDGMYQRFLRQHVHPEET... | \n", "
20396 rows × 5 columns
\n", "| \n", " | ccs_pred | \n", "charge | \n", "frag_stop_idx | \n", "frag_start_idx | \n", "miss_cleavage | \n", "mobility_pred | \n", "mod_seq_charge_hash | \n", "mod_seq_hash | \n", "nAA | \n", "precursor_mz | \n", "... | \n", "rt_pred | \n", "instrument | \n", "irt_pred | \n", "is_prot_cterm | \n", "is_prot_nterm | \n", "mod_sites | \n", "mods | \n", "nce | \n", "protein_idxes | \n", "sequence | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "315.529022 | \n", "2 | \n", "6 | \n", "0 | \n", "1 | \n", "0.775438 | \n", "471662500970219630 | \n", "471662500970219628 | \n", "7 | \n", "434.249018 | \n", "... | \n", "0.115377 | \n", "timsTOF | \n", "-37.187631 | \n", "False | \n", "False | \n", "\n", " | \n", " | 30 | \n", "19786 | \n", "RIHTGQR | \n", "
| 1 | \n", "304.965790 | \n", "2 | \n", "12 | \n", "6 | \n", "0 | \n", "0.748912 | \n", "-5301076820607700088 | \n", "-5301076820607700090 | \n", "7 | \n", "414.216952 | \n", "... | \n", "0.208976 | \n", "timsTOF | \n", "-16.331142 | \n", "False | \n", "False | \n", "\n", " | \n", " | 30 | \n", "9448 | \n", "PMPMPVR | \n", "
| 2 | \n", "304.080536 | \n", "2 | \n", "18 | \n", "12 | \n", "0 | \n", "0.746970 | \n", "6057464136741449833 | \n", "6057464136741449831 | \n", "7 | \n", "422.214409 | \n", "... | \n", "0.158058 | \n", "timsTOF | \n", "-27.677099 | \n", "False | \n", "False | \n", "4 | \n", "Oxidation@M | \n", "30 | \n", "9448 | \n", "PMPMPVR | \n", "
| 3 | \n", "305.825348 | \n", "2 | \n", "24 | \n", "18 | \n", "0 | \n", "0.751256 | \n", "-6431722582867031754 | \n", "-6431722582867031756 | \n", "7 | \n", "422.214409 | \n", "... | \n", "0.157143 | \n", "timsTOF | \n", "-27.881022 | \n", "False | \n", "False | \n", "2 | \n", "Oxidation@M | \n", "30 | \n", "9448 | \n", "PMPMPVR | \n", "
| 4 | \n", "330.547638 | \n", "2 | \n", "30 | \n", "24 | \n", "0 | \n", "0.814317 | \n", "-7409729050206298799 | \n", "-7409729050206298801 | \n", "7 | \n", "513.726727 | \n", "... | \n", "0.423747 | \n", "timsTOF | \n", "31.526291 | \n", "False | \n", "False | \n", "5 | \n", "Carbamidomethyl@C | \n", "30 | \n", "12819 | \n", "QEWFCTR | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 3654202 | \n", "891.748413 | \n", "4 | \n", "60404866 | \n", "60404832 | \n", "1 | \n", "1.108824 | \n", "7192344052213098708 | \n", "7192344052213098704 | \n", "35 | \n", "866.228888 | \n", "... | \n", "0.831350 | \n", "timsTOF | \n", "122.352159 | \n", "False | \n", "False | \n", "\n", " | \n", " | 30 | \n", "978 | \n", "NLTYVRGSVGPATSTLMFVAGVVGNGLALGILSAR | \n", "
| 3654203 | \n", "785.478699 | \n", "3 | \n", "60404900 | \n", "60404866 | \n", "1 | \n", "1.302269 | \n", "-1485306056792248108 | \n", "-1485306056792248111 | \n", "35 | \n", "1159.967730 | \n", "... | \n", "0.826977 | \n", "timsTOF | \n", "121.377815 | \n", "False | \n", "False | \n", "17 | \n", "Oxidation@M | \n", "30 | \n", "978 | \n", "NLTYVRGSVGPATSTLMFVAGVVGNGLALGILSAR | \n", "
| 3654204 | \n", "892.459656 | \n", "4 | \n", "60404934 | \n", "60404900 | \n", "1 | \n", "1.109729 | \n", "-1485306056792248107 | \n", "-1485306056792248111 | \n", "35 | \n", "870.227616 | \n", "... | \n", "0.826977 | \n", "timsTOF | \n", "121.377815 | \n", "False | \n", "False | \n", "17 | \n", "Oxidation@M | \n", "30 | \n", "978 | \n", "NLTYVRGSVGPATSTLMFVAGVVGNGLALGILSAR | \n", "
| 3654205 | \n", "791.322266 | \n", "4 | \n", "60404968 | \n", "60404934 | \n", "1 | \n", "0.984398 | \n", "5191231126132273755 | \n", "5191231126132273751 | \n", "35 | \n", "976.910866 | \n", "... | \n", "0.670129 | \n", "timsTOF | \n", "86.427514 | \n", "False | \n", "False | \n", "\n", " | \n", " | 30 | \n", "2299 | \n", "KNQAADDDDEDLNDTNYDEFNGYAGSLFSSGPYEK | \n", "
| 3654206 | \n", "823.819214 | \n", "4 | \n", "60405002 | \n", "60404968 | \n", "1 | \n", "1.024754 | \n", "-7707559913944666934 | \n", "-7707559913944666938 | \n", "35 | \n", "958.434460 | \n", "... | \n", "0.725150 | \n", "timsTOF | \n", "98.687774 | \n", "False | \n", "False | \n", "24 | \n", "Carbamidomethyl@C | \n", "30 | \n", "10080 | \n", "AYDADSGFNGKVLFTISDGNTDSCFNIDMETGQLK | \n", "
3654207 rows × 21 columns
\n", "| \n", " | description | \n", "full_name | \n", "gene_name | \n", "protein_id | \n", "sequence | \n", "
|---|---|---|---|---|---|
| 0 | \n", "sp|Q9H9K5|MER34_HUMAN Endogenous retroviral en... | \n", "sp|Q9H9K5|MER34_HUMAN | \n", "ERVMER34-1 | \n", "Q9H9K5 | \n", "MGSLSNYALLQLTLTAFLTILVQPQHLLAPVFRTLSILTNQSNCWL... | \n", "
| 1 | \n", "sp|P04439|HLAA_HUMAN HLA class I histocompatib... | \n", "sp|P04439|HLAA_HUMAN | \n", "HLA-A | \n", "P04439 | \n", "MAVMAPRTLLLLLSGALALTQTWAGSHSMRYFFTSVSRPGRGEPRF... | \n", "
| 2 | \n", "sp|P01911|DRB1_HUMAN HLA class II histocompati... | \n", "sp|P01911|DRB1_HUMAN | \n", "HLA-DRB1 | \n", "P01911 | \n", "MVCLKLPGGSCMTALTVTLMVLSSPLALSGDTRPRFLWQPKRECHF... | \n", "
| 3 | \n", "sp|P01889|HLAB_HUMAN HLA class I histocompatib... | \n", "sp|P01889|HLAB_HUMAN | \n", "HLA-B | \n", "P01889 | \n", "MLVMAPRTVLLLLSAALALTETWAGSHSMRYFYTSVSRPGRGEPRF... | \n", "
| 4 | \n", "sp|P31689|DNJA1_HUMAN DnaJ homolog subfamily A... | \n", "sp|P31689|DNJA1_HUMAN | \n", "DNAJA1 | \n", "P31689 | \n", "MVKETTYYDVLGVKPNATQEELKKAYRKLALKYHPDKNPNEGEKFK... | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 20391 | \n", "sp|Q8WVZ7|RN133_HUMAN E3 ubiquitin-protein lig... | \n", "sp|Q8WVZ7|RN133_HUMAN | \n", "RNF133 | \n", "Q8WVZ7 | \n", "MHLLKVGTWRNNTASSWLMKFSVLWLVSQNCCRASVVWMAYMNISF... | \n", "
| 20392 | \n", "sp|P05387|RLA2_HUMAN 60S acidic ribosomal prot... | \n", "sp|P05387|RLA2_HUMAN | \n", "RPLP2 | \n", "P05387 | \n", "MRYVASYLLAALGGNSSPSAKDIKKILDSVGIEADDDRLNKVISEL... | \n", "
| 20393 | \n", "sp|P51991|ROA3_HUMAN Heterogeneous nuclear rib... | \n", "sp|P51991|ROA3_HUMAN | \n", "HNRNPA3 | \n", "P51991 | \n", "MEVKPPPGRPQPDSGRRRRRRGEEGHDPKEPEQLRKLFIGGLSFET... | \n", "
| 20394 | \n", "sp|Q9BZX4|ROP1B_HUMAN Ropporin-1B OS=Homo sapi... | \n", "sp|Q9BZX4|ROP1B_HUMAN | \n", "ROPN1B | \n", "Q9BZX4 | \n", "MAQTDKPTCIPPELPKMLKEFAKAAIRAQPQDLIQWGADYFEALSR... | \n", "
| 20395 | \n", "sp|P34096|RNAS4_HUMAN Ribonuclease 4 OS=Homo s... | \n", "sp|P34096|RNAS4_HUMAN | \n", "RNASE4 | \n", "P34096 | \n", "MALQRTHSLLLLLLLTLLGLGLVQPSYGQDGMYQRFLRQHVHPEET... | \n", "
20396 rows × 5 columns
\n", "