Spaces:
Running
on
Zero
Running
on
Zero
Upgrade to InstaNovo v1.1.3 with new diffusion checkpoint
Browse files
- README.md +15 -16
- app.py +7 -8
- pyproject.toml +3 -3
- requirements.txt +3 -3
- uv.lock +0 -0
README.md
CHANGED
@@ -1,17 +1,17 @@
|
|
1 |
-
---
|
2 |
-
title: De Novo Peptide Sequencing With InstaNovo and InstaNovo+
|
3 |
-
emoji: π
|
4 |
-
colorFrom: green
|
5 |
-
colorTo: green
|
6 |
-
sdk: gradio
|
7 |
-
sdk_version: 5.23.1
|
8 |
-
app_file: app.py
|
9 |
-
pinned: true
|
10 |
-
license: apache-2.0
|
11 |
-
thumbnail: >-
|
12 |
-
https://cdn-uploads.huggingface.co/production/uploads/6189aee17d9b289cdebafbd6/tb9e-8Z2_pDsRMkGglcvh.png
|
13 |
-
short_description: Translate fragment ion peaks into sequence of amino acids
|
14 |
-
---
|
15 |
|
16 |
# _De Novo_ Peptide Sequencing With InstaNovo and InstaNovo+
|
17 |
|
@@ -35,7 +35,6 @@ This Space provides a web interface for the [InstaNovo](https://github.com/insta
|
|
35 |
|
36 |
This demo uses the pretrained model checkpoint.
|
37 |
|
38 |
-
* Predictions use version `instanovo-v1.1.0` for the transformer-based InstaNovo model and version `instanovoplus-v1.1.0-alpha` for the diffusion-based InstaNovo+ model.
|
39 |
-
* The InstaNovo+ model `instanovoplus-v1.1.0-alpha` is an alpha release.
|
40 |
|
41 |
**Note:** Processing large files can take time, depending on the file size and the chosen decoding method. Knapsack generation can also add to the initial startup time.
|
|
|
1 |
+
---
|
2 |
+
title: De Novo Peptide Sequencing With InstaNovo and InstaNovo+
|
3 |
+
emoji: π
|
4 |
+
colorFrom: green
|
5 |
+
colorTo: green
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 5.23.1
|
8 |
+
app_file: app.py
|
9 |
+
pinned: true
|
10 |
+
license: apache-2.0
|
11 |
+
thumbnail: >-
|
12 |
+
https://cdn-uploads.huggingface.co/production/uploads/6189aee17d9b289cdebafbd6/tb9e-8Z2_pDsRMkGglcvh.png
|
13 |
+
short_description: Translate fragment ion peaks into sequence of amino acids
|
14 |
+
---
|
15 |
|
16 |
# _De Novo_ Peptide Sequencing With InstaNovo and InstaNovo+
|
17 |
|
|
|
35 |
|
36 |
This demo uses the pretrained model checkpoint.
|
37 |
|
38 |
+
* Predictions use version `instanovo-v1.1.0` for the transformer-based InstaNovo model and version `instanovoplus-v1.1.0` for the diffusion-based InstaNovo+ model.
|
|
|
39 |
|
40 |
**Note:** Processing large files can take time, depending on the file size and the chosen decoding method. Knapsack generation can also add to the initial startup time.
|
app.py
CHANGED
@@ -38,7 +38,7 @@ except ImportError as e:
|
|
38 |
|
39 |
# --- Configuration ---
|
40 |
TRANSFORMER_MODEL_ID = "instanovo-v1.1.0"
|
41 |
-
DIFFUSION_MODEL_ID = "instanovoplus-v1.1.0-alpha"
|
42 |
KNAPSACK_DIR = Path("./knapsack_cache")
|
43 |
|
44 |
# Determine device
|
@@ -111,10 +111,10 @@ def load_models_and_knapsack():
|
|
111 |
INSTANOVOPLUS, INSTANOVOPLUS_CONFIG = InstaNovoPlus.from_pretrained(DIFFUSION_MODEL_ID)
|
112 |
INSTANOVOPLUS.to(DEVICE)
|
113 |
INSTANOVOPLUS.eval()
|
114 |
-
if RESIDUE_SET is not None and INSTANOVOPLUS.
|
115 |
logger.warning("Residue sets between Transformer and Diffusion models differ. Using Transformer's set.")
|
116 |
elif RESIDUE_SET is None:
|
117 |
-
RESIDUE_SET = INSTANOVOPLUS.
|
118 |
|
119 |
logger.info("Diffusion model loaded successfully.")
|
120 |
except Exception as e:
|
@@ -581,7 +581,7 @@ def predict_peptides(input_file, mode_selection, transformer_decoder_selection):
|
|
581 |
bin_spectra=config.get("conv_peak_encoder", False),
|
582 |
peptide_pad_length=config.get("max_length", 40) if config.get("compile_model", False) else 0,
|
583 |
reverse_peptide=reverse_for_transformer, # Key change based on mode
|
584 |
-
|
585 |
)
|
586 |
dl = DataLoader(ds, batch_size=config.batch_size, num_workers=0, shuffle=False, collate_fn=collate_batch)
|
587 |
|
@@ -728,7 +728,7 @@ with gr.Blocks(
|
|
728 |
[
|
729 |
"InstaNovo with InstaNovo+ refinement (Default, Recommended)",
|
730 |
"InstaNovo Only (Transformer)",
|
731 |
-
"InstaNovo+ Only (Diffusion
|
732 |
],
|
733 |
label="Prediction Mode",
|
734 |
value="InstaNovo with InstaNovo+ refinement (Default, Recommended)",
|
@@ -783,7 +783,7 @@ with gr.Blocks(
|
|
783 |
["assets/sample_spectra.mgf", "InstaNovo with InstaNovo+ refinement (Default, Recommended)", "Knapsack Beam Search (Accurate, Slower)"],
|
784 |
["assets/sample_spectra.mgf", "InstaNovo Only (Transformer)", "Greedy Search (Fast)"],
|
785 |
["assets/sample_spectra.mgf", "InstaNovo Only (Transformer)", "Knapsack Beam Search (Accurate, Slower)"],
|
786 |
-
["assets/sample_spectra.mgf", "InstaNovo+ Only (Diffusion
|
787 |
],
|
788 |
inputs=[input_file, mode_selection, transformer_decoder_selection],
|
789 |
# outputs=[output_df, output_file],
|
@@ -793,11 +793,10 @@ with gr.Blocks(
|
|
793 |
|
794 |
gr.Markdown(f"""**Notes:**
|
795 |
* Predictions use version `{TRANSFORMER_MODEL_ID}` for the transformer-based InstaNovo model and version `{DIFFUSION_MODEL_ID}` for the diffusion-based InstaNovo+ model.
|
796 |
-
* The InstaNovo+ model `{DIFFUSION_MODEL_ID}` is an alpha release.
|
797 |
* **Prediction Modes:**
|
798 |
* **InstaNovo with InstaNovo+ refinement** Runs initial prediction with the selected Transformer method (Greedy/Knapsack), then refines using InstaNovo+.
|
799 |
* **InstaNovo Only:** Uses only the Transformer with the selected decoding method.
|
800 |
-
* **InstaNovo+ Only:** Predicts directly using the Diffusion model
|
801 |
* **Transformer Decoding Methods:**
|
802 |
* **Greedy Search:** use this for optimal performance, has similar performance as Knapsack Beam Search at 5% FDR.
|
803 |
* **Knapsack Beam Search:** use this for the best results and highest peptide recall, but is about 10x slower than Greedy Search.
|
|
|
38 |
|
39 |
# --- Configuration ---
|
40 |
TRANSFORMER_MODEL_ID = "instanovo-v1.1.0"
|
41 |
+
DIFFUSION_MODEL_ID = "instanovoplus-v1.1.0"
|
42 |
KNAPSACK_DIR = Path("./knapsack_cache")
|
43 |
|
44 |
# Determine device
|
|
|
111 |
INSTANOVOPLUS, INSTANOVOPLUS_CONFIG = InstaNovoPlus.from_pretrained(DIFFUSION_MODEL_ID)
|
112 |
INSTANOVOPLUS.to(DEVICE)
|
113 |
INSTANOVOPLUS.eval()
|
114 |
+
if RESIDUE_SET is not None and INSTANOVOPLUS.residue_set != RESIDUE_SET:
|
115 |
logger.warning("Residue sets between Transformer and Diffusion models differ. Using Transformer's set.")
|
116 |
elif RESIDUE_SET is None:
|
117 |
+
RESIDUE_SET = INSTANOVOPLUS.residue_set
|
118 |
|
119 |
logger.info("Diffusion model loaded successfully.")
|
120 |
except Exception as e:
|
|
|
581 |
bin_spectra=config.get("conv_peak_encoder", False),
|
582 |
peptide_pad_length=config.get("max_length", 40) if config.get("compile_model", False) else 0,
|
583 |
reverse_peptide=reverse_for_transformer, # Key change based on mode
|
584 |
+
add_eos="InstaNovo+ Only" not in mode_selection # Signal if input is for diffusion
|
585 |
)
|
586 |
dl = DataLoader(ds, batch_size=config.batch_size, num_workers=0, shuffle=False, collate_fn=collate_batch)
|
587 |
|
|
|
728 |
[
|
729 |
"InstaNovo with InstaNovo+ refinement (Default, Recommended)",
|
730 |
"InstaNovo Only (Transformer)",
|
731 |
+
"InstaNovo+ Only (Diffusion)",
|
732 |
],
|
733 |
label="Prediction Mode",
|
734 |
value="InstaNovo with InstaNovo+ refinement (Default, Recommended)",
|
|
|
783 |
["assets/sample_spectra.mgf", "InstaNovo with InstaNovo+ refinement (Default, Recommended)", "Knapsack Beam Search (Accurate, Slower)"],
|
784 |
["assets/sample_spectra.mgf", "InstaNovo Only (Transformer)", "Greedy Search (Fast)"],
|
785 |
["assets/sample_spectra.mgf", "InstaNovo Only (Transformer)", "Knapsack Beam Search (Accurate, Slower)"],
|
786 |
+
["assets/sample_spectra.mgf", "InstaNovo+ Only (Diffusion)", ""],
|
787 |
],
|
788 |
inputs=[input_file, mode_selection, transformer_decoder_selection],
|
789 |
# outputs=[output_df, output_file],
|
|
|
793 |
|
794 |
gr.Markdown(f"""**Notes:**
|
795 |
* Predictions use version `{TRANSFORMER_MODEL_ID}` for the transformer-based InstaNovo model and version `{DIFFUSION_MODEL_ID}` for the diffusion-based InstaNovo+ model.
|
|
|
796 |
* **Prediction Modes:**
|
797 |
* **InstaNovo with InstaNovo+ refinement** Runs initial prediction with the selected Transformer method (Greedy/Knapsack), then refines using InstaNovo+.
|
798 |
* **InstaNovo Only:** Uses only the Transformer with the selected decoding method.
|
799 |
+
* **InstaNovo+ Only:** Predicts directly using the Diffusion model.
|
800 |
* **Transformer Decoding Methods:**
|
801 |
* **Greedy Search:** use this for optimal performance, has similar performance as Knapsack Beam Search at 5% FDR.
|
802 |
* **Knapsack Beam Search:** use this for the best results and highest peptide recall, but is about 10x slower than Greedy Search.
|
pyproject.toml
CHANGED
@@ -5,10 +5,10 @@ description = "InstaNovo Gradio App"
|
|
5 |
readme = "README.md"
|
6 |
requires-python = ">=3.12"
|
7 |
dependencies = [
|
8 |
-
"gradio>=5.
|
9 |
"gradio-log>=0.0.8",
|
10 |
-
"instanovo>=1.1.
|
11 |
-
"spaces>=0.
|
12 |
"torch==2.4.1",
|
13 |
]
|
14 |
|
|
|
5 |
readme = "README.md"
|
6 |
requires-python = ">=3.12"
|
7 |
dependencies = [
|
8 |
+
"gradio>=5.33.1",
|
9 |
"gradio-log>=0.0.8",
|
10 |
+
"instanovo[cu124]>=1.1.3",
|
11 |
+
"spaces>=0.37.0",
|
12 |
"torch==2.4.1",
|
13 |
]
|
14 |
|
requirements.txt
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
gradio>=5.
|
2 |
gradio-log>=0.0.8
|
3 |
-
instanovo[cu124]>=1.1.
|
4 |
-
spaces>=0.
|
|
|
1 |
+
gradio>=5.33.1
|
2 |
gradio-log>=0.0.8
|
3 |
+
instanovo[cu124]>=1.1.3
|
4 |
+
spaces>=0.37.0
|
uv.lock
CHANGED
The diff for this file is too large to render.
See raw diff
|
|