BioGeek committed on
Commit
802d9be
·
1 Parent(s): a068c4e

Upgrade to InstaNovo v1.1.3 with new diffusion checkpoint

Browse files
Files changed (5) hide show
  1. README.md +15 -16
  2. app.py +7 -8
  3. pyproject.toml +3 -3
  4. requirements.txt +3 -3
  5. uv.lock +0 -0
README.md CHANGED
@@ -1,17 +1,17 @@
1
- ---
2
- title: De Novo Peptide Sequencing With InstaNovo and InstaNovo+
3
- emoji: 📊
4
- colorFrom: green
5
- colorTo: green
6
- sdk: gradio
7
- sdk_version: 5.23.1
8
- app_file: app.py
9
- pinned: true
10
- license: apache-2.0
11
- thumbnail: >-
12
- https://cdn-uploads.huggingface.co/production/uploads/6189aee17d9b289cdebafbd6/tb9e-8Z2_pDsRMkGglcvh.png
13
- short_description: Translate fragment ion peaks into sequence of amino acids
14
- ---
15
 
16
  # _De Novo_ Peptide Sequencing With InstaNovo and InstaNovo+
17
 
@@ -35,7 +35,6 @@ This Space provides a web interface for the [InstaNovo](https://github.com/insta
35
 
36
  This demo uses the pretrained model checkpoint.
37
 
38
- * Predictions use version `instanovo-v1.1.0` for the transformer-based InstaNovo model and version `instanovoplus-v1.1.0-alpha` for the diffusion-based InstaNovo+ model.
39
- * The InstaNovo+ model `instanovoplus-v1.1.0-alpha` is an alpha release.
40
 
41
  **Note:** Processing large files can take time, depending on the file size and the chosen decoding method. Knapsack generation can also add to the initial startup time.
 
1
+ ---
2
+ title: De Novo Peptide Sequencing With InstaNovo and InstaNovo+
3
+ emoji: 📊
4
+ colorFrom: green
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 5.23.1
8
+ app_file: app.py
9
+ pinned: true
10
+ license: apache-2.0
11
+ thumbnail: >-
12
+ https://cdn-uploads.huggingface.co/production/uploads/6189aee17d9b289cdebafbd6/tb9e-8Z2_pDsRMkGglcvh.png
13
+ short_description: Translate fragment ion peaks into sequence of amino acids
14
+ ---
15
 
16
  # _De Novo_ Peptide Sequencing With InstaNovo and InstaNovo+
17
 
 
35
 
36
  This demo uses the pretrained model checkpoint.
37
 
38
+ * Predictions use version `instanovo-v1.1.0` for the transformer-based InstaNovo model and version `instanovoplus-v1.1.0` for the diffusion-based InstaNovo+ model.
 
39
 
40
  **Note:** Processing large files can take time, depending on the file size and the chosen decoding method. Knapsack generation can also add to the initial startup time.
app.py CHANGED
@@ -38,7 +38,7 @@ except ImportError as e:
38
 
39
  # --- Configuration ---
40
  TRANSFORMER_MODEL_ID = "instanovo-v1.1.0"
41
- DIFFUSION_MODEL_ID = "instanovoplus-v1.1.0-alpha"
42
  KNAPSACK_DIR = Path("./knapsack_cache")
43
 
44
  # Determine device
@@ -111,10 +111,10 @@ def load_models_and_knapsack():
111
  INSTANOVOPLUS, INSTANOVOPLUS_CONFIG = InstaNovoPlus.from_pretrained(DIFFUSION_MODEL_ID)
112
  INSTANOVOPLUS.to(DEVICE)
113
  INSTANOVOPLUS.eval()
114
- if RESIDUE_SET is not None and INSTANOVOPLUS.residues != RESIDUE_SET:
115
  logger.warning("Residue sets between Transformer and Diffusion models differ. Using Transformer's set.")
116
  elif RESIDUE_SET is None:
117
- RESIDUE_SET = INSTANOVOPLUS.residues
118
 
119
  logger.info("Diffusion model loaded successfully.")
120
  except Exception as e:
@@ -581,7 +581,7 @@ def predict_peptides(input_file, mode_selection, transformer_decoder_selection):
581
  bin_spectra=config.get("conv_peak_encoder", False),
582
  peptide_pad_length=config.get("max_length", 40) if config.get("compile_model", False) else 0,
583
  reverse_peptide=reverse_for_transformer, # Key change based on mode
584
- diffusion="InstaNovo+ Only" in mode_selection # Signal if input is for diffusion
585
  )
586
  dl = DataLoader(ds, batch_size=config.batch_size, num_workers=0, shuffle=False, collate_fn=collate_batch)
587
 
@@ -728,7 +728,7 @@ with gr.Blocks(
728
  [
729
  "InstaNovo with InstaNovo+ refinement (Default, Recommended)",
730
  "InstaNovo Only (Transformer)",
731
- "InstaNovo+ Only (Diffusion, Alpha release)",
732
  ],
733
  label="Prediction Mode",
734
  value="InstaNovo with InstaNovo+ refinement (Default, Recommended)",
@@ -783,7 +783,7 @@ with gr.Blocks(
783
  ["assets/sample_spectra.mgf", "InstaNovo with InstaNovo+ refinement (Default, Recommended)", "Knapsack Beam Search (Accurate, Slower)"],
784
  ["assets/sample_spectra.mgf", "InstaNovo Only (Transformer)", "Greedy Search (Fast)"],
785
  ["assets/sample_spectra.mgf", "InstaNovo Only (Transformer)", "Knapsack Beam Search (Accurate, Slower)"],
786
- ["assets/sample_spectra.mgf", "InstaNovo+ Only (Diffusion, Alpha release)", ""],
787
  ],
788
  inputs=[input_file, mode_selection, transformer_decoder_selection],
789
  # outputs=[output_df, output_file],
@@ -793,11 +793,10 @@ with gr.Blocks(
793
 
794
  gr.Markdown(f"""**Notes:**
795
  * Predictions use version `{TRANSFORMER_MODEL_ID}` for the transformer-based InstaNovo model and version `{DIFFUSION_MODEL_ID}` for the diffusion-based InstaNovo+ model.
796
- * The InstaNovo+ model `{DIFFUSION_MODEL_ID}` is an alpha release.
797
  * **Predction Modes:**
798
  * **InstaNovo with InstaNovo+ refinement** Runs initial prediction with the selected Transformer method (Greedy/Knapsack), then refines using InstaNovo+.
799
  * **InstaNovo Only:** Uses only the Transformer with the selected decoding method.
800
- * **InstaNovo+ Only:** Predicts directly using the Diffusion model (alpha release).
801
  * **Transformer Decoding Methods:**
802
  * **Greedy Search:** use this for optimal performance, has similar performance as Knapsack Beam Search at 5% FDR.
803
  * **Knapsack Beam Search:** use this for the best results and highest peptide recall, but is about 10x slower than Greedy Search.
 
38
 
39
  # --- Configuration ---
40
  TRANSFORMER_MODEL_ID = "instanovo-v1.1.0"
41
+ DIFFUSION_MODEL_ID = "instanovoplus-v1.1.0"
42
  KNAPSACK_DIR = Path("./knapsack_cache")
43
 
44
  # Determine device
 
111
  INSTANOVOPLUS, INSTANOVOPLUS_CONFIG = InstaNovoPlus.from_pretrained(DIFFUSION_MODEL_ID)
112
  INSTANOVOPLUS.to(DEVICE)
113
  INSTANOVOPLUS.eval()
114
+ if RESIDUE_SET is not None and INSTANOVOPLUS.residue_set != RESIDUE_SET:
115
  logger.warning("Residue sets between Transformer and Diffusion models differ. Using Transformer's set.")
116
  elif RESIDUE_SET is None:
117
+ RESIDUE_SET = INSTANOVOPLUS.residue_set
118
 
119
  logger.info("Diffusion model loaded successfully.")
120
  except Exception as e:
 
581
  bin_spectra=config.get("conv_peak_encoder", False),
582
  peptide_pad_length=config.get("max_length", 40) if config.get("compile_model", False) else 0,
583
  reverse_peptide=reverse_for_transformer, # Key change based on mode
584
+ add_eos="InstaNovo+ Only" not in mode_selection # Signal if input is for diffusion
585
  )
586
  dl = DataLoader(ds, batch_size=config.batch_size, num_workers=0, shuffle=False, collate_fn=collate_batch)
587
 
 
728
  [
729
  "InstaNovo with InstaNovo+ refinement (Default, Recommended)",
730
  "InstaNovo Only (Transformer)",
731
+ "InstaNovo+ Only (Diffusion)",
732
  ],
733
  label="Prediction Mode",
734
  value="InstaNovo with InstaNovo+ refinement (Default, Recommended)",
 
783
  ["assets/sample_spectra.mgf", "InstaNovo with InstaNovo+ refinement (Default, Recommended)", "Knapsack Beam Search (Accurate, Slower)"],
784
  ["assets/sample_spectra.mgf", "InstaNovo Only (Transformer)", "Greedy Search (Fast)"],
785
  ["assets/sample_spectra.mgf", "InstaNovo Only (Transformer)", "Knapsack Beam Search (Accurate, Slower)"],
786
+ ["assets/sample_spectra.mgf", "InstaNovo+ Only (Diffusion)", ""],
787
  ],
788
  inputs=[input_file, mode_selection, transformer_decoder_selection],
789
  # outputs=[output_df, output_file],
 
793
 
794
  gr.Markdown(f"""**Notes:**
795
  * Predictions use version `{TRANSFORMER_MODEL_ID}` for the transformer-based InstaNovo model and version `{DIFFUSION_MODEL_ID}` for the diffusion-based InstaNovo+ model.
 
796
  * **Predction Modes:**
797
  * **InstaNovo with InstaNovo+ refinement** Runs initial prediction with the selected Transformer method (Greedy/Knapsack), then refines using InstaNovo+.
798
  * **InstaNovo Only:** Uses only the Transformer with the selected decoding method.
799
+ * **InstaNovo+ Only:** Predicts directly using the Diffusion model.
800
  * **Transformer Decoding Methods:**
801
  * **Greedy Search:** use this for optimal performance, has similar performance as Knapsack Beam Search at 5% FDR.
802
  * **Knapsack Beam Search:** use this for the best results and highest peptide recall, but is about 10x slower than Greedy Search.
pyproject.toml CHANGED
@@ -5,10 +5,10 @@ description = "InstaNovo Gradio App"
5
  readme = "README.md"
6
  requires-python = ">=3.12"
7
  dependencies = [
8
- "gradio>=5.23.1",
9
  "gradio-log>=0.0.8",
10
- "instanovo>=1.1.0",
11
- "spaces>=0.34.0",
12
  "torch==2.4.1",
13
  ]
14
 
 
5
  readme = "README.md"
6
  requires-python = ">=3.12"
7
  dependencies = [
8
+ "gradio>=5.33.1",
9
  "gradio-log>=0.0.8",
10
+ "instanovo[cu124]>=1.1.3",
11
+ "spaces>=0.37.0",
12
  "torch==2.4.1",
13
  ]
14
 
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- gradio>=5.23.1
2
  gradio-log>=0.0.8
3
- instanovo[cu124]>=1.1.1
4
- spaces>=0.34.0
 
1
+ gradio>=5.33.1
2
  gradio-log>=0.0.8
3
+ instanovo[cu124]>=1.1.3
4
+ spaces>=0.37.0
uv.lock CHANGED
The diff for this file is too large to render. See raw diff