Spaces:

InstaDeepAI
/

InstaNovo

Running on Zero

App Files Files Community

BioGeek committed 27 days ago

Commit

802d9be

1 Parent(s): a068c4e

Upgrade to InstaNovo v1.1.3 with new diffusion checkpoint

Browse files

Files changed (5) hide show

README.md +15 -16
app.py +7 -8
pyproject.toml +3 -3
requirements.txt +3 -3
uv.lock +0 -0

README.md CHANGED Viewed

@@ -1,17 +1,17 @@
----
-title: De Novo Peptide Sequencing With InstaNovo and InstaNovo+
-emoji: 📊
-colorFrom: green
-colorTo: green
-sdk: gradio
-sdk_version: 5.23.1
-app_file: app.py
-pinned: true
-license: apache-2.0
-thumbnail: >-
-  https://cdn-uploads.huggingface.co/production/uploads/6189aee17d9b289cdebafbd6/tb9e-8Z2_pDsRMkGglcvh.png
-short_description: Translate fragment ion peaks into sequence of amino acids
----
 # _De Novo_ Peptide Sequencing With InstaNovo and InstaNovo+
@@ -35,7 +35,6 @@ This Space provides a web interface for the [InstaNovo](https://github.com/insta
 This demo uses the pretrained model checkpoint.
-* Predictions use version `instanovo-v1.1.0` for the transformer-based InstaNovo model and version `instanovoplus-v1.1.0-alpha` for the diffusion-based InstaNovo+ model.
-* The InstaNovo+ model `instanovoplus-v1.1.0-alpha` is an alpha release.
 **Note:** Processing large files can take time, depending on the file size and the chosen decoding method. Knapsack generation can also add to the initial startup time.

+---
+title: De Novo Peptide Sequencing With InstaNovo and InstaNovo+
+emoji: 📊
+colorFrom: green
+colorTo: green
+sdk: gradio
+sdk_version: 5.23.1
+app_file: app.py
+pinned: true
+license: apache-2.0
+thumbnail: >-
+  https://cdn-uploads.huggingface.co/production/uploads/6189aee17d9b289cdebafbd6/tb9e-8Z2_pDsRMkGglcvh.png
+short_description: Translate fragment ion peaks into sequence of amino acids
+---
 # _De Novo_ Peptide Sequencing With InstaNovo and InstaNovo+
 This demo uses the pretrained model checkpoint.
+* Predictions use version `instanovo-v1.1.0` for the transformer-based InstaNovo model and version `instanovoplus-v1.1.0` for the diffusion-based InstaNovo+ model.
 **Note:** Processing large files can take time, depending on the file size and the chosen decoding method. Knapsack generation can also add to the initial startup time.

app.py CHANGED Viewed

@@ -38,7 +38,7 @@ except ImportError as e:
 # --- Configuration ---
 TRANSFORMER_MODEL_ID = "instanovo-v1.1.0"
-DIFFUSION_MODEL_ID = "instanovoplus-v1.1.0-alpha"
 KNAPSACK_DIR = Path("./knapsack_cache")
 # Determine device
@@ -111,10 +111,10 @@ def load_models_and_knapsack():
             INSTANOVOPLUS, INSTANOVOPLUS_CONFIG = InstaNovoPlus.from_pretrained(DIFFUSION_MODEL_ID)
             INSTANOVOPLUS.to(DEVICE)
             INSTANOVOPLUS.eval()
-            if RESIDUE_SET is not None and INSTANOVOPLUS.residues != RESIDUE_SET:
                  logger.warning("Residue sets between Transformer and Diffusion models differ. Using Transformer's set.")
             elif RESIDUE_SET is None:
-                 RESIDUE_SET = INSTANOVOPLUS.residues
             logger.info("Diffusion model loaded successfully.")
         except Exception as e:
@@ -581,7 +581,7 @@ def predict_peptides(input_file, mode_selection, transformer_decoder_selection):
             bin_spectra=config.get("conv_peak_encoder", False),
             peptide_pad_length=config.get("max_length", 40) if config.get("compile_model", False) else 0,
             reverse_peptide=reverse_for_transformer, # Key change based on mode
-            diffusion="InstaNovo+ Only" in mode_selection # Signal if input is for diffusion
         )
         dl = DataLoader(ds, batch_size=config.batch_size, num_workers=0, shuffle=False, collate_fn=collate_batch)
@@ -728,7 +728,7 @@ with gr.Blocks(
                 [
                     "InstaNovo with InstaNovo+ refinement (Default, Recommended)",
                     "InstaNovo Only (Transformer)",
-                    "InstaNovo+ Only (Diffusion, Alpha release)",
                 ],
                 label="Prediction Mode",
                 value="InstaNovo with InstaNovo+ refinement (Default, Recommended)",
@@ -783,7 +783,7 @@ with gr.Blocks(
             ["assets/sample_spectra.mgf", "InstaNovo with InstaNovo+ refinement (Default, Recommended)", "Knapsack Beam Search (Accurate, Slower)"],
             ["assets/sample_spectra.mgf", "InstaNovo Only (Transformer)", "Greedy Search (Fast)"],
             ["assets/sample_spectra.mgf", "InstaNovo Only (Transformer)", "Knapsack Beam Search (Accurate, Slower)"],
-            ["assets/sample_spectra.mgf", "InstaNovo+ Only (Diffusion, Alpha release)", ""],
         ],
         inputs=[input_file, mode_selection, transformer_decoder_selection],
         # outputs=[output_df, output_file],
@@ -793,11 +793,10 @@ with gr.Blocks(
     gr.Markdown(f"""**Notes:**
          *   Predictions use version `{TRANSFORMER_MODEL_ID}` for the transformer-based InstaNovo model and version `{DIFFUSION_MODEL_ID}` for the diffusion-based InstaNovo+ model.
-         *   The InstaNovo+ model `{DIFFUSION_MODEL_ID}` is an alpha release.
          * **Predction Modes:**
              *   **InstaNovo with InstaNovo+ refinement** Runs initial prediction with the selected Transformer method (Greedy/Knapsack), then refines using InstaNovo+.
              *   **InstaNovo Only:** Uses only the Transformer with the selected decoding method.
-             *   **InstaNovo+ Only:** Predicts directly using the Diffusion model (alpha release).
         * **Transformer Decoding Methods:**
              *   **Greedy Search:** use this for optimal performance, has similar performance as Knapsack Beam Search at 5% FDR.
              *   **Knapsack Beam Search:** use this for the best results and highest peptide recall, but is about 10x slower than Greedy Search.

 # --- Configuration ---
 TRANSFORMER_MODEL_ID = "instanovo-v1.1.0"
+DIFFUSION_MODEL_ID = "instanovoplus-v1.1.0"
 KNAPSACK_DIR = Path("./knapsack_cache")
 # Determine device
             INSTANOVOPLUS, INSTANOVOPLUS_CONFIG = InstaNovoPlus.from_pretrained(DIFFUSION_MODEL_ID)
             INSTANOVOPLUS.to(DEVICE)
             INSTANOVOPLUS.eval()
+            if RESIDUE_SET is not None and INSTANOVOPLUS.residue_set != RESIDUE_SET:
                  logger.warning("Residue sets between Transformer and Diffusion models differ. Using Transformer's set.")
             elif RESIDUE_SET is None:
+                 RESIDUE_SET = INSTANOVOPLUS.residue_set
             logger.info("Diffusion model loaded successfully.")
         except Exception as e:
             bin_spectra=config.get("conv_peak_encoder", False),
             peptide_pad_length=config.get("max_length", 40) if config.get("compile_model", False) else 0,
             reverse_peptide=reverse_for_transformer, # Key change based on mode
+            add_eos="InstaNovo+ Only" not in mode_selection # add_eos is turned off only for diffusion-only ("InstaNovo+ Only") input
         )
         dl = DataLoader(ds, batch_size=config.batch_size, num_workers=0, shuffle=False, collate_fn=collate_batch)
                 [
                     "InstaNovo with InstaNovo+ refinement (Default, Recommended)",
                     "InstaNovo Only (Transformer)",
+                    "InstaNovo+ Only (Diffusion)",
                 ],
                 label="Prediction Mode",
                 value="InstaNovo with InstaNovo+ refinement (Default, Recommended)",
             ["assets/sample_spectra.mgf", "InstaNovo with InstaNovo+ refinement (Default, Recommended)", "Knapsack Beam Search (Accurate, Slower)"],
             ["assets/sample_spectra.mgf", "InstaNovo Only (Transformer)", "Greedy Search (Fast)"],
             ["assets/sample_spectra.mgf", "InstaNovo Only (Transformer)", "Knapsack Beam Search (Accurate, Slower)"],
+            ["assets/sample_spectra.mgf", "InstaNovo+ Only (Diffusion)", ""],
         ],
         inputs=[input_file, mode_selection, transformer_decoder_selection],
         # outputs=[output_df, output_file],
     gr.Markdown(f"""**Notes:**
          *   Predictions use version `{TRANSFORMER_MODEL_ID}` for the transformer-based InstaNovo model and version `{DIFFUSION_MODEL_ID}` for the diffusion-based InstaNovo+ model.
          * **Predction Modes:**
              *   **InstaNovo with InstaNovo+ refinement** Runs initial prediction with the selected Transformer method (Greedy/Knapsack), then refines using InstaNovo+.
              *   **InstaNovo Only:** Uses only the Transformer with the selected decoding method.
+             *   **InstaNovo+ Only:** Predicts directly using the Diffusion model.
         * **Transformer Decoding Methods:**
              *   **Greedy Search:** use this for optimal performance, has similar performance as Knapsack Beam Search at 5% FDR.
              *   **Knapsack Beam Search:** use this for the best results and highest peptide recall, but is about 10x slower than Greedy Search.

pyproject.toml CHANGED Viewed

@@ -5,10 +5,10 @@ description = "InstaNovo Gradio App"
 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
-    "gradio>=5.23.1",
     "gradio-log>=0.0.8",
-    "instanovo>=1.1.0",
-    "spaces>=0.34.0",
     "torch==2.4.1",
 ]

 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
+    "gradio>=5.33.1",
     "gradio-log>=0.0.8",
+    "instanovo[cu124]>=1.1.3",
+    "spaces>=0.37.0",
     "torch==2.4.1",
 ]

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-gradio>=5.23.1
 gradio-log>=0.0.8
-instanovo[cu124]>=1.1.1
-spaces>=0.34.0

+gradio>=5.33.1
 gradio-log>=0.0.8
+instanovo[cu124]>=1.1.3
+spaces>=0.37.0

uv.lock CHANGED Viewed

The diff for this file is too large to render. See raw diff