Spaces:
Running
Running
File size: 2,693 Bytes
c1f1ea8 326c06d cafd975 af773f6 326c06d e4049b2 ea4ca8c e4049b2 72ae7f9 e4049b2 72ae7f9 e4049b2 72ae7f9 e4049b2 72ae7f9 e4049b2 72ae7f9 98d3cfd 72ae7f9 ea4ca8c 7da589d c1f1ea8 ea4ca8c e4049b2 c1f1ea8 ea4ca8c 1357470 326c06d c1f1ea8 ea4ca8c 72ae7f9 839b6bc e4049b2 cafd975 839b6bc ea4ca8c c1f1ea8 1357470 c1f1ea8 98d3cfd e4049b2 c1f1ea8 cafd975 ab6ae03 c1f1ea8 3fcfcd1 ea4ca8c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import os
import gradio as gr
import pandas as pd
from optimizer import optimization
# Directory containing this file; data files are resolved relative to it.
BASE_DIR = os.path.dirname(__file__)
# CSV handed to optimization() as its `summary_path` argument.
SUMMARY_PATH = os.path.join(BASE_DIR, "region_sweep_summary.csv")
# Accepted one-letter amino-acid codes plus '*' (stop). The list order is
# also the preferred row order of the codon-usage table.
AA_ORDER = list("ACDEFGHIKLMNPQRSTVWY*") # optional: desired row order
# Set view of AA_ORDER, used for membership tests when cleaning user input.
AA_ALLOWED = set(AA_ORDER)
def aa_percent_to_onecol_df(aa_percent: dict, digits: int = 0) -> pd.DataFrame:
    """
    Render per-amino-acid codon fractions as a two-column DataFrame.

    Columns are "AA" and "Codon percentage". Each cell lists the codons of
    one amino acid as percentages, highest first (ties broken by codon
    name), e.g. 'CCC (97%) - CCG (3%)'. An amino acid with no (or an empty)
    codon mapping gets an em dash instead.

    Rows follow AA_ORDER when every key of ``aa_percent`` appears in it;
    otherwise the keys of ``aa_percent`` are used in sorted order.

    Args:
        aa_percent: Mapping of amino acid -> {codon: fraction}; fractions
            are multiplied by 100 for display.
        digits: Number of decimal places shown for each percentage.
    """
    if set(aa_percent).issubset(set(AA_ORDER)):
        row_keys = AA_ORDER
    else:
        row_keys = sorted(aa_percent)

    def render_cell(codon_fracs):
        # Empty / missing mapping -> placeholder dash.
        if not codon_fracs:
            return "—"
        ranked = sorted(codon_fracs.items(), key=lambda item: (-item[1], item[0]))
        return " - ".join(
            f"{codon} ({frac*100:.{digits}f}%)" for codon, frac in ranked
        )

    table = [[aa, render_cell(aa_percent.get(aa, {}))] for aa in row_keys]
    return pd.DataFrame(table, columns=["AA", "Codon percentage"])
def _clean_aa_seq(raw: str) -> str:
    """
    Normalise user-supplied amino-acid text.

    The input is upper-cased and every character that is not a member of
    AA_ALLOWED is discarded, which removes whitespace, digits and
    punctuation in one pass. '*' (stop) survives because it is part of the
    allowed set. A falsy input (None or "") yields an empty string.
    """
    if not raw:
        return ""
    # Filter the upper-cased text down to the allowed alphabet.
    return "".join(filter(AA_ALLOWED.__contains__, raw.upper()))
def run(aa_seq, use_percent_intervals=True):
    """
    Gradio callback: clean the amino-acid input, run the optimizer, and
    shape its results for display.

    Args:
        aa_seq: Raw amino-acid text as entered in the UI.
        use_percent_intervals: Forwarded to ``optimization``. Defaults to
            True (the value that used to be hard-coded) so that an
            Interface declaring only the sequence input can still call this
            function; without a default, a single-input call fails with a
            missing-argument TypeError.

    Returns:
        A 3-tuple ``(designed_nt, aa_table, gc_percent)``: the first value
        returned by ``optimization``, the codon-usage table built by
        ``aa_percent_to_onecol_df``, and the GC result as a DataFrame.

    Raises:
        gr.Error: If nothing is left of ``aa_seq`` after cleaning.
    """
    # 1) sanitize the AA input
    cleaned = _clean_aa_seq(aa_seq)

    # 2) guard: empty after cleaning
    if not cleaned:
        # Gradio-friendly error (shown to the user in the UI)
        raise gr.Error("Input sequence contains no valid amino-acid characters after cleaning.")

    # 3) run the optimizer; its fourth return value is not used here
    designed_nt, aa_percent, gc_percent, _ = optimization(
        summary_path=SUMMARY_PATH,
        aa_seq=cleaned,  # pass the cleaned AA sequence
        use_percent_intervals=use_percent_intervals,  # honour the caller's choice
    )

    # 4) build tables
    aa_table = aa_percent_to_onecol_df(aa_percent, digits=0)
    if not isinstance(gc_percent, pd.DataFrame):
        # The Dataframe output component is fed a DataFrame either way.
        gc_percent = pd.DataFrame(gc_percent)
    return designed_nt, aa_table, gc_percent
# ---- Gradio Interface ----
# One text input feeds `run`; the three outputs line up with the tuple it
# returns (nucleotide sequence, codon-usage table, GC table).
iface = gr.Interface(
    title="Codon Optimizer",
    fn=run,
    inputs=[
        gr.Textbox(
            label="Amino Acid Sequence",
            lines=5,
            placeholder="e.g. MKKLLPTAA...",
        ),
    ],
    outputs=[
        gr.Textbox(label="Optimized Nucleotide Sequence"),
        gr.Dataframe(label="Codon Usage Percent (per AA)", wrap=True),
        gr.Dataframe(label="GC Content (%)", wrap=True),
    ],
    flagging_mode="never",
)

if __name__ == "__main__":
    # Start the app when executed as a script. Calling queue() before
    # launch() is an optional alternative for HF Spaces concurrency; it is
    # deliberately not enabled here.
    iface.launch()
|