Spaces:

farshidk
/

codon-optimizer

Running

App Files Files Community

codon-optimizer / app.py

farshidk

Update app.py

1357470 verified 3 days ago

raw

history blame contribute delete

2.69 kB

	import os
	import gradio as gr
	import pandas as pd
	from optimizer import optimization

	# Folder holding this script; bundled data files are resolved relative to it.
	BASE_DIR = os.path.split(__file__)[0]
	# CSV summary that is handed to the optimizer as `summary_path`.
	SUMMARY_PATH = os.path.join(BASE_DIR, "region_sweep_summary.csv")

	# One-letter amino-acid codes plus '*' (stop), in the desired table row order.
	AA_ORDER = [*"ACDEFGHIKLMNPQRSTVWY*"]
	# The same symbols as a set, for fast membership tests when cleaning input.
	AA_ALLOWED = {*AA_ORDER}

	def aa_percent_to_onecol_df(aa_percent: dict, digits: int = 0) -> pd.DataFrame:
	    """
	    Build a 2-column table: "AA" | "Codon percentage".

	    Each value of `aa_percent` is a mapping of codon -> fraction; fractions are
	    multiplied by 100 and rendered with `digits` decimals, largest first
	    (ties broken by codon name).
	    Example cell: 'CCC (97%) - CCG (3%)'

	    Rows follow AA_ORDER when every key of `aa_percent` is in AA_ORDER
	    (amino acids with no entry, or an empty one, show '—'); otherwise the
	    rows are the sorted keys of `aa_percent`.
	    """

	    def _format_mix(codon_fractions) -> str:
	        # Missing or empty codon mix is shown as a dash placeholder.
	        if not codon_fractions:
	            return "—"
	        ranked = sorted(codon_fractions.items(), key=lambda item: (-item[1], item[0]))
	        return " - ".join(f"{codon} ({share*100:.{digits}f}%)" for codon, share in ranked)

	    if set(aa_percent) <= set(AA_ORDER):
	        row_keys = AA_ORDER
	    else:
	        row_keys = sorted(aa_percent)

	    table_rows = [[key, _format_mix(aa_percent.get(key, {}))] for key in row_keys]
	    return pd.DataFrame(table_rows, columns=["AA", "Codon percentage"])

	def _clean_aa_seq(raw: str) -> str:
	    """
	    Normalize raw user input into a clean amino-acid string.

	    ASCII letters are upper-cased and every character that is not in
	    AA_ALLOWED is dropped (whitespace, digits, punctuation, etc.).
	    '*' (stop) is kept because it is included in AA_ALLOWED.

	    Case folding is done per character and restricted to ASCII on purpose:
	    upper-casing the whole string first would let some non-ASCII characters
	    turn into valid residue letters (e.g. 'ß'.upper() == 'SS',
	    'ﬁ'.upper() == 'FI'), silently inserting residues the user never typed.

	    Args:
	        raw: Text as entered by the user; None or "" is treated as empty.

	    Returns:
	        The cleaned sequence, possibly an empty string.
	    """
	    text = raw or ""
	    return "".join(
	        ch.upper() for ch in text if ch.isascii() and ch.upper() in AA_ALLOWED
	    )

	def run(aa_seq, use_percent_intervals=True):
	    """
	    Gradio callback: clean the amino-acid input, run the optimizer, and
	    shape its results for display.

	    Args:
	        aa_seq: Raw amino-acid text from the input box.
	        use_percent_intervals: Forwarded to `optimization`. Defaults to True
	            so the function can be called with the sequence alone, which is
	            how an Interface with a single input component would call it.

	    Returns:
	        A tuple (designed_nt, aa_table, gc_percent): the first value returned
	        by `optimization`, the per-amino-acid codon table, and the GC result
	        as a DataFrame.

	    Raises:
	        gr.Error: If nothing is left of the input after cleaning.
	    """
	    # 1) sanitize the AA input
	    cleaned = _clean_aa_seq(aa_seq)

	    # 2) guard: empty after cleaning
	    if not cleaned:
	        # Gradio-friendly error (shows as a toast / modal in Spaces)
	        raise gr.Error("Input sequence contains no valid amino-acid characters after cleaning.")

	    # 3) run the optimizer; its fourth return value is not used here
	    designed_nt, aa_percent, gc_percent, _ = optimization(
	        summary_path=SUMMARY_PATH,
	        aa_seq=cleaned,  # pass the cleaned AA sequence
	        use_percent_intervals=use_percent_intervals,
	    )

	    # 4) build tables
	    aa_table = aa_percent_to_onecol_df(aa_percent, digits=0)

	    # NOTE(review): assumes a non-DataFrame gc_percent is something the
	    # DataFrame constructor accepts (e.g. list of records) — confirm against optimizer.
	    if not isinstance(gc_percent, pd.DataFrame):
	        gc_percent = pd.DataFrame(gc_percent)

	    return designed_nt, aa_table, gc_percent

	# ---- Gradio Interface ----
	# Components are created first (input, then outputs) and wired up below.
	_sequence_box = gr.Textbox(
	    label="Amino Acid Sequence",
	    lines=5,
	    placeholder="e.g. MKKLLPTAA...",
	)
	_result_components = [
	    gr.Textbox(label="Optimized Nucleotide Sequence"),
	    gr.Dataframe(label="Codon Usage Percent (per AA)", wrap=True),
	    gr.Dataframe(label="GC Content (%)", wrap=True),
	]

	# The three outputs line up with the 3-tuple returned by `run`.
	iface = gr.Interface(
	    fn=run,
	    inputs=[_sequence_box],
	    outputs=_result_components,
	    title="Codon Optimizer",
	    flagging_mode="never",
	)

	if __name__ == "__main__":
	    # Launched without queue(); queuing is nice for HF Spaces concurrency
	    # but optional.
	    iface.launch()