Spaces:
Runtime error
Runtime error
File size: 6,056 Bytes
bfb1c73 f225ee6 bfb1c73 f225ee6 bfb1c73 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
import streamlit as st
from datetime import datetime
import os
import json
import requests
import uuid
import torch
import whisper
from transformers import pipeline
# Browser tab title and page layout for the app.
st.set_page_config(layout="centered", page_title="Jugaadu Translator")

# Settings for each selectable idiom language, keyed by the display name
# offered in the language picker:
#   code              - short language code
#   translation_model - model id handed to the translation pipeline
#                       (presumably "<language> -> English"; confirm on the Hub)
#   whisper_language  - language passed to Whisper when transcribing uploads
SUPPORTED_LANGUAGES = {
    "Hindi": dict(
        code="hi",
        translation_model="Helsinki-NLP/opus-mt-hi-en",
        whisper_language="hi",
    ),
    "Telugu": dict(
        code="te",
        translation_model="Helsinki-NLP/opus-mt-te-en",
        whisper_language="te",
    ),
    "Sanskrit": dict(
        code="sa",
        translation_model="Helsinki-NLP/opus-mt-sa-en",
        whisper_language="sa",
    ),
}

# On-disk locations: the JSON file holding all records and the folder that
# receives uploaded audio.
RECORDS_PATH = "data/records.json"
AUDIO_SAVE_DIR = "data/audio"

# Create the storage folders up front so later writes never hit a missing
# directory.
os.makedirs("data", exist_ok=True)
os.makedirs(AUDIO_SAVE_DIR, exist_ok=True)
# The smallest Whisper checkpoint is chosen on purpose: it is the most likely
# to fit within Hugging Face Spaces resources.
@st.cache_resource(show_spinner="Loading Whisper model...")
def get_whisper_model():
    """Load and return the Whisper "tiny" speech-recognition model.

    Wrapped in ``st.cache_resource`` so the loaded model is reused instead of
    being loaded again on each call.
    """
    asr_model = whisper.load_model("tiny")
    return asr_model
@st.cache_resource(show_spinner="Loading translation models...")
def get_translator(language):
    """Return a translation pipeline for *language*.

    *language* must be a key of ``SUPPORTED_LANGUAGES``; its
    ``translation_model`` entry selects the model. An unknown key raises
    ``KeyError``. Wrapped in ``st.cache_resource``, so each language's
    pipeline is built once and then reused.
    """
    language_settings = SUPPORTED_LANGUAGES[language]
    return pipeline("translation", model=language_settings["translation_model"])
@st.cache_resource(show_spinner="Loading summarizer...")
def get_summarizer():
    """Return the summarization pipeline used to describe translations.

    Wrapped in ``st.cache_resource`` so the pipeline is built once and reused.
    """
    summarizer_model = "sshleifer/distilbart-cnn-12-6"
    return pipeline("summarization", model=summarizer_model)
def get_location():
    """Return an approximate "City, Region, Country" string from ipinfo.io.

    This is a best-effort lookup: a network error, a non-2xx HTTP status, a
    body that is not a JSON object, or a response without any usable field
    all yield ``"Unknown Location"`` instead of raising. Fields that are
    missing or empty are skipped, so the result never contains stray commas.

    NOTE(review): the request is made by the process running this script, so
    the result presumably describes the host's IP rather than the visitor's.
    """
    unknown = "Unknown Location"
    try:
        resp = requests.get("https://ipinfo.io/json", timeout=5)
        # Error responses (e.g. rate limiting) carry no location fields.
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError):
        # ValueError covers JSON decoding failures on older requests versions.
        return unknown
    if not isinstance(data, dict):
        return unknown
    fields = (str(data.get(key) or "").strip() for key in ("city", "region", "country"))
    parts = [value for value in fields if value]
    return ", ".join(parts) if parts else unknown
def save_record(record):
    """Append *record* to the JSON list stored at ``RECORDS_PATH``.

    A missing or empty records file is treated as "no records yet". The
    updated list is written to a temporary sibling file and then moved into
    place with ``os.replace``, so an interrupted write cannot truncate the
    existing history.

    Args:
        record: JSON-serialisable object to append.

    Raises:
        json.JSONDecodeError: the existing file holds invalid JSON. It is left
            untouched rather than overwritten.
        ValueError: the existing file holds valid JSON that is not a list.
    """
    directory = os.path.dirname(RECORDS_PATH)
    if directory:
        os.makedirs(directory, exist_ok=True)

    try:
        with open(RECORDS_PATH, "r", encoding="utf-8") as f:
            raw = f.read()
    except FileNotFoundError:
        raw = ""

    records = json.loads(raw) if raw.strip() else []
    if not isinstance(records, list):
        raise ValueError(f"{RECORDS_PATH} does not contain a JSON list")
    records.append(record)

    tmp_path = RECORDS_PATH + ".tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(records, f, indent=2, ensure_ascii=False)
        os.replace(tmp_path, RECORDS_PATH)
    finally:
        # Only present if the dump or the replace failed part-way.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
def show_records():
    """Render the five most recent saved contributions, newest first.

    Reads the JSON list at ``RECORDS_PATH``. A missing or empty file, or an
    empty list, shows the "No contributions yet." notice. Unreadable or
    malformed content shows a warning instead of raising, so the rest of the
    page still renders. Keys missing from a record are shown as blank.
    """
    try:
        with open(RECORDS_PATH, "r", encoding="utf-8") as f:
            raw = f.read()
        records = json.loads(raw) if raw.strip() else []
    except FileNotFoundError:
        records = []
    except (OSError, json.JSONDecodeError):
        st.warning("Saved contributions could not be read.")
        return

    if not isinstance(records, list):
        st.warning("Saved contributions could not be read.")
        return
    if not records:
        st.info("No contributions yet.")
        return

    st.subheader("Previous Contributions")
    for rec in reversed(records[-5:]):
        if not isinstance(rec, dict):
            continue
        # Two trailing spaces before "\n" form a Markdown hard line break;
        # with a single space the four fields run together on one line.
        st.markdown(
            f"**User:** {rec.get('username', '')}  \n"
            f"**Time:** {rec.get('timestamp', '')}  \n"
            f"**Location:** {rec.get('location', '')}  \n"
            f"**Title:** {rec.get('title', '')}"
        )
        st.markdown(f"**Idiom:** {rec.get('input_text', '')}")
        st.markdown(f"**Translation:** {rec.get('translation', '')}")
        st.markdown(f"**Description:** {rec.get('description', '')}")
        audio_path = rec.get("audio_path")
        if audio_path and os.path.exists(audio_path):
            with open(audio_path, "rb") as audio_file:
                st.audio(audio_file.read())
        st.markdown("---")
# Login gate: until a username is stored in the session, show only the
# username form and stop the script before the main UI is reached.
if "username" not in st.session_state:
    st.title("Jugaadu Translator 🧠")
    st.markdown("Enter a username to begin contributing to the idioms corpus.")
    username = st.text_input("Username (choose a unique handle)", max_chars=30)
    # Strip before validating so a whitespace-only entry is not accepted and
    # stored as an empty username.
    handle = username.strip()
    if st.button("Continue"):
        if handle:
            st.session_state["username"] = handle
            st.success(f"Welcome, {handle}! Proceed to record or type idioms.")
            # st.experimental_rerun is deprecated in favour of st.rerun;
            # prefer the new name and fall back on older Streamlit versions.
            rerun = getattr(st, "rerun", None) or st.experimental_rerun
            rerun()
        else:
            st.warning("Please enter a username.")
    st.stop()
def _safe_filename_part(text, fallback):
    """Reduce *text* to letters, digits, "-" and "_" for use inside a file name.

    Path separators and dots are replaced, so the value cannot change which
    directory a file is written to. Returns *fallback* if nothing is left.
    """
    cleaned = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in text)
    return cleaned.strip("_") or fallback


# Main page header and input controls. This section leaves `language`,
# `input_text` and `audio_path` set for the translate step that follows.
st.title("Jugaadu Translator 🧠")
st.markdown(f"Hi, **{st.session_state['username']}**!")

col1, col2 = st.columns(2)
with col1:
    language = st.selectbox("Pick Idiom Language", list(SUPPORTED_LANGUAGES))
with col2:
    input_mode = st.radio("Input Type", ["Type", "Upload Voice"])

input_text = ""
audio_path = None
if input_mode == "Type":
    input_text = st.text_area("Type the idiom/dialect phrase:", height=100)
else:
    st.markdown("Upload a short voice note of your idiom (.wav, .mp3):")
    audio_file = st.file_uploader("Choose audio file", type=['wav', 'mp3'])
    if audio_file:
        audio_bytes = audio_file.getvalue()
        # The script re-runs on every widget interaction (including the
        # Translate click). Remember the last processed upload so the same
        # file is not saved under a new name and transcribed again each time.
        # hash() of bytes is only stable within one process, which is enough
        # for a value kept in session state.
        upload_key = (audio_file.name, len(audio_bytes), hash(audio_bytes), language)
        cached = st.session_state.get("_last_upload")
        if cached and cached["key"] == upload_key and os.path.exists(cached["path"]):
            audio_path = cached["path"]
            input_text = cached["text"]
        else:
            uid = str(uuid.uuid4())
            # Username and extension are user-controlled: sanitise both so
            # the file always lands directly inside AUDIO_SAVE_DIR.
            owner = _safe_filename_part(st.session_state['username'], "user")
            extension = _safe_filename_part(
                os.path.splitext(audio_file.name)[1].lstrip(".").lower(), "audio"
            )
            audio_path = os.path.join(AUDIO_SAVE_DIR, f"{owner}_{uid}.{extension}")
            with open(audio_path, "wb") as f:
                f.write(audio_bytes)
            asr_model = get_whisper_model()
            result = asr_model.transcribe(
                audio_path,
                language=SUPPORTED_LANGUAGES[language]['whisper_language'],
            )
            input_text = result["text"]
            st.session_state["_last_upload"] = {
                "key": upload_key,
                "path": audio_path,
                "text": input_text,
            }
        st.success("Audio uploaded and saved.")
        st.markdown("**Transcription:** " + input_text)
# Translate step: translate the collected text, summarise it, store the
# record and show the result. The button stays disabled until there is text.
if st.button("Translate", disabled=not input_text.strip()):
    translation = None
    desc = ""
    with st.spinner("Translating and generating summary..."):
        try:
            translator = get_translator(language)
            translation = translator(input_text)[0]['translation_text']
        except Exception as exc:
            # UI boundary: a model that fails to load or run should produce a
            # readable message, not a crashed page with the history hidden.
            st.error(f"Translation failed for {language}: {exc}")
        else:
            try:
                summarizer = get_summarizer()
                desc = summarizer(
                    translation, max_length=60, min_length=15, do_sample=False
                )[0]['summary_text']
            except Exception:
                # Best effort: fall back to the translation itself.
                desc = translation

    if translation is not None:
        # Title is the first sentence of the description, capped at 40
        # characters; fall back to the translation if that comes out empty.
        title = desc.split(".")[0][:40].strip() or translation[:40].strip()
        location = get_location()
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        record = {
            "username": st.session_state['username'],
            "input_text": input_text,
            "translation": translation,
            "audio_path": audio_path if audio_path else "",
            "title": title,
            "description": desc,
            "timestamp": timestamp,
            "location": location,
        }
        save_record(record)
        st.success("Submission saved!")
        st.markdown(f"#### Title: {title}")
        st.markdown(f"**Translation:** {translation}")
        st.markdown(f"**Description:** {desc}")
        st.markdown(f"**Location:** {location}")
        if audio_path and os.path.exists(audio_path):
            with open(audio_path, 'rb') as f:
                st.audio(f.read())
        st.balloons()

# History of recent contributions, followed by the footer.
st.markdown("---")
show_records()
st.markdown("---")
# NOTE(review): get_location() sends a request to ipinfo.io, so the
# "No cloud required" wording below is not strictly accurate — confirm the
# intended message with the author.
st.markdown("**All data stays local! You can find files inside `data/` for research use. No cloud required.**")
|