# Source: Hugging Face Space "krishnamishra006/jugaadutranslator"
# Space status when captured: Runtime error. File size: 6,056 bytes.

import streamlit as st
from datetime import datetime
import os
import json
import requests
import uuid
import torch
import whisper
from transformers import pipeline

# Page-level settings: title "Jugaadu Translator" and the "centered" layout.
# NOTE(review): Streamlit expects this call before other st.* commands - keep
# it ahead of the rest of the UI code.
st.set_page_config(page_title="Jugaadu Translator", layout="centered")

# Per-language settings, keyed by the display name offered in the language
# picker. Each entry holds the language code, the translation checkpoint used
# for that language and the language hint handed to Whisper.
SUPPORTED_LANGUAGES = {
    "Hindi": dict(
        code="hi",
        translation_model="Helsinki-NLP/opus-mt-hi-en",
        whisper_language="hi",
    ),
    "Telugu": dict(
        code="te",
        translation_model="Helsinki-NLP/opus-mt-te-en",
        whisper_language="te",
    ),
    "Sanskrit": dict(
        code="sa",
        translation_model="Helsinki-NLP/opus-mt-sa-en",
        whisper_language="sa",
    ),
}

# On-disk layout: uploaded audio clips and the JSON list of saved records.
AUDIO_SAVE_DIR = "data/audio"
RECORDS_PATH = "data/records.json"

# Make sure the storage directories exist before any widget tries to write.
os.makedirs(AUDIO_SAVE_DIR, exist_ok=True)
os.makedirs("data", exist_ok=True)

# The lightweight "tiny" checkpoint is chosen for HF Spaces compatibility.
@st.cache_resource(show_spinner="Loading Whisper model...")
def get_whisper_model():
    """Load and return the Whisper ``tiny`` speech-recognition model.

    Decorated with ``st.cache_resource`` so the loaded model can be reused
    instead of being loaded again on each call.
    """
    model = whisper.load_model("tiny")
    return model

@st.cache_resource(show_spinner="Loading translation models...")
def get_translator(language):
    """Build the translation pipeline configured for ``language``.

    Args:
        language: A key of ``SUPPORTED_LANGUAGES`` (for example ``"Hindi"``).

    Returns:
        A ``transformers`` ``"translation"`` pipeline using the checkpoint
        listed under ``translation_model`` for that language.

    Raises:
        KeyError: If ``language`` is not a key of ``SUPPORTED_LANGUAGES``.
    """
    language_config = SUPPORTED_LANGUAGES[language]
    checkpoint = language_config["translation_model"]
    return pipeline("translation", model=checkpoint)

@st.cache_resource(show_spinner="Loading summarizer...")
def get_summarizer():
    """Return the ``"summarization"`` pipeline used to describe translations.

    Uses the ``sshleifer/distilbart-cnn-12-6`` checkpoint; decorated with
    ``st.cache_resource`` so the pipeline can be reused between calls.
    """
    checkpoint = "sshleifer/distilbart-cnn-12-6"
    summarizer = pipeline("summarization", model=checkpoint)
    return summarizer

def get_location():
    """Return a coarse ``"City, Region, Country"`` string from ipinfo.io.

    The request is issued by the process running this script, so the result
    describes that machine's public IP address.

    Returns:
        The non-empty city/region/country fields joined with ``", "``, or
        ``"Unknown Location"`` when the lookup fails, the response is not a
        JSON object, or none of the fields is present.
    """
    try:
        resp = requests.get("https://ipinfo.io/json", timeout=5)
        # Treat HTTP errors (e.g. rate limiting) as a failed lookup instead of
        # parsing the error body as if it were location data.
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError):
        # Network problems, bad status codes and undecodable JSON all end up
        # here; anything else (KeyboardInterrupt, programming errors) is
        # deliberately not swallowed.
        return "Unknown Location"

    if not isinstance(data, dict):
        return "Unknown Location"

    # Keep only the fields that are actually present so a missing region does
    # not produce output such as "City, , Country".
    parts = [
        str(data.get(field) or "").strip()
        for field in ("city", "region", "country")
    ]
    location = ", ".join(part for part in parts if part)
    return location or "Unknown Location"

def save_record(record):
    """Append ``record`` to the JSON list stored at ``RECORDS_PATH``.

    The whole list is re-serialised on every call. The new content is written
    to a sibling temporary file and then moved over the real file, so an
    interrupted write cannot leave ``RECORDS_PATH`` holding truncated JSON.

    Args:
        record: A JSON-serialisable object (the app passes a dict).

    Raises:
        json.JSONDecodeError: If the existing file is non-empty but not valid
            JSON; the file is left untouched in that case.
        OSError: If the records file cannot be read or written.
    """
    records = []
    # A missing or zero-byte file simply means "no records yet".
    if os.path.exists(RECORDS_PATH) and os.path.getsize(RECORDS_PATH) > 0:
        with open(RECORDS_PATH, "r", encoding="utf-8") as f:
            records = json.load(f)
    records.append(record)

    tmp_path = f"{RECORDS_PATH}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(records, f, indent=2, ensure_ascii=False)
    # os.replace swaps the finished file into place in a single step.
    os.replace(tmp_path, RECORDS_PATH)

def show_records():
    """Render the five most recent saved contributions, newest first.

    Reads the JSON list at ``RECORDS_PATH``. A missing file or an empty list
    shows an informational message; a file that cannot be read or parsed shows
    a warning instead of raising, because this function runs on every page
    render. Fields missing from a record are rendered as empty text.
    """
    try:
        with open(RECORDS_PATH, "r", encoding="utf-8") as f:
            records = json.load(f)
    except FileNotFoundError:
        records = []
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and decoding errors.
        st.warning("Saved contributions could not be read.")
        return

    if not isinstance(records, list) or not records:
        st.info("No contributions yet.")
        return

    st.subheader("Previous Contributions")
    for rec in reversed(records[-5:]):
        if not isinstance(rec, dict):
            # Skip entries that do not have the expected record shape.
            continue
        st.markdown(f"**User:** {rec.get('username', '')}  \n"
                    f"**Time:** {rec.get('timestamp', '')}  \n"
                    f"**Location:** {rec.get('location', '')}  \n"
                    f"**Title:** {rec.get('title', '')}")
        st.markdown(f"**Idiom:** {rec.get('input_text', '')}")
        st.markdown(f"**Translation:** {rec.get('translation', '')}")
        st.markdown(f"**Description:** {rec.get('description', '')}")
        audio_path = rec.get('audio_path')
        # Only offer playback when the referenced clip is still on disk.
        if audio_path and os.path.exists(audio_path):
            with open(audio_path, 'rb') as audio_file:
                st.audio(audio_file.read())
        st.markdown("---")

# Login gate: until a username is stored in the session, render only the
# sign-in form and stop the script before the main UI.
if "username" not in st.session_state:
    st.title("Jugaadu Translator 🧠")
    st.markdown("Enter a username to begin contributing to the idioms corpus.")
    # Strip up front so a whitespace-only entry is treated as empty instead of
    # being stored as an empty username.
    username = st.text_input("Username (choose a unique handle)", max_chars=30).strip()
    if st.button("Continue"):
        if username:
            st.session_state["username"] = username
            st.success(f"Welcome, {username}! Proceed to record or type idioms.")
            # st.experimental_rerun was removed from newer Streamlit releases
            # in favour of st.rerun; fall back to the old name only when the
            # new one is unavailable.
            rerun = getattr(st, "rerun", None) or st.experimental_rerun
            rerun()
        else:
            st.warning("Please enter a username to continue.")
    st.stop()

# Main input area: language picker plus either a text box or an audio upload
# that is transcribed with Whisper. Leaves `input_text` and `audio_path` set
# for the translate step that follows.
st.title("Jugaadu Translator 🧠")
st.markdown(f"Hi, **{st.session_state['username']}**!")

col1, col2 = st.columns(2)
with col1:
    language = st.selectbox("Pick Idiom Language", list(SUPPORTED_LANGUAGES.keys()))
with col2:
    input_mode = st.radio("Input Type", ["Type", "Upload Voice"])

input_text = ""
audio_path = None

if input_mode == "Type":
    input_text = st.text_area("Type the idiom/dialect phrase:", height=100)
else:
    st.markdown("Upload a short voice note of your idiom (.wav, .mp3):")
    audio_file = st.file_uploader("Choose audio file", type=['wav', 'mp3'])
    if audio_file:
        audio_bytes = audio_file.getvalue()
        # Streamlit re-executes this script on every interaction (including
        # the "Translate" click). Remember the saved upload and its
        # transcripts in the session so a rerun does not write another copy
        # of the file or run Whisper again.
        file_key = (audio_file.name, hash(audio_bytes))
        cached_upload = st.session_state.get("uploaded_audio")
        if (
            cached_upload is None
            or cached_upload["file_key"] != file_key
            or not os.path.exists(cached_upload["path"])
        ):
            # The username is free text; keep only filename-safe characters so
            # it cannot introduce path separators into the saved file name.
            safe_user = "".join(
                ch if ch.isalnum() or ch in "-_" else "_"
                for ch in st.session_state['username']
            )
            # Preserve file extension
            extension = audio_file.name.split('.')[-1]
            saved_path = os.path.join(
                AUDIO_SAVE_DIR, f"{safe_user}_{uuid.uuid4()}.{extension}"
            )
            with open(saved_path, "wb") as f:
                f.write(audio_bytes)
            cached_upload = {
                "file_key": file_key,
                "path": saved_path,
                "transcripts": {},
            }
            st.session_state["uploaded_audio"] = cached_upload
        audio_path = cached_upload["path"]
        st.success("Audio uploaded and saved.")
        # Transcripts are kept per language because the language hint passed
        # to Whisper changes the result.
        if language not in cached_upload["transcripts"]:
            asr_model = get_whisper_model()
            result = asr_model.transcribe(
                audio_path,
                language=SUPPORTED_LANGUAGES[language]['whisper_language'],
            )
            cached_upload["transcripts"][language] = result["text"]
        input_text = cached_upload["transcripts"][language]
        st.markdown("**Transcription:** " + input_text)

# Translate step: translate the idiom, derive a short description and title,
# persist the record and show the result. The button stays disabled while
# there is no non-blank input text.
if st.button("Translate", disabled=not input_text.strip()):
    with st.spinner("Translating and generating summary..."):
        source_text = input_text.strip()
        try:
            translator = get_translator(language)
            translation = translator(source_text)[0]['translation_text']
        except Exception as exc:
            # Loading or running the model can fail (for example when the
            # checkpoint cannot be fetched). Report it in the page and skip
            # saving rather than aborting the script, so the contributions
            # list below still renders.
            translation = None
            st.error(f"Could not translate the {language} text: {exc}")

        if translation is not None:
            summarizer = get_summarizer()
            try:
                desc = summarizer(translation, max_length=60, min_length=15, do_sample=False)[0]['summary_text']
            except Exception:
                # Best effort: fall back to the translation itself when the
                # summarizer cannot handle the input.
                desc = translation
            # Title is the first sentence of the description, capped at 40
            # characters; fall back to the translation if that is empty.
            title = desc.split(".")[0][:40].strip() or translation[:40]
            # get_location() queries ipinfo.io from this process, so it
            # reflects where the app is running.
            location = get_location()
            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            record = {
                "username": st.session_state['username'],
                "input_text": source_text,
                "translation": translation,
                "audio_path": audio_path if audio_path else "",
                "title": title,
                "description": desc,
                "timestamp": timestamp,
                "location": location
            }
            save_record(record)
            st.success("Submission saved!")
            st.markdown(f"#### Title: {title}")
            st.markdown(f"**Translation:** {translation}")
            st.markdown(f"**Description:** {desc}")
            st.markdown(f"**Location:** {location}")
            if audio_path and os.path.exists(audio_path):
                with open(audio_path, 'rb') as f:
                    st.audio(f.read())
            st.balloons()

# Page footer: the recent-contributions list from show_records(), set off by
# "---" separators, followed by a note about where the data is stored.
st.markdown("---")
show_records()
st.markdown("---")
st.markdown("**All data stays local! You can find files inside `data/` for research use. No cloud required.**")