# Synopsis Scorer — Streamlit app (Hugging Face Space)
import hmac
import os

import streamlit as st
from huggingface_hub import login
from huggingface_hub import snapshot_download
from llama_cpp import Llama

from utils import extract_text, anonymize_text, score_synopsis
st.set_page_config(page_title="Synopsis Scorer", layout="wide")

# --- Access Control ---
# Gate the whole app behind a shared access token stored in Streamlit secrets.
TOKEN = st.secrets.get("access_token")
user_token = st.text_input("Enter Access Token to Continue", type="password")
# Use a constant-time comparison so the check does not leak token length or a
# matching prefix through response timing; also refuse to proceed when the
# secret itself is missing/empty (previously `!=` plus an unset TOKEN could
# behave surprisingly).
if not (TOKEN and user_token and hmac.compare_digest(str(user_token), str(TOKEN))):
    st.warning("Please enter a valid access token.")
    st.stop()
# --- Hugging Face Token Configuration & Model Download ---
# Resolve the token from Streamlit secrets first, then the environment.
hf_token = st.secrets.get("hf_token") if "hf_token" in st.secrets else os.environ.get("HF_TOKEN")

# Where the GGUF model lives on disk. NOTE: the original code tested
# "models/gemma-3-4b-it-q4_0.gguf" for existence, but downloaded into
# ./gemma-3-4b-it-qat-q4_0/ — the guard could never match, so the app
# re-prompted and re-downloaded on every run. Check the real path instead.
model_dir = "./gemma-3-4b-it-qat-q4_0"
model_path = os.path.join(model_dir, "gemma-3-4b-it-q4_0.gguf")

if not os.path.exists(model_path):
    if not hf_token:
        st.warning("Hugging Face token not found. Please add it to your secrets or environment variables.")
        hf_token = st.text_input("Enter your Hugging Face token:", type="password")
        if not hf_token:
            # Wait for the user to supply a token before attempting the gated download
            # (the original called login(None), which raises).
            st.stop()
    login(hf_token)
    # Download the GGUF model only when it is not already on disk
    # (previously this ran unconditionally on every Streamlit rerun).
    snapshot_download(
        repo_id="google/gemma-3-4b-it-qat-q4_0-gguf",
        local_dir=model_dir,
        local_dir_use_symlinks=False,  # Ensures real files are written, not symlinks
    )
# --- File Upload & Evaluation UI ---
st.title("📘 Synopsis Scorer with Privacy Protection")
article_file = st.file_uploader("Upload the Article (.pdf/.txt)", type=["pdf", "txt"])
synopsis_file = st.file_uploader("Upload the Synopsis (.txt)", type=["txt"])

if article_file and synopsis_file:
    with st.spinner("Reading files..."):
        article = extract_text(article_file)
        synopsis = extract_text(synopsis_file)

    st.subheader("Preview")
    # Ellipsize only when the article is actually longer than the preview window
    # (the original appended "..." unconditionally).
    article_preview = article[:1000] + ("..." if len(article) > 1000 else "")
    st.text_area("Article", article_preview, height=200)
    st.text_area("Synopsis", synopsis, height=150)

    if st.button("Evaluate"):
        with st.spinner("Scoring..."):
            # Rule-based scores computed locally (utils.score_synopsis).
            scores = score_synopsis(article, synopsis)

            # Anonymize both texts before anything is sent to the LLM.
            article_anon = anonymize_text(article)
            synopsis_anon = anonymize_text(synopsis)

            # max_article_chars: 32,000 tokens × 3.5 (approx chars per token)
            # ≈ 112,000 characters; 112,000 − 32,000 (space for synopsis) = 80,000.
            article_limit = 80000

            prompt = (
                "You are an expert writing evaluator. The user has uploaded two text documents: "
                "1) a short synopsis, and 2) a longer article (source content). "
                "Without copying or storing the full content, analyze the synopsis and evaluate its quality in comparison to the article. "
                "Assess it on the basis of relevance, coverage, clarity, and coherence.\n\n"
                "Return:\n- A score out of 100\n- 2 to 3 lines of qualitative feedback\n\n"
                f"Here is the source article:\n{article_anon[:article_limit]}\n\nHere is the synopsis:\n{synopsis_anon}"
            )

            # Size n_ctx from the prompt actually sent to the model (~3.5 chars
            # per token) plus headroom for the reply. The original estimated from
            # the UNtruncated texts, over-allocating the context window — and
            # potentially exceeding the model's limit — for long articles.
            estimated_tokens = int(len(prompt) / 3.5)
            n_ctx = estimated_tokens + 500

            # LLM feedback — best-effort: on any failure, fall back to a message
            # so the rule-based scores above are still displayed.
            try:
                llm = Llama(
                    model_path="./gemma-3-4b-it-qat-q4_0/gemma-3-4b-it-q4_0.gguf",
                    n_ctx=n_ctx,
                    n_threads=2,
                    n_batch=128,
                )
                result = llm.create_chat_completion(
                    messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}]
                )
                feedback = result["choices"][0]["message"]["content"]
            except Exception as e:
                feedback = "LLM feedback not available: " + str(e)

        st.success("Evaluation Complete ✅")
        st.metric("Total Score", f"{scores['total']} / 100")
        st.progress(scores["total"] / 100)

        st.subheader("Score Breakdown")
        st.write(f"📘 Content Coverage: {scores['content_coverage']} / 50")
        st.write(f"🧠 Clarity: {scores['clarity']} / 25")
        st.write(f"🔗 Coherence: {scores['coherence']} / 25")

        st.subheader("LLM Feedback")
        st.write(feedback)
# (end of file)