# app.py — YouTube goal-relevance scorer (Hugging Face Space, commit 94dcdac)
import os, re
from googleapiclient.discovery import build
from sentence_transformers import SentenceTransformer, util
import gradio as gr
# YouTube Data API key — supplied via the Space's secret store
# (Settings → Secrets). Fail fast at import time if it is missing,
# since every request below depends on it.
YT_API_KEY = os.environ.get("YOUTUBE_API_KEY")
if not YT_API_KEY:
    raise ValueError("Set YOUTUBE_API_KEY in Settings → Secrets")
# Module-level client for the YouTube Data API v3 (used by fetch_metadata).
youtube = build("youtube", "v3", developerKey=YT_API_KEY)
# Ensemble of sentence-embedding models; compute_score averages their
# similarity scores to reduce any single model's bias.
MODEL_NAMES = [
    "sentence-transformers/all-MiniLM-L6-v2",
    "sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
    "sentence-transformers/paraphrase-MiniLM-L3-v2",
    "sentence-transformers/all-mpnet-base-v2",
    "sentence-transformers/distilbert-base-nli-mean-tokens",
]
# Loaded once at import time; first run downloads the weights from the
# Hugging Face Hub, which can take a while.
models = [SentenceTransformer(name) for name in MODEL_NAMES]
def extract_video_id(url):
    """Return the 11-character YouTube video ID embedded in *url*.

    Accepts standard watch URLs (``?v=<id>``), ``youtu.be`` short links,
    and — as a generalization — ``/embed/<id>`` and ``/shorts/<id>`` URLs.

    Args:
        url: Any YouTube video URL.

    Returns:
        The 11-character video ID string.

    Raises:
        ValueError: If no video ID can be found in *url*.
    """
    patterns = [
        r"v=([A-Za-z0-9_-]{11})",          # watch?v=<id>
        r"youtu\.be/([A-Za-z0-9_-]{11})",  # short link
        r"embed/([A-Za-z0-9_-]{11})",      # embedded player URL
        r"shorts/([A-Za-z0-9_-]{11})",     # YouTube Shorts URL
    ]
    for pattern in patterns:
        m = re.search(pattern, url)
        if m:
            return m.group(1)
    raise ValueError("Invalid YouTube URL")
def fetch_metadata(video_url):
    """Fetch a video's title and description from the YouTube Data API.

    Args:
        video_url: Any YouTube video URL understood by extract_video_id.

    Returns:
        A ``(title, description)`` tuple; either may be an empty string.

    Raises:
        ValueError: If the URL is invalid or the video does not exist.
    """
    video_id = extract_video_id(video_url)
    response = youtube.videos().list(part="snippet", id=video_id).execute()
    matches = response.get("items", [])
    if not matches:
        raise ValueError("Video not found")
    snippet = matches[0]["snippet"]
    title = snippet.get("title", "")
    description = snippet.get("description", "")
    return title, description
def compute_score(video_url, goal):
    """Score how well a video's metadata matches *goal* on a 0-100 scale.

    For each model in the ensemble, embeds the video's title+description
    and the goal text, takes their cosine similarity, linearly maps the
    [-1, 1] range onto [0, 100], and returns the rounded average.

    Args:
        video_url: YouTube video URL to score.
        goal: Free-text description of what the viewer wants.

    Returns:
        An integer in [0, 100] — the averaged ensemble score.
    """
    title, description = fetch_metadata(video_url)
    document = f"{title}\n\n{description}"
    percentages = []
    for model in models:
        doc_vec = model.encode(document, convert_to_tensor=True, normalize_embeddings=True)
        goal_vec = model.encode(goal, convert_to_tensor=True, normalize_embeddings=True)
        similarity = util.cos_sim(doc_vec, goal_vec).item()
        # Map [-1, 1] → [0, 100], truncating, then clamp defensively.
        percentages.append(min(100, max(0, int((similarity + 1) * 50))))
    return int(round(sum(percentages) / len(percentages)))
# Gradio UI: two text inputs (video URL + goal) mapped straight onto
# compute_score, returning the numeric ensemble score.
iface = gr.Interface(
    fn=compute_score,
    inputs=[ gr.Textbox(label="YouTube URL"), gr.Textbox(label="Your Goal") ],
    outputs=gr.Number(label="Score 0–100"),
    description="Average of 5 sentence-transformer models"
)
if __name__=="__main__":
    # 0.0.0.0:7860 is the standard bind for Hugging Face Spaces;
    # mcp_server=True also exposes the app as an MCP tool endpoint.
    iface.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True)