# Spaces: Runtime error (status banner captured together with this source)
import requests
import torch
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer, util
from transformers import AutoModelForCausalLM, AutoTokenizer
# All tensors and the language model are placed on the CPU.
device = torch.device("cpu")

# Checkpoint used to generate the per-paper explanations.
# NOTE(review): confirm this repo id resolves on the Hugging Face Hub and that
# the checkpoint fits in this host's memory when loaded on CPU.
model_id = "TheBloke/Mistral-7B-Instruct-v0.1"  # replace if needed
tokenizer = AutoTokenizer.from_pretrained(model_id)
llm = AutoModelForCausalLM.from_pretrained(model_id).to(device)

# Sentence encoder used to rank feed entries against the user's query.
embedder = SentenceTransformer('all-MiniLM-L6-v2')
def search_and_summarize(query, max_papers=5):
    """Rank the arXiv cs.AI RSS feed against *query* and explain the top hits.

    Downloads the feed, embeds every entry's description together with the
    query, keeps the ``max_papers`` most similar entries and asks the
    language model for a short explanation of each one.

    Args:
        query: Free-text search string to compare against the feed entries.
        max_papers: Upper bound on the number of papers returned.

    Returns:
        A list of dicts with the keys ``'title'``, ``'summary'`` and
        ``'link'``, ordered from most to least similar to ``query``. The list
        is empty when ``max_papers`` is not positive or the feed yields no
        usable entries.

    Raises:
        requests.RequestException: If the feed cannot be downloaded or the
            server answers with an HTTP error status.
    """
    # A negative value would otherwise slice "all but the last N" below.
    if max_papers <= 0:
        return []

    url = "https://arxiv.org/rss/cs.AI"
    # Bound the wait and fail loudly on HTTP errors instead of parsing an
    # error page as if it were the feed.
    res = requests.get(url, timeout=30)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'xml')

    papers = []
    for item in soup.find_all('item'):
        # Skip entries that lack any of the three fields used below.
        if item.title is None or item.description is None or item.link is None:
            continue
        papers.append({
            'title': item.title.text,
            'abstract': item.description.text,
            'link': item.link.text,
        })
    if not papers:
        # Nothing to rank.
        return []

    # embed & find top matches
    query_emb = embedder.encode(query)
    paper_embs = embedder.encode([p['abstract'] for p in papers])
    sims = util.cos_sim(query_emb, paper_embs)[0]
    # Convert to plain ints so they index the Python list directly.
    top_idx = sims.argsort(descending=True)[:max_papers].tolist()

    results = []
    for idx in top_idx:
        paper = papers[idx]
        context = f"Title: {paper['title']}\nAbstract: {paper['abstract']}"
        prompt = f"{context}\n\nExplain this paper in simple terms for an AI researcher:"
        inputs = tokenizer(prompt, return_tensors="pt").to(device)
        outputs = llm.generate(**inputs, max_new_tokens=200)
        # Decode only the newly generated tokens. Cutting the decoded text at
        # len(prompt) can land in the wrong place, because decoding is not
        # guaranteed to reproduce the prompt character-for-character.
        prompt_len = inputs["input_ids"].shape[-1]
        explanation = tokenizer.decode(
            outputs[0][prompt_len:], skip_special_tokens=True
        ).strip()
        results.append({
            'title': paper['title'],
            'summary': explanation,
            'link': paper['link'],
        })
    return results