import re
from collections import Counter

import requests
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

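# Load the pretrained fake-news classifier and wrap it in a text-classification pipeline.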
model_name = "hamzab/roberta-fake-news-classification"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)

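# API keys for the Google Fact Check Tools API and NewsAPI.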
GOOGLE_API_KEY = "AIzaSyCKnDlhWih34GdCuheNusnrEw_YE_q6GWQ"
NEWSAPI_KEY = "fcb304918fce4fb29b17b6c95dbc7518"


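# Reduce a message to its most frequent non-stopword terms, used as the search query for both APIs,
# e.g. "The Prime Minister announced a new moon mission" -> "prime minister announced new moon".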
def extract_keywords(text, max_words=5):
    words = re.findall(r'\b\w+\b', text.lower())
    stopwords = {
        "the", "and", "is", "in", "to", "of", "a", "on", "for", "with",
        "as", "by", "at", "an", "be", "are", "from", "this", "it", "that"
    }
    filtered = [w for w in words if w not in stopwords and len(w) > 2]
    return " ".join([w for w, _ in Counter(filtered).most_common(max_words)])


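# Query the Google Fact Check Tools API; returns up to three formatted claim reviews,
# None when no claims match, or an error message on failure.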
def check_google_fact_check(query):
    try:
        response = requests.get(
            "https://factchecktools.googleapis.com/v1alpha1/claims:search",
            params={"query": query, "key": GOOGLE_API_KEY},
            timeout=10
        ).json()
        if "claims" in response and response["claims"]:
            result = ""
            for claim in response["claims"][:3]:
                text = claim.get("text", "")
                review = claim.get("claimReview", [{}])[0]
                rating = review.get("textualRating", "Unrated")
                source = review.get("publisher", {}).get("name", "Unknown")
                url = review.get("url", "")
                result += f"✅ Claim: {text}\n🔍 Verdict: {rating}\n📰 Source: {source}\n🔗 {url}\n\n"
            return result.strip()
        return None
    except Exception as e:
        return f"❌ Google API error: {e}"


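# Fallback check: search NewsAPI for related real coverage when no official fact-check exists.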
def search_newsapi(query):
    try:
        response = requests.get("https://newsapi.org/v2/everything", params={
            "q": query, "apiKey": NEWSAPI_KEY, "language": "en", "sortBy": "relevancy", "pageSize": 3
        }, timeout=10).json()
        if response.get("status") != "ok":
            return f"❌ NewsAPI error: {response.get('message')}"
        articles = response.get("articles", [])
        if not articles:
            return "ℹ️ No similar real news articles found."
        output = "📰 No official fact-check found.\n\nBut here are similar real news articles:\n\n"
        for article in articles:
            title = article.get("title", "No title")
            source = article.get("source", {}).get("name", "Unknown")
            url = article.get("url", "#")
            output += f"• 📰 Source: {source}\n 📝 Title: {title}\n 🔗 {url}\n\n"
        return output.strip()
    except Exception as e:
        return f"❌ NewsAPI error: {e}"


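# Gradio chat handler: combine the ML verdict with the fact-check and news-search results.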
def fake_news_chatbot(message, history):
    try:
        # Run the ML classifier; truncation keeps long messages within the model's input limit.
        prediction = classifier(message, truncation=True)[0]
        label = prediction['label']
        confidence = round(prediction['score'], 2)

        # Map the prediction to a traffic-light verdict; below 0.7 confidence is treated as unsure.
        if confidence < 0.7:
            verdict = "🟡 UNSURE"
        elif label == "FAKE":
            verdict = "🟥 FAKE"
        else:
            verdict = "🟩 REAL"

        ml_result = f"📊 ML Prediction: {verdict}\n🔢 Confidence: {confidence}"

        # Prefer an official fact-check, searched with keywords extracted from the message.
        keywords = extract_keywords(message)
        fact_result = check_google_fact_check(keywords)
        if fact_result:
            return f"{ml_result}\n\n🔑 Keywords Used: {keywords}\n\n{fact_result}"

        # Otherwise fall back to NewsAPI and flag disagreements with the ML verdict.
        newsapi_result = search_newsapi(keywords)
        if "📰 Source:" in newsapi_result and verdict == "🟥 FAKE":
            ml_result += " ⚠️ ML says FAKE but similar real news found"
        elif "No similar" in newsapi_result and verdict == "🟩 REAL":
            ml_result += " ⚠️ ML says REAL but no matching news found"

        return f"{ml_result}\n\n🔑 Keywords Used: {keywords}\n\n{newsapi_result}"

    except Exception as e:
        return f"❌ Error: {str(e)}"


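# Build and launch the chat UI; every user message is routed through fake_news_chatbot.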
gr.ChatInterface(
    fn=fake_news_chatbot,
    title="🧠 Fake News Detection Chatbot",
    description="Ask me if a news item is real or fake. I'll use an ML model + Google Fact Check + NewsAPI!",
    examples=[
        "The Prime Minister announced a new moon mission",
        "Aliens landed in New York yesterday",
        "COVID vaccine turns you into a lizard"
    ],
).launch()