"""Fake-news detection chatbot.

Combines three signals for a user-submitted news claim:
1. An ML text classifier (RoBERTa fine-tuned for fake-news detection).
2. The Google Fact Check Tools API for official fact-checks.
3. NewsAPI as a fallback to surface similar real news coverage.

Served as a Gradio chat interface.
"""

import os
import re
from collections import Counter

import gradio as gr
import requests
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# Load model and tokenizer once at import time (slow; downloads on first run).
model_name = "hamzab/roberta-fake-news-classification"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)

# API keys: prefer environment variables; the literals are kept only as a
# backward-compatible fallback. NOTE(review): these keys are committed in
# source — rotate them and remove the fallbacks before any public deployment.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY", "AIzaSyCKnDlhWih34GdCuheNusnrEw_YE_q6GWQ")
NEWSAPI_KEY = os.getenv("NEWSAPI_KEY", "fcb304918fce4fb29b17b6c95dbc7518")

# Seconds before an outbound HTTP call is abandoned, so a hung remote API
# cannot freeze the chatbot indefinitely.
REQUEST_TIMEOUT = 10


def extract_keywords(text, max_words=5):
    """Return up to *max_words* of the most frequent non-stopword terms.

    Words are lowercased, must be longer than 2 characters, and are ranked
    by frequency. The result is a single space-joined string suitable as a
    search query for the fact-check / news APIs.
    """
    words = re.findall(r'\b\w+\b', text.lower())
    stopwords = {
        "the", "and", "is", "in", "to", "of", "a", "on", "for", "with",
        "as", "by", "at", "an", "be", "are", "from", "this", "it", "that",
    }
    filtered = [w for w in words if w not in stopwords and len(w) > 2]
    return " ".join(w for w, _ in Counter(filtered).most_common(max_words))


def check_google_fact_check(query):
    """Query the Google Fact Check Tools API for claims matching *query*.

    Returns a formatted multi-claim summary string (up to 3 claims), an
    error string starting with "❌" on failure, or None when the API
    returns no claims (callers use None to fall back to NewsAPI).
    """
    try:
        response = requests.get(
            "https://factchecktools.googleapis.com/v1alpha1/claims:search",
            params={"query": query, "key": GOOGLE_API_KEY},
            timeout=REQUEST_TIMEOUT,  # avoid hanging the chat on a stalled API
        ).json()
        if "claims" in response and response["claims"]:
            result = ""
            for claim in response["claims"][:3]:
                text = claim.get("text", "")
                # Each claim may carry several reviews; use the first one.
                review = claim.get("claimReview", [{}])[0]
                rating = review.get("textualRating", "Unrated")
                source = review.get("publisher", {}).get("name", "Unknown")
                url = review.get("url", "")
                result += f"✅ Claim: {text}\n📊 Verdict: {rating}\n📰 Source: {source}\n🔗 {url}\n\n"
            return result.strip()
        return None
    except Exception as e:
        return f"❌ Google API error: {e}"


def search_newsapi(query):
    """Search NewsAPI for up to 3 real news articles matching *query*.

    Used as a fallback when no official fact-check exists. Returns a
    formatted article list, an informational "no articles" message, or an
    error string starting with "❌".
    """
    try:
        response = requests.get(
            "https://newsapi.org/v2/everything",
            params={
                "q": query,
                "apiKey": NEWSAPI_KEY,
                "language": "en",
                "sortBy": "relevancy",
                "pageSize": 3,
            },
            timeout=REQUEST_TIMEOUT,  # avoid hanging the chat on a stalled API
        ).json()
        if response.get("status") != "ok":
            # BUGFIX: the original f-string was split by a literal newline
            # across a source-line break — a SyntaxError. Reassembled here.
            return f"❌ NewsAPI error: {response.get('message')}"
        articles = response.get("articles", [])
        if not articles:
            return "ℹ️ No similar real news articles found."
        output = "📰 No official fact-check found.\n\nBut here are similar real news articles:\n\n"
        for article in articles:
            title = article.get("title", "No title")
            source = article.get("source", {}).get("name", "Unknown")
            url = article.get("url", "#")
            output += f"• 📰 Source: {source}\n 📌 Title: {title}\n 🔗 {url}\n\n"
        return output.strip()
    except Exception as e:
        return f"❌ NewsAPI error: {e}"


def fake_news_chatbot(message, history):
    """Gradio chat handler: classify *message* and corroborate with APIs.

    *history* is required by the gr.ChatInterface contract but unused.
    Pipeline: ML verdict -> keyword extraction -> Google Fact Check; if no
    fact-check is found, fall back to NewsAPI and annotate disagreements
    between the ML verdict and the news search. Always returns a string.
    """
    try:
        prediction = classifier(message)[0]
        label = prediction['label']
        confidence = round(prediction['score'], 2)

        # Below 0.7 the model's verdict is treated as unreliable.
        # NOTE(review): assumes the model emits the literal label "FAKE";
        # confirm against the model card's id2label mapping.
        if confidence < 0.7:
            verdict = "🟡 UNSURE"
        elif label == "FAKE":
            verdict = "🟥 FAKE"
        else:
            verdict = "🟩 REAL"

        ml_result = f"📊 ML Prediction: {verdict}\n🔢 Confidence: {confidence}"
        keywords = extract_keywords(message)

        # Prefer an official fact-check when one exists.
        fact_result = check_google_fact_check(keywords)
        if fact_result:
            return f"{ml_result}\n\n🔍 Keywords Used: {keywords}\n\n{fact_result}"

        # Fallback: look for corroborating real news and flag disagreement
        # between the ML verdict and what the news search turned up.
        newsapi_result = search_newsapi(keywords)
        if "📰 Source:" in newsapi_result and verdict == "🟥 FAKE":
            ml_result += " ⚠️ ML says FAKE but similar real news found"
        elif "No similar" in newsapi_result and verdict == "🟩 REAL":
            ml_result += " ⚠️ ML says REAL but no matching news found"
        return f"{ml_result}\n\n🔍 Keywords Used: {keywords}\n\n{newsapi_result}"
    except Exception as e:
        # Top-level chat boundary: surface any failure to the user rather
        # than crashing the Gradio worker.
        return f"❌ Error: {str(e)}"


if __name__ == "__main__":
    # Guarded so importing this module (e.g. for tests) does not start a server.
    gr.ChatInterface(
        fn=fake_news_chatbot,
        title="🧠 Fake News Detection Chatbot",
        description="Ask me if a news item is real or fake. I’ll use an ML model + Google Fact Check + NewsAPI!",
        examples=[
            "The Prime Minister announced a new moon mission",
            "Aliens landed in New York yesterday",
            "COVID vaccine turns you into a lizard",
        ],
    ).launch()