File size: 4,540 Bytes
631fcb1
 
 
 
 
 
fb11fb0
631fcb1
 
 
 
 
fb11fb0
631fcb1
 
 
fb11fb0
631fcb1
 
fb11fb0
 
 
 
631fcb1
 
 
fb11fb0
631fcb1
 
fb11fb0
 
 
 
631fcb1
 
 
 
 
 
 
 
 
 
 
 
 
 
fb11fb0
631fcb1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fb11fb0
631fcb1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fb11fb0
631fcb1
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import os
import re
from collections import Counter

import gradio as gr
import requests
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

# Load model and tokenizer once at import time; the pipeline is reused by
# every chat request.
model_name = "hamzab/roberta-fake-news-classification"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)

# API keys: prefer environment variables so secrets are not baked into the
# source; the hard-coded values remain only as a backward-compatible fallback.
# NOTE(review): these keys are committed to source control — rotate them.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "AIzaSyCKnDlhWih34GdCuheNusnrEw_YE_q6GWQ")
NEWSAPI_KEY = os.environ.get("NEWSAPI_KEY", "fcb304918fce4fb29b17b6c95dbc7518")

# Keyword extractor
def extract_keywords(text, max_words=5):
    """Return the `max_words` most frequent keywords of *text*, space-joined.

    Tokens are lowercased word-character runs; common English stopwords and
    tokens of length <= 2 are discarded before counting.
    """
    stopword_set = {
        "the", "and", "is", "in", "to", "of", "a", "on", "for", "with",
        "as", "by", "at", "an", "be", "are", "from", "this", "it", "that"
    }
    tokens = re.findall(r'\b\w+\b', text.lower())
    candidates = (tok for tok in tokens if len(tok) > 2 and tok not in stopword_set)
    ranked = Counter(candidates).most_common(max_words)
    return " ".join(word for word, _count in ranked)

# Google Fact Check
def check_google_fact_check(query):
    """Search the Google Fact Check Tools API for claims matching *query*.

    Returns up to three formatted claim reviews as one string, None when the
    API reports no claims, or an error string if the request/parsing fails.
    """
    try:
        response = requests.get(
            "https://factchecktools.googleapis.com/v1alpha1/claims:search",
            params={"query": query, "key": GOOGLE_API_KEY},
            timeout=10,  # requests has no default timeout; don't hang the chat forever
        ).json()
        if "claims" in response and response["claims"]:
            result = ""
            for claim in response["claims"][:3]:
                text = claim.get("text", "")
                # Guard against an explicit empty claimReview list, which the
                # original `.get(..., [{}])[0]` turned into an IndexError.
                review = (claim.get("claimReview") or [{}])[0]
                rating = review.get("textualRating", "Unrated")
                source = review.get("publisher", {}).get("name", "Unknown")
                url = review.get("url", "")
                result += f"βœ… Claim: {text}\nπŸ“Š Verdict: {rating}\nπŸ“° Source: {source}\nπŸ”— {url}\n\n"
            return result.strip()
        return None
    except Exception as e:
        return f"❌ Google API error: {e}"

# NewsAPI fallback
def search_newsapi(query):
    """Search NewsAPI for up to three relevant English articles about *query*.

    Used as a fallback when no official fact-check exists. Returns a formatted
    article list, an informational message when nothing matches, or an error
    string if the request fails.
    """
    try:
        response = requests.get("https://newsapi.org/v2/everything", params={
            "q": query, "apiKey": NEWSAPI_KEY, "language": "en", "sortBy": "relevancy", "pageSize": 3
        }, timeout=10).json()  # requests has no default timeout; bound the wait
        if response.get("status") != "ok":
            return f"❌ NewsAPI error: {response.get('message')}"
        articles = response.get("articles", [])
        if not articles:
            return "ℹ️ No similar real news articles found."
        output = "πŸ“° No official fact-check found.\n\nBut here are similar real news articles:\n\n"
        for article in articles:
            title = article.get("title", "No title")
            source = article.get("source", {}).get("name", "Unknown")
            url = article.get("url", "#")
            output += f"β€’ πŸ“° Source: {source}\n  πŸ“Œ Title: {title}\n  πŸ”— {url}\n\n"
        return output.strip()
    except Exception as e:
        return f"❌ NewsAPI error: {e}"

# Chatbot function
def fake_news_chatbot(message, history):
    """Classify *message* as real/fake news and cross-check it online.

    Pipeline: run the ML classifier, extract keywords from the message, try
    Google Fact Check first, then fall back to a NewsAPI search. *history* is
    unused but required by gradio's ChatInterface callback signature.
    Returns a formatted multi-line string for the chat window.
    """
    try:
        # truncation=True keeps long messages within the model's 512-token
        # limit instead of raising a tensor-size error.
        prediction = classifier(message, truncation=True)[0]
        label = prediction['label']
        confidence = round(prediction['score'], 2)

        # Low-confidence predictions are reported as UNSURE regardless of label.
        if confidence < 0.7:
            verdict = "🟑 UNSURE"
        elif label == "FAKE":
            verdict = "πŸŸ₯ FAKE"
        else:
            verdict = "🟩 REAL"

        ml_result = f"πŸ“Š ML Prediction: {verdict}\nπŸ”’ Confidence: {confidence}"

        keywords = extract_keywords(message)

        # Prefer an official fact-check when one exists.
        fact_result = check_google_fact_check(keywords)
        if fact_result:
            return f"{ml_result}\n\nπŸ” Keywords Used: {keywords}\n\n{fact_result}"

        # No fact-check found: fall back to related articles and flag any
        # disagreement between the ML verdict and the news search results.
        newsapi_result = search_newsapi(keywords)
        if "πŸ“° Source:" in newsapi_result and verdict == "πŸŸ₯ FAKE":
            ml_result += " ⚠️ ML says FAKE but similar real news found"
        elif "No similar" in newsapi_result and verdict == "🟩 REAL":
            ml_result += " ⚠️ ML says REAL but no matching news found"

        return f"{ml_result}\n\nπŸ” Keywords Used: {keywords}\n\n{newsapi_result}"

    except Exception as e:
        return f"❌ Error: {str(e)}"

# Gradio interface: wire the chatbot callback into a chat UI and serve it.
chat_ui = gr.ChatInterface(
    fn=fake_news_chatbot,
    title="🧠 Fake News Detection Chatbot",
    description="Ask me if a news item is real or fake. I’ll use an ML model + Google Fact Check + NewsAPI!",
    examples=[
        "The Prime Minister announced a new moon mission",
        "Aliens landed in New York yesterday",
        "COVID vaccine turns you into a lizard",
    ],
)
chat_ui.launch()