# Spaces: Sleeping  (page-status text captured together with the source; not part of the program)
import html
import logging
import re

import gradio as gr
import numpy as np
from transformers import pipeline
# Model pipelines, created once at import time and shared by every request.

# OCR model used for uploaded screenshots.
# Consider upgrading to trocr-large-printed.
ocr = pipeline(
    task="image-to-text",
    model="microsoft/trocr-base-printed",
)

# Zero-shot classifier used to label message text.
# Consider fine-tuning for phishing.
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)
# Common phishing indicators. Each entry is lowercase so it can be compared
# against a lowercased copy of the message.
SUSPICIOUS_PHRASES = [
    "urgent", "immediately", "password", "account locked", "wire transfer",
    "bank verification", "click here", "verification code", "credit card",
    "suspended", "login now", "reset your password", "act now", "unusual activity",
    "security alert", "confirm your identity", "gift card", "lottery winner",
]

# URL detection regex (a plain pattern string, suitable for re.findall).
# A URL is "http://" or "https://" (any letter case) followed by everything up
# to the next whitespace, angle bracket or quote. This keeps "~" and "#"
# inside the match and leaves wrappers such as <...> or "..." outside it.
URL_PATTERN = r"(?i)https?://[^\s<>\"']+"
def extract_text_from_image(image):
    """Run OCR on an uploaded image and return the recognised text.

    Args:
        image: Image to hand to the module-level ``ocr`` pipeline, or ``None``
            when nothing was uploaded.

    Returns:
        The ``generated_text`` of the first OCR result. An empty string is
        returned when no image was given, when OCR produced no result, or when
        OCR failed.
    """
    if image is None:
        return ""
    try:
        result = ocr(image)
        return result[0]["generated_text"] if result else ""
    except Exception:
        # Boundary handler: a failure must not be returned as if it were text
        # read from the image, otherwise the caller would go on to classify
        # the error message itself. Log it and report "no text" instead.
        logging.getLogger(__name__).exception("OCR failed while processing image")
        return ""
def _render_alert(gradient, icon, title_color, title):
    """Return the banner HTML for one risk level."""
    return f"""
<div style="padding: 20px; background: linear-gradient(to right, {gradient});
border-radius: 12px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); margin-bottom: 20px;">
<div style="display: flex; align-items: center; gap: 12px;">
<span style="font-size: 24px;">{icon}</span>
<h3 style="color: {title_color}; margin: 0; font-size: 18px;">{title}</h3>
</div>
</div>
"""


def _extract_urls(text):
    """Return the URLs in *text*, in order of appearance and without duplicates."""
    urls = []
    for match in re.findall(URL_PATTERN, text):
        # A URL that ends a sentence would otherwise keep the sentence's
        # punctuation (e.g. "https://example.com/reset.").
        cleaned = match.rstrip(".,;:!?\"'")
        if cleaned not in urls:
            urls.append(cleaned)
    return urls


def analyze_text(text):
    """Classify a message and build the HTML shown to the user.

    The text is labelled by the module-level zero-shot ``classifier`` as
    "Phishing Email" or "Legitimate Email", then scanned for the entries of
    ``SUSPICIOUS_PHRASES`` and for URLs.

    Args:
        text: Message text to analyse. ``None``, empty and whitespace-only
            values are treated as "nothing to analyse".

    Returns:
        A 3-tuple ``(alert_html, report_html, column_update)``: the risk
        banner, the detailed report, and a ``gr.update`` that shows the
        results (or hides them when there was nothing to analyse).
    """
    if not text or not text.strip():
        return "", "", gr.update(visible=False)

    # Zero-shot classification; the first label/score pair is used as the verdict.
    result = classifier(text, candidate_labels=["Phishing Email", "Legitimate Email"])
    label = result["labels"][0]
    confidence = result["scores"][0]
    is_phishing = label == "Phishing Email"

    # Risk banner: phishing above 80% is high risk, other phishing is medium,
    # everything else is low.
    if is_phishing and confidence > 0.8:
        alert_html = _render_alert(
            "#ffebee, #ffcdd2", "⚠️", "#c62828",
            "High Risk Detected - Likely Phishing Attempt",
        )
    elif is_phishing:
        alert_html = _render_alert(
            "#fff3e0, #ffe0b2", "⚡", "#ef6c00",
            "Medium Risk - Suspicious Content Detected",
        )
    else:
        alert_html = _render_alert(
            "#e8f5e9, #c8e6c9", "✅", "#2e7d32",
            "Low Risk - Likely Legitimate",
        )

    # Simple indicators: known phrases (case-insensitive substring match) and URLs.
    text_lower = text.lower()
    found_phrases = [phrase for phrase in SUSPICIOUS_PHRASES if phrase in text_lower]
    urls = _extract_urls(text)

    # Detailed report. Everything taken from the message is HTML-escaped.
    report = [
        "<div style='background: white; padding: 24px; border-radius: 12px; box-shadow: 0 2px 8px rgba(0,0,0,0.05);'>",
        "<h3 style='color: #1a237e; margin-top: 0;'>Analysis Details</h3>",
        "<div style='display: flex; gap: 20px; margin-bottom: 20px;'>",
        "<div style='flex: 1; padding: 16px; background: #f5f5f5; border-radius: 8px;'>",
        f"<strong>Confidence Score:</strong> {confidence:.1%}",
        "</div>",
        "<div style='flex: 1; padding: 16px; background: #f5f5f5; border-radius: 8px;'>",
        f"<strong>Classification:</strong> {html.escape(label)}",
        "</div>",
        "</div>",
    ]

    if found_phrases or urls:
        report.append("<div style='margin-top: 20px;'>")
        if found_phrases:
            report.extend([
                "<h4 style='color: #d32f2f;'>🚩 Suspicious Elements Detected:</h4>",
                "<ul style='list-style-type: none; padding-left: 0;'>",
            ])
            report.extend(
                f"<li style='margin-bottom: 8px; padding: 8px 12px; background: #ffebee; border-radius: 6px;'>Found: '{html.escape(phrase)}'</li>"
                for phrase in found_phrases
            )
            report.append("</ul>")
        if urls:
            report.extend([
                "<h4 style='color: #d32f2f;'>🔗 Detected URLs:</h4>",
                "<ul style='list-style-type: none; padding-left: 0;'>",
            ])
            report.extend(
                f"<li style='margin-bottom: 8px; padding: 8px 12px; background: #e3f2fd; border-radius: 6px; word-break: break-all;'>{html.escape(url)}</li>"
                for url in urls
            )
            report.append("</ul>")
        report.append("</div>")
    else:
        report.append("<p style='color: #2e7d32;'>✅ No common suspicious phrases or URLs detected.</p>")

    # Closing note. The confidence refers to whichever label won, so the
    # "extreme caution" wording is only correct for a phishing verdict; a
    # confident "legitimate" verdict gets a calm note instead.
    if confidence > 0.9:
        if is_phishing:
            note = "🔴 High confidence in classification - exercise extreme caution!"
        else:
            note = "🟢 High confidence in classification - but always remain vigilant."
    elif confidence > 0.7:
        note = "🟡 Moderate confidence - review carefully and verify sender."
    else:
        note = "🟢 Low confidence - but always remain vigilant."

    report.append("<div style='margin-top: 20px; padding: 16px; background: #e3f2fd; border-radius: 8px;'>")
    report.append(f"<p style='margin: 0;'><strong>{note}</strong></p>")
    report.append("</div></div>")

    return alert_html, "\n".join(report), gr.update(visible=True)
def process_input(text_input, image_input):
    """Analyse the pasted text or, when there is none, the uploaded image.

    Text takes priority: the image is only run through OCR when the text box
    is empty (``None`` is treated like an empty box).

    Args:
        text_input: Content of the message text box; may be empty or ``None``.
        image_input: Uploaded screenshot, or ``None``.

    Returns:
        A 3-tuple ``(alert_html, analysis_html, column_update)`` in the same
        shape as ``analyze_text``.
    """
    if text_input and text_input.strip():
        return analyze_text(text_input)

    if image_input is None:
        # NOTE(review): this prompt is returned together with a hidden output
        # column, as in the original; confirm whether it is meant to be shown.
        return "", "Please provide either text or an image to analyze.", gr.update(visible=False)

    extracted_text = extract_text_from_image(image_input)
    if extracted_text.strip():
        return analyze_text(extracted_text)

    # The error is delivered through the same outputs as a normal result, so
    # the results column has to be shown for the user to see it.
    # NOTE(review): assumes the alert/analysis components sit inside that
    # column - confirm against the layout.
    return (
        """<div style="padding: 20px; background: #fff3e0; border-radius: 12px; margin-bottom: 20px;">
<h3 style="color: #ef6c00; margin: 0;">⚠️ OCR Processing Error</h3>
</div>""",
        "Could not extract text from image. Please ensure the image contains clear, readable text.",
        gr.update(visible=True),
    )
# Custom theme: Gradio's Soft theme with the app's own colour overrides.
_theme_overrides = {
    "body_background_fill": "#f8f9fa",
    "block_background_fill": "white",
    "block_label_background_fill": "*background-3",
    "input_background_fill": "white",
    "button_primary_background_fill": "#1a237e",
    "button_primary_text_color": "white",
    "button_secondary_background_fill": "#e0e0e0",
    "button_secondary_text_color": "#333",
}
custom_theme = gr.themes.Soft().set(**_theme_overrides)
# CSS for responsiveness and polish.
# Passed to gr.Blocks(css=...). The .header and .tool-description classes are
# used by the HTML snippets in the interface; the @media block adjusts the
# heading size, input padding and button width on screens up to 600px wide.
custom_css = """
.container { max-width: 1000px; margin: auto; }
.header { text-align: center; margin-bottom: 2rem; }
.tool-description { max-width: 800px; margin: 0 auto 2rem auto; }
.input-section { margin-bottom: 2rem; }
.analysis-section { margin-top: 2rem; }
@media (max-width: 600px) {
.header h1 { font-size: 1.8rem; }
.input-section { padding: 0 10px; }
.gr-button { width: 100%; margin-bottom: 10px; }
}
"""
# Create Gradio interface
# NOTE(review): the nesting of the layout below was reconstructed from a copy
# of the source that had lost its indentation - confirm it matches the
# intended layout.
with gr.Blocks(theme=custom_theme, css=custom_css) as demo:
    gr.HTML("""
    <div class="header">
    <h1 style="color: #1a237e; font-size: 2.5rem; margin-bottom: 1rem;">🛡️ AI Phishing Guard</h1>
    <p style="color: #555; font-size: 1.2rem;">Protect yourself from phishing attempts with AI-powered analysis</p>
    </div>
    """)

    with gr.Row(equal_height=True):
        with gr.Column():
            gr.HTML("""
            <div class="tool-description">
            <h3 style="color: #1a237e;">How to Use</h3>
            <ol style="color: #555; line-height: 1.6;">
            <li>Paste message text or upload a screenshot</li>
            <li>Analyze instantly as you type or click 'Analyze' for images</li>
            <li>Review the detailed results and stay cautious</li>
            </ol>
            <div style="background: #e3f2fd; padding: 16px; border-radius: 8px; margin-top: 1rem;">
            <h4 style="color: #1a237e; margin-top: 0;">This tool detects:</h4>
            <ul style="color: #555; margin-bottom: 0;">
            <li>Suspicious language patterns</li>
            <li>Common phishing phrases</li>
            <li>URLs and potential threats</li>
            <li>Urgency indicators</li>
            </ul>
            </div>
            </div>
            """)

            with gr.Tabs():
                with gr.TabItem("✏️ Text Input"):
                    text_input = gr.Textbox(
                        lines=8,
                        label="Message Text",
                        placeholder="Paste email or message content here...",
                        elem_id="text_input",
                    )
                with gr.TabItem("📷 Screenshot Upload"):
                    image_input = gr.Image(
                        label="Upload Screenshot",
                        type="pil",
                        elem_id="image_input",
                    )

            with gr.Row():
                analyze_button = gr.Button("🔍 Analyze", variant="primary", size="lg")
                clear_button = gr.Button("🗑️ Clear", variant="secondary", size="lg")

            # Hidden until an analysis started from the Analyze button is running.
            loading = gr.HTML(
                "<div style='text-align: center; padding: 10px;'>Analyzing... <span>⏳</span></div>",
                visible=False,
            )

        # Results stay hidden until process_input asks for them to be shown.
        with gr.Column(visible=False) as output_col:
            alert_html = gr.HTML()
            analysis = gr.HTML()

    # Examples
    examples = [
        ["Subject: URGENT - Account Security Alert\n\nDear User,\n\nWe detected unusual activity in your account. Click here immediately to verify your identity and reset your password at https://fakebank.com/reset. If you don't respond within 24 hours, your account will be suspended.\n\nBank Security Team", None],
        ["Subject: Team Meeting Tomorrow\n\nHi everyone,\n\nJust a reminder that we have our weekly team meeting tomorrow at 10 AM in the main conference room. Please bring your project updates.\n\nBest regards,\nSarah", None],
    ]
    gr.Examples(
        examples=examples,
        inputs=[text_input, image_input],
        outputs=[alert_html, analysis, output_col],
        fn=process_input,
        cache_examples=True,
    )

    # Event handlers
    analysis_inputs = [text_input, image_input]
    analysis_outputs = [alert_html, analysis, output_col]

    # Re-analyse whenever the text changes.
    text_input.change(
        fn=process_input,
        inputs=analysis_inputs,
        outputs=analysis_outputs,
    )

    # Analyze button: show the loading indicator, run the analysis, then hide
    # the indicator again. These are three chained events because a single
    # handler can deliver only one update per output component, so it cannot
    # both show and hide the indicator in one return value.
    analyze_button.click(
        fn=lambda: gr.update(visible=True),
        inputs=None,
        outputs=loading,
    ).then(
        fn=process_input,
        inputs=analysis_inputs,
        outputs=analysis_outputs,
    ).then(
        fn=lambda: gr.update(visible=False),
        inputs=None,
        outputs=loading,
    )

    # Clear button: reset both inputs and hide the previous results.
    clear_button.click(
        fn=lambda: ("", None, "", "", gr.update(visible=False)),
        inputs=[],
        outputs=[text_input, image_input, alert_html, analysis, output_col],
    )
# Launch the app.
# Runs only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    demo.launch()