Spaces:

rkihacker
/

Multimodal-Moderation-Demo

Running

File size: 13,980 Bytes

e0f5861
fc016ef
 
 
 
e0f5861
fc016ef
 
 
 
 
 
6f48975
fc016ef
 
 
 
 
 
 
 
 
e0f5861
fc016ef
e0f5861
00ec3d3
fc016ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ff7d450
 
 
fc016ef
e0f5861
fc016ef
 
e0f5861
fc016ef
 
 
e0f5861
 
 
 
 
 
 
fc016ef
 
e0f5861
fc016ef
e0f5861
6f48975
 
 
 
fc016ef
ff7d450
fc016ef
ff7d450
 
 
 
 
fc016ef
6f48975
e0f5861
fc016ef
 
 
6f48975
 
 
fc016ef
 
e0f5861
fc016ef
e0f5861
 
 
 
fc016ef
 
6f48975
e0f5861
fc016ef
e0f5861
 
00ec3d3
 
 
e0f5861
 
00ec3d3
e0f5861
 
00ec3d3
e0f5861
fc016ef
e0f5861
 
00ec3d3
e0f5861
fc016ef
00ec3d3
e0f5861
fc016ef
 
e0f5861
fc016ef
 
 
e0f5861
 
fc016ef
 
e0f5861
fc016ef
e0f5861
 
 
 
 
 
 
 
 
 
ff7d450
 
 
 
 
 
e0f5861
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc016ef
e0f5861
 
 
 
 
 
 
 
 
 
 
 
fc016ef
 
e0f5861
 
 
 
 
 
 
fc016ef
 
e0f5861
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc016ef
e0f5861
fc016ef

# === Gradio Demo App: gradio_app.py (Backward-Compatible Version) ===
# This script creates a user-friendly web interface to demonstrate the
# multimodal moderation capabilities of the main FastAPI server.
#
# It interacts with the /v3/moderations endpoint.
# NOTE: This version removes the "Copy" button for compatibility with older Gradio versions.
# --------------------------------------------------------------------

import base64
import json
import logging
import os
import time
from typing import Optional

import gradio as gr
import httpx
from dotenv import load_dotenv

# --- Configuration ---
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
# Load settings from a local .env file (if any) before reading the environment.
load_dotenv()

# Base URL of the moderation backend. Trailing slashes are stripped so that a
# value such as "http://host:8000/" does not produce "//v3/moderations" below.
API_BASE_URL = os.environ.get("API_BASE_URL", "http://127.0.0.1:8000").rstrip("/")
# Full URL that moderation requests are POSTed to.
MODERATION_ENDPOINT = f"{API_BASE_URL}/v3/moderations"
# --- Whisper language table ---
# Maps the display name shown in the dropdown to the language code that is
# sent to the API in the "language" field. Most codes are two-letter
# ISO 639-1 codes; "haw" (Hawaiian) is a three-letter exception.
# NOTE(review): presumably mirrors the language list of the Whisper model used
# by the backend -- confirm against the server before adding or removing entries.
WHISPER_LANGUAGES = {
    "English": "en", "Chinese": "zh", "German": "de", "Spanish": "es", "Russian": "ru",
    "Korean": "ko", "French": "fr", "Japanese": "ja", "Portuguese": "pt", "Turkish": "tr",
    "Polish": "pl", "Catalan": "ca", "Dutch": "nl", "Arabic": "ar", "Swedish": "sv",
    "Italian": "it", "Indonesian": "id", "Hindi": "hi", "Finnish": "fi", "Vietnamese": "vi",
    "Hebrew": "he", "Ukrainian": "uk", "Greek": "el", "Malay": "ms", "Czech": "cs",
    "Romanian": "ro", "Danish": "da", "Hungarian": "hu", "Tamil": "ta", "Norwegian": "no",
    "Thai": "th", "Urdu": "ur", "Croatian": "hr", "Bulgarian": "bg", "Lithuanian": "lt",
    "Latin": "la", "Maori": "mi", "Malayalam": "ml", "Welsh": "cy", "Slovak": "sk",
    "Telugu": "te", "Persian": "fa", "Latvian": "lv", "Bengali": "bn", "Serbian": "sr",
    "Azerbaijani": "az", "Slovenian": "sl", "Kannada": "kn", "Estonian": "et", "Macedonian": "mk",
    "Breton": "br", "Basque": "eu", "Icelandic": "is", "Armenian": "hy", "Nepali": "ne",
    "Mongolian": "mn", "Bosnian": "bs", "Kazakh": "kk", "Albanian": "sq", "Swahili": "sw",
    "Galician": "gl", "Marathi": "mr", "Punjabi": "pa", "Sinhala": "si", "Khmer": "km",
    "Shona": "sn", "Yoruba": "yo", "Somali": "so", "Afrikaans": "af", "Occitan": "oc",
    "Georgian": "ka", "Belarusian": "be", "Tajik": "tg", "Sindhi": "sd", "Gujarati": "gu",
    "Amharic": "am", "Yiddish": "yi", "Lao": "lo", "Uzbek": "uz", "Faroese": "fo",
    "Haitian Creole": "ht", "Pashto": "ps", "Turkmen": "tk", "Nynorsk": "nn", "Maltese": "mt",
    "Sanskrit": "sa", "Luxembourgish": "lb", "Myanmar (Burmese)": "my", "Tibetan": "bo",
    "Tagalog": "tl", "Malagasy": "mg", "Assamese": "as", "Tatar": "tt", "Hawaiian": "haw",
    "Lingala": "ln", "Hausa": "ha", "Bashkir": "ba", "Javanese": "jw", "Sundanese": "su",
}
# Same entries, ordered alphabetically by display name (dicts keep insertion order).
SORTED_LANGUAGES = {name: WHISPER_LANGUAGES[name] for name in sorted(WHISPER_LANGUAGES)}
# Dropdown choices: the auto-detect option first, then the alphabetical list.
LANGUAGES_WITH_AUTO = {"Auto Language Detection": "auto"}
LANGUAGES_WITH_AUTO.update(SORTED_LANGUAGES)

def file_to_base64(filepath: Optional[str]) -> Optional[str]:
    """Read a file and return its bytes as base64-encoded text.

    Args:
        filepath: Path of the file to encode. A falsy value (``None`` or an
            empty string) means that no file was supplied.

    Returns:
        The base64 representation of the file's contents, or ``None`` when no
        path was given or the file could not be read. An empty file yields an
        empty string.
    """
    if not filepath:
        return None
    try:
        with open(filepath, "rb") as f:
            return base64.b64encode(f.read()).decode("utf-8")
    except Exception as e:
        # Best-effort on purpose: the failure is logged and reported to the
        # caller as None rather than raised.
        logging.error(f"Failed to convert file {filepath} to base64: {e}")
        return None
def create_status_banner(status_type, text):
    """Build the HTML snippet for the coloured status banner.

    Args:
        status_type: One of "safe", "flagged", "error" or "info"; any other
            value falls back to a neutral grey/black colour pair.
        text: Caption placed inside the banner heading. It is inserted as-is,
            without HTML escaping.

    Returns:
        A ``<div>`` wrapping an ``<h2>``, styled inline with the colour pair
        selected by ``status_type``.
    """
    # status type -> (background colour, text/border colour)
    palette = {
        "safe": ("#DFF2BF", "#4F8A10"),
        "flagged": ("#FFD2D2", "#D8000C"),
        "error": ("#FEEFB3", "#9F6000"),
        "info": ("#BDE5F8", "#00529B"),
    }
    fallback = ("#E0E0E0", "#000000")
    background, foreground = palette.get(status_type, fallback)
    box_style = (
        f"background-color:{background}; padding: 1rem; border-radius: 8px; "
        f"margin-bottom: 1rem; border: 1px solid {foreground};"
    )
    heading_style = f"color:{foreground}; text-align:center; margin:0; font-size: 1.5rem;"
    return f"<div style='{box_style}'><h2 style='{heading_style}'>{text}</h2></div>"
def clear_outputs():
    """Return the initial values for the six result widgets.

    Returns:
        A 6-tuple in the order (status banner HTML, latency, reason,
        flagged categories, transcription, raw JSON response).
    """
    placeholder = "Results will appear here after submission."
    banner = create_status_banner("info", "SUBMIT CONTENT FOR MODERATION")
    return (banner, "N/A", placeholder, placeholder, placeholder, None)
def moderate_content(text_input, image_input, video_input, audio_input, language_full_name):
    """Send the supplied inputs to the moderation API and format the reply for the UI.

    Args:
        text_input: Text to moderate; sent as ``input`` when non-empty.
        image_input: Path of an image file, or a falsy value when absent.
        video_input: Path of a video file, or a falsy value when absent.
        audio_input: Path of an audio file, or a falsy value when absent.
        language_full_name: Display name of the audio language. It is looked
            up in ``LANGUAGES_WITH_AUTO`` (unknown names fall back to
            ``"auto"``) and only sent when audio is attached.

    Returns:
        A 6-tuple in the order (status banner HTML, latency text, reason,
        flagged categories, transcribed text, raw response object). Every
        failure path returns the same shape with an "error" banner instead
        of raising.
    """
    if not any([text_input, image_input, video_input, audio_input]):
        return (
            create_status_banner("error", "🚫 NO INPUT PROVIDED 🚫"),
            "N/A",
            "Please provide at least one input (text, image, video, or audio) before submitting.",
            "N/A",
            "N/A",
            None,
        )

    logging.info("Preparing payload for moderation API...")
    payload = {"model": "nai-moderation-latest"}
    if text_input:
        payload["input"] = text_input

    # (payload key, human-readable label, file path) for each media input.
    media_inputs = (
        ("image", "image", image_input),
        ("video", "video", video_input),
        ("voice", "audio", audio_input),
    )
    for payload_key, label, path in media_inputs:
        if not path:
            continue
        encoded = file_to_base64(path)
        if not encoded:
            # A file the user supplied must never be dropped silently:
            # otherwise the result could read "SAFE" for content that was
            # never actually sent to the moderation API.
            logging.error(f"Aborting moderation: the {label} input could not be read or is empty.")
            return (
                create_status_banner("error", "🚫 UNREADABLE INPUT 🚫"),
                "N/A",
                f"The {label} file could not be read or is empty, so nothing was sent for moderation. Please re-upload it and try again.",
                "N/A",
                "N/A",
                None,
            )
        payload[payload_key] = encoded

    if "voice" in payload:
        language_code = LANGUAGES_WITH_AUTO.get(language_full_name, "auto")
        payload["language"] = language_code
        if language_code == "auto":
            logging.info("Audio detected. Using language: Auto Language Detection (auto)")
        else:
            logging.info(f"Audio detected. Using language: {language_full_name} ({language_code})")

    logging.info(f"Sending request to {MODERATION_ENDPOINT} with inputs: {list(payload.keys())}")
    latency_ms = None
    start_time = time.monotonic()
    try:
        with httpx.Client(timeout=180.0) as client:
            response = client.post(MODERATION_ENDPOINT, json=payload)
            latency_ms = (time.monotonic() - start_time) * 1000
            logging.info(f"API response received in {latency_ms:.2f} ms with status code {response.status_code}")
            response.raise_for_status()
            data = response.json()
            if not data.get("results"):
                return (
                    create_status_banner("error", "EMPTY API RESPONSE"),
                    f"{latency_ms:.2f} ms",
                    "The API returned an empty result. This can happen if media processing fails (e.g., a video with no valid frames).",
                    "N/A",
                    "N/A",
                    data,
                )
            # Only the first entry of "results" is displayed.
            result = data["results"][0]
            status_text, status_type = ("🚨 FLAGGED 🚨", "flagged") if result["flagged"] else ("✅ SAFE ✅", "safe")
            status_banner = create_status_banner(status_type, status_text)
            reason = result.get("reason") or "No specific reason provided."
            transcribed = result.get("transcribed_text") or "No audio was provided or transcription was not applicable."
            flagged_categories = [cat for cat, flagged in result.get("categories", {}).items() if flagged]
            categories_str = ", ".join(flagged_categories) if flagged_categories else "None"
            logging.info("Successfully parsed moderation response.")
            return (status_banner, f"{latency_ms:.2f} ms", reason, categories_str, transcribed, data)
    except httpx.HTTPStatusError as e:
        # Raised by raise_for_status(), so latency_ms has normally been set already.
        latency_str = f"{latency_ms:.2f} ms" if latency_ms is not None else "N/A"
        full_response, error_details = {}, ""
        try:
            error_json = e.response.json()
            detail = error_json.get("detail", "No specific error detail provided.")
            error_details = f"Server responded with error: {detail}"
            full_response = {"error": "Backend API Error", "status_code": e.response.status_code, "details": error_json}
        except (json.JSONDecodeError, AttributeError):
            # The body was not JSON, or was JSON without a .get() (e.g. a list).
            error_details = f"Could not decode the server's error response:\n{e.response.text}"
            full_response = {"error": "Backend API Error", "status_code": e.response.status_code, "details": e.response.text}
        logging.error(f"HTTP Status Error: {e.response.status_code} - Response: {e.response.text}")
        return (
            create_status_banner("error", f"🚫 API ERROR (HTTP {e.response.status_code}) 🚫"),
            latency_str,
            error_details,
            "N/A",
            "N/A",
            full_response,
        )
    except httpx.TimeoutException as e:
        # Must precede the RequestError handler: timeouts are a subclass of
        # RequestError, and a slow but running server is not a connection failure.
        latency_ms = (time.monotonic() - start_time) * 1000
        error_msg = f"The request to the API server at `{API_BASE_URL}` timed out. The server may be unreachable, overloaded, or still processing large media. Please try again, possibly with smaller files."
        logging.error(f"Request Timeout: No timely response from {API_BASE_URL}. Details: {e}")
        return (
            create_status_banner("error", "⏳ REQUEST TIMED OUT ⏳"),
            f"{latency_ms:.0f} ms",
            error_msg,
            "N/A",
            "N/A",
            {"error": "Timeout Error", "url": API_BASE_URL, "details": str(e)},
        )
    except httpx.RequestError as e:
        latency_ms = (time.monotonic() - start_time) * 1000
        error_msg = f"Could not connect to the API server at `{API_BASE_URL}`. Please ensure the backend server is running and the URL is correctly configured."
        logging.error(f"Request Error: Could not connect to {API_BASE_URL}. Details: {e}")
        return (
            create_status_banner("error", "🔌 CONNECTION ERROR 🔌"),
            f"{latency_ms:.0f} ms",
            error_msg,
            "N/A",
            "N/A",
            {"error": "Connection Error", "url": API_BASE_URL, "details": str(e)},
        )
    except Exception as e:
        # Last-resort boundary so the UI shows an error instead of crashing,
        # e.g. on a non-JSON success body or a result without "flagged".
        logging.error(f"Unexpected Error in Gradio App: {e}", exc_info=True)
        return (
            create_status_banner("error", "💥 UNEXPECTED APP ERROR 💥"),
            "N/A",
            f"An unexpected error occurred within the Gradio application itself: {type(e).__name__}",
            "N/A",
            "N/A",
            {"error": "Gradio App Internal Error", "type": type(e).__name__, "details": str(e)},
        )

# --- Gradio Interface ---
# Builds the page: a two-column row (inputs with scale=2, results with scale=3),
# followed by a list of text-only examples and the button event wiring.
# The css argument hides the page footer element.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), css="footer {display: none !important}") as demo:
    gr.Markdown(
        """
        # 🤖 Multimodal Content Moderation Demo
        This interface demonstrates a powerful, multi-input moderation API. 
        Provide any combination of text, image, video, and audio. The system will analyze all inputs together for a comprehensive result.
        """
    )
    with gr.Row(variant="panel"):
        # Input column: one tab per input kind, plus the Clear/Submit buttons.
        with gr.Column(scale=2):
            gr.Markdown("### 1. Provide Your Content")
            with gr.Tabs():
                with gr.TabItem("📝 Text"):
                    text_input = gr.Textbox(label="Text Input", lines=8, placeholder="Enter any text here...")
                with gr.TabItem("🖼️ Image"):
                    # type="filepath": the handler receives a path on disk.
                    image_input = gr.Image(label="Image Input", type="filepath")
                with gr.TabItem("🎬 Video"):
                    video_input = gr.Video(label="Video Input")
                with gr.TabItem("🎤 Audio"):
                    audio_input = gr.Audio(label="Voice/Audio Input", type="filepath")
                    # Choices are the display names; moderate_content maps the
                    # selected name to its language code via LANGUAGES_WITH_AUTO.
                    language_input = gr.Dropdown(
                        label="Audio Language", 
                        choices=list(LANGUAGES_WITH_AUTO.keys()), 
                        value="Auto Language Detection", 
                        interactive=True
                    )
            with gr.Row():
                clear_button = gr.Button("Clear All")
                submit_button = gr.Button("▶️ Moderate Content", variant="primary")
        # Result column: status banner, summary fields, and the raw JSON reply.
        with gr.Column(scale=3):
            gr.Markdown("### 2. Moderation Results")
            status_output = gr.Markdown(value=create_status_banner("info", "AWAITING SUBMISSION"))
            with gr.Group():
                with gr.Row():
                    latency_output = gr.Textbox(label="⏱️ API Latency", interactive=False)
                    categories_output = gr.Textbox(label="🏷️ Flagged Categories", interactive=False)
                reason_output = gr.Textbox(label="⚖️ Reason", interactive=False, lines=2)
                # NOTE: there is no Copy button next to this box; per the file
                # header it was removed for compatibility with older Gradio versions.
                transcription_output = gr.Textbox(label="🎤 Transcribed Text (from audio)", interactive=False, lines=4)
            with gr.Accordion("Full API Response (JSON)", open=False):
                full_response_output = gr.JSON(label="Raw JSON Response")
    
    # On page load, fill the result widgets with their initial placeholder values.
    # The outputs list follows the order of the 6-tuple returned by clear_outputs.
    demo.load(fn=clear_outputs, inputs=None, outputs=[status_output, latency_output, reason_output, categories_output, transcription_output, full_response_output])
    
    gr.Markdown("---")
    gr.Markdown(
        """
        ### 💡 Quick Examples
        <p style='color: #666; font-size: 0.9rem;'>
        <b>⚠️ Content Warning:</b> The examples below include text that may be offensive or disturbing (e.g., hate speech, violence, sexual content). 
        They are provided solely to demonstrate the capabilities of the moderation model.
        </p>
        """
    )
    # Each example row follows the order of `inputs` below:
    # [text, image, video, audio, language]. Only the text column is filled in.
    # cache_examples=False: example outputs are not cached ahead of time.
    gr.Examples(
        examples=[
            ["The sun is shining and the birds are singing. It's a beautiful day for a walk in the park.", None, None, None, "English"],
            ["I'm going to kill the process on my computer because it's using too much memory.", None, None, None, "English"],
            ["If you don't give me what I want, I will hunt you down and hurt you.", None, None, None, "English"],
            ["I can't stand people from that country, they are all lazy and untrustworthy.", None, None, None, "English"],
            ["I feel so hopeless and alone. I don't see the point in going on anymore.", None, None, None, "English"],
            ["Looking for a partner for some wild, no-strings-attached fun tonight. Must be over 18.", None, None, None, "English"],
            ["She looks so young and innocent in that picture, I love it.", None, None, None, "English"],
        ],
        inputs=[text_input, image_input, video_input, audio_input, language_input],
        fn=moderate_content,
        outputs=[status_output, latency_output, reason_output, categories_output, transcription_output, full_response_output],
        cache_examples=False,
    )

    # --- Event Handlers (Backward-Compatible) ---
    # all_inputs follows moderate_content's parameter order; all_outputs follows
    # the order of the 6-tuple that moderate_content and clear_outputs return.
    all_inputs = [text_input, image_input, video_input, audio_input, language_input]
    all_outputs = [status_output, latency_output, reason_output, categories_output, transcription_output, full_response_output]

    submit_button.click(fn=moderate_content, inputs=all_inputs, outputs=all_outputs)
    # "Clear All" blanks the four content inputs (the four leading None values)
    # and resets every result widget. The language dropdown is not listed in
    # the outputs, so its current selection is left unchanged.
    clear_button.click(
        fn=lambda: (None, None, None, None, *clear_outputs()), 
        inputs=None, 
        outputs=[text_input, image_input, video_input, audio_input, *all_outputs], 
        queue=False
    )
    # NOTE: no copy_button.click() handler is registered; the Copy button was removed.

if __name__ == "__main__":
    # Script entry point: log the configured backend, warn when it is still
    # the built-in local default, then start the Gradio server.
    logging.info(f"Connecting to API server at: {API_BASE_URL}")
    using_default_url = API_BASE_URL == "http://127.0.0.1:8000"
    if using_default_url:
        logging.warning("API_BASE_URL is set to the default local address. Make sure this is correct or set it in your .env file.")
    # Listens on every network interface ("0.0.0.0"), port 7860.
    demo.launch(server_port=7860, server_name="0.0.0.0")