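"""AI Humanizer: a Gradio app that paraphrases AI-generated text with Pegasus and
T5, post-processes it to sound more human, and scores the result with a local
AI detector."""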
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
import gradio.themes as grthemes
import random
import re

# ----------------------
# Paraphrasing Model Setup (Pegasus + T5)
# ----------------------
PEGASUS_MODEL_NAME = "tuner007/pegasus_paraphrase"
T5_MODEL_NAME = "Vamsi/T5_Paraphrase_Paws"
pegasus_tokenizer = AutoTokenizer.from_pretrained(PEGASUS_MODEL_NAME)
pegasus_model = AutoModelForSeq2SeqLM.from_pretrained(PEGASUS_MODEL_NAME)
t5_tokenizer = AutoTokenizer.from_pretrained(T5_MODEL_NAME)
t5_model = AutoModelForSeq2SeqLM.from_pretrained(T5_MODEL_NAME)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pegasus_model = pegasus_model.to(device)
t5_model = t5_model.to(device)

# ----------------------
# Semantic Similarity Model
# ----------------------
similarity_model = SentenceTransformer('all-MiniLM-L6-v2')

# ----------------------
# Local AI Detector (roberta-base-openai-detector)
# ----------------------
AI_DETECTOR_MODEL = "roberta-base-openai-detector"
ai_detector = pipeline("text-classification", model=AI_DETECTOR_MODEL, device=0 if torch.cuda.is_available() else -1)

# ----------------------
# Prompt Variations for Humanization
# ----------------------
PEGASUS_PROMPTS = [
    "Paraphrase this naturally:",
    "Rewrite as if explaining to a friend:",
    "Make this sound like a real conversation:",
    "Express this in a casual, human way:",
    "Reword this with natural flow:",
    "Make this sound less robotic:",
    "Rewrite in a friendly, informal tone:",
    "Paraphrase in a way a student would say it:",
]
# Keyed by the UI Tone Selector values so the chosen tone drives the prompt.
T5_PROMPTS = {
    "Academic": "Paraphrase the following text in a formal, academic tone:",
    "Casual": "Paraphrase the following text in a casual, conversational tone:",
    "Friendly": "Paraphrase the following text in a friendly, approachable tone:",
    "Stealth": "Paraphrase the following text to bypass AI detectors and sound as human as possible:",
}

# ----------------------
# Sentence Splitter
# ----------------------
def split_sentences(text):
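    """Split text into sentences on end punctuation (., !, ?) followed by whitespace."""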
    sentences = re.split(r'(?<=[.!?])\s+', text.strip())
    return [s for s in sentences if s]

# ----------------------
# Aggressive Post-Processing
# ----------------------
def postprocess_text(text):
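    """Add human-style quirks: contractions, an occasional idiom or transition
    word, and (rarely) a lower-cased word to mimic human typing."""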
    contractions = {
        "do not": "don't", "cannot": "can't", "will not": "won't", "I am": "I'm",
        "is not": "isn't", "are not": "aren't", "did not": "didn't", "it is": "it's",
        "does not": "doesn't", "have not": "haven't", "has not": "hasn't"
    }
    for k, v in contractions.items():
        text = re.sub(rf'\b{k}\b', v, text, flags=re.IGNORECASE)
    idioms = [
        "at the end of the day", "to be honest", "as a matter of fact", "for what it's worth",
        "in a nutshell", "the bottom line is", "all things considered"
    ]
    transitions = [
        "Interestingly,", "In fact,", "To be clear,", "As a result,", "For example,", "On the other hand,", "In other words,"
    ]
    if random.random() < 0.3:
        text += " " + random.choice(idioms) + "."
    if random.random() < 0.3:
        text = random.choice(transitions) + " " + text
    # Randomly lower-case a word to mimic human error
    if random.random() < 0.2:
        words = text.split()
        if len(words) > 3:
            idx = random.randint(1, len(words)-2)
            words[idx] = words[idx].lower()
            text = ' '.join(words)
    return text

# ----------------------
# Multi-Model, Multi-Pass Paraphrasing
# ----------------------
def pegasus_paraphrase(sentence):
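    """Paraphrase a single sentence with Pegasus, prefixed by a random prompt."""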
    prompt = random.choice(PEGASUS_PROMPTS)
    full_prompt = f"{prompt} {sentence}"
    batch = pegasus_tokenizer([full_prompt], truncation=True, padding='longest', max_length=60, return_tensors="pt").to(device)
    outputs = pegasus_model.generate(
        **batch,
        max_length=60,
        num_beams=5,
        num_return_sequences=1  # temperature omitted: it is ignored under beam search
    )
    tgt_text = pegasus_tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return tgt_text[0] if tgt_text else sentence

def t5_paraphrase(sentence, tone="Stealth"):
    """Paraphrase one sentence with T5, steered by the selected tone."""
    # Use the prompt matching the user's tone instead of a random one,
    # so the Tone Selector in the UI actually affects the output.
    prompt = T5_PROMPTS.get(tone, T5_PROMPTS["Stealth"]) + " " + sentence
    input_ids = t5_tokenizer.encode(prompt, return_tensors="pt", max_length=256, truncation=True).to(device)
    outputs = t5_model.generate(
        input_ids,
        do_sample=True,
        top_k=120,
        top_p=0.95,
        temperature=0.7,
        repetition_penalty=1.2,
        max_length=256,
        num_return_sequences=1
    )
    paraphrased = t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
    return paraphrased

# ----------------------
# Feedback Loop with AI Detector
# ----------------------
def check_ai_score(text):
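    """Return (probability that the text is AI-generated, error message or None)."""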
    try:
        # Truncate long inputs to the detector's 512-token limit to avoid errors.
        result = ai_detector(text, truncation=True)
        for r in result:
            if r['label'] in ['LABEL_1', 'Fake']:
                return r['score'], None
            elif r['label'] in ['LABEL_0', 'Real']:
                return 1.0 - r['score'], None
        return 0.5, None
    except Exception as e:
        return None, f"AI detection error: {str(e)}"

# ----------------------
# Main Humanizer Pipeline
# ----------------------
def humanize_pipeline(text, tone, max_feedback_loops=2):
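    """Rewrite text sentence-by-sentence (Pegasus, then T5), post-process it, and
    repeat up to max_feedback_loops times while the detector still flags it as AI."""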
    sentences = split_sentences(text)
    paraphrased = []
    for sent in sentences:
        # First pass: Pegasus
        peg = pegasus_paraphrase(sent)
        # Second pass: T5
        t5 = t5_paraphrase(peg, tone)
        paraphrased.append(t5)
    joined = ' '.join(paraphrased)
    processed = postprocess_text(joined)
    # Feedback loop: while the detector still flags the text as AI, re-paraphrase all sentences
    for _ in range(max_feedback_loops):
        ai_prob, _ = check_ai_score(processed)
        if ai_prob is not None and ai_prob < 0.5:
            break  # Considered human
        # Re-paraphrase all sentences again
        sentences = split_sentences(processed)
        paraphrased = []
        for sent in sentences:
            peg = pegasus_paraphrase(sent)
            t5 = t5_paraphrase(peg, tone)
            paraphrased.append(t5)
        joined = ' '.join(paraphrased)
        processed = postprocess_text(joined)
    return processed

# ----------------------
# Semantic Similarity Function
# ----------------------
def semantic_similarity(text1, text2):
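    """Cosine similarity between the sentence embeddings of the two texts."""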
    emb1 = similarity_model.encode(text1, convert_to_tensor=True)
    emb2 = similarity_model.encode(text2, convert_to_tensor=True)
    sim = util.pytorch_cos_sim(emb1, emb2).item()
    return sim

# ----------------------
# Humanization Score & Rating
# ----------------------
def humanization_score(sim, ai_prob):
    # Equal-weight blend of how much the text changed (1 - similarity)
    # and how human the detector rates it (1 - AI probability).
    return (1.0 - sim) * 0.5 + (1.0 - ai_prob) * 0.5

def humanization_rating(score):
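    """Map the 0-1 humanization score to a coarse label (higher is better)."""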
    if score < 0.7:
        return f"⚠️ Still AI-like ({score:.2f})"
    elif score < 0.85:
        return f"👍 Acceptable ({score:.2f})"
    else:
        return f"✅ Highly Humanized ({score:.2f})"

# ----------------------
# Main Processing Function
# ----------------------
def process(text, tone):
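    """Generate three humanized candidates, keep the one the detector rates most
    human, and return it alongside detector scores, similarity, and a rating."""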
    if not text.strip():
        return "", "", 0.0, "", 0.0, ""
    pre_ai_prob, pre_err = check_ai_score(text)
    if pre_ai_prob is None:
        return "", f"AI Detection Error: {pre_err}", 0.0, "", 0.0, ""
    try:
        # Generate three candidate rewrites; the most human-sounding one is chosen below
        outputs = [humanize_pipeline(text, tone) for _ in range(3)]
    except Exception as e:
        return f"[Paraphrasing error: {str(e)}]", "", 0.0, "", 0.0, ""
    # Pick the most human-like version (lowest ai_prob)
    best = None
    best_score = -1
    best_ai_prob = 1.0
    for out in outputs:
        post_ai_prob, _ = check_ai_score(out)
        sim = semantic_similarity(text, out)
        score = humanization_score(sim, post_ai_prob if post_ai_prob is not None else 1.0)
        if post_ai_prob is not None and post_ai_prob < best_ai_prob:
            best = out
            best_score = score
            best_ai_prob = post_ai_prob
    if best is None:
        best = outputs[0]
        best_score = 0.0
        best_ai_prob = 1.0
    sim = semantic_similarity(text, best)
    rating = humanization_rating(best_score)
    ai_score_str = f"Pre: {100*(1-pre_ai_prob):.1f}% human | Post: {100*(1-best_ai_prob):.1f}% human"
    return (
        best,
        ai_score_str,
        sim,
        rating,
        best_score * 100,
        ""
    )

# ----------------------
# Gradio UI
# ----------------------
custom_theme = grthemes.Base(
    primary_hue="blue",
    secondary_hue="blue",
    neutral_hue="slate"
)

with gr.Blocks(theme=custom_theme, title="AI Humanizer - Made by Taha") as demo:
    gr.Markdown("""
    # 🧠 AI Humanizer
    <div style='display:flex;justify-content:space-between;align-items:center;'>
        <span style='font-size:1.2em;color:#7bb1ff;'>Rewrite AI text to sound 100% human</span>
        <span style='font-weight:bold;color:#7bb1ff;'>Made by Taha</span>
    </div>
    """, elem_id="header")
    with gr.Row():
        with gr.Column():
            text_in = gr.Textbox(label="Paste AI-generated text here", lines=8, placeholder="Paste your text...", elem_id="input-box")
            tone = gr.Dropdown(["Academic", "Casual", "Friendly", "Stealth"], value="Stealth", label="Tone Selector")
            btn = gr.Button("Humanize", elem_id="humanize-btn")
        with gr.Column():
            text_out = gr.Textbox(label="Humanized Output", lines=8, interactive=False, elem_id="output-box")
            ai_scores = gr.Markdown("", elem_id="ai-scores")
            sim_score = gr.Number(label="Similarity (0=very different, 1=very similar)", interactive=False)
            rating = gr.Markdown("", elem_id="rating")
            human_score = gr.Number(label="Humanization Score (%)", interactive=False)
    btn.click(
        process,
        inputs=[text_in, tone],
        outputs=[text_out, ai_scores, sim_score, rating, human_score],
        api_name="humanize"
    )
    gr.Markdown("""
    <div style='text-align:center;color:#7bb1ff;margin-top:2em;'>
        <b>Made by Taha</b> | Free for unlimited use | Optimized for students and creators
    </div>
    """, elem_id="footer")

if __name__ == "__main__":
    demo.launch()