import random
import re

import gradio as gr
import gradio.themes as grthemes
import numpy as np
import torch
from sentence_transformers import SentenceTransformer, util
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from transformers.pipelines import pipeline

# ----------------------
# Paraphrasing Model Setup (Pegasus + T5)
# ----------------------
PEGASUS_MODEL_NAME = "tuner007/pegasus_paraphrase"
T5_MODEL_NAME = "Vamsi/T5_Paraphrase_Paws"

# Single torch device shared by both seq2seq models.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

pegasus_tokenizer = AutoTokenizer.from_pretrained(PEGASUS_MODEL_NAME)
pegasus_model = AutoModelForSeq2SeqLM.from_pretrained(PEGASUS_MODEL_NAME).to(device)

t5_tokenizer = AutoTokenizer.from_pretrained(T5_MODEL_NAME)
t5_model = AutoModelForSeq2SeqLM.from_pretrained(T5_MODEL_NAME).to(device)

# ----------------------
# Semantic Similarity Model
# ----------------------
similarity_model = SentenceTransformer('all-MiniLM-L6-v2')

# ----------------------
# Local AI Detector (roberta-base-openai-detector)
# ----------------------
AI_DETECTOR_MODEL = "roberta-base-openai-detector"
# The pipeline takes a device index: 0 for the first GPU, -1 for CPU.
ai_detector = pipeline(
    "text-classification",
    model=AI_DETECTOR_MODEL,
    device=0 if device.type == "cuda" else -1,
)

# ----------------------
# Prompt Variations for Humanization
# ----------------------
PEGASUS_PROMPTS = [
    "Paraphrase this naturally:",
    "Rewrite as if explaining to a friend:",
    "Make this sound like a real conversation:",
    "Express this in a casual, human way:",
    "Reword this with natural flow:",
    "Make this sound less robotic:",
    "Rewrite in a friendly, informal tone:",
    "Paraphrase in a way a student would say it:",
]

T5_PROMPTS = [
    "Paraphrase the following text in a formal, academic tone:",
    "Paraphrase the following text in a casual, conversational tone:",
    "Paraphrase the following text in a friendly, approachable tone:",
    "Paraphrase the following text to bypass AI detectors and sound as human as possible:",
]
# ----------------------
# Sentence Splitter
# ----------------------
def split_sentences(text):
    """Split *text* into sentences.

    A sentence boundary is any run of whitespace that follows '.', '!' or
    '?'. Returns a list of the non-empty pieces; blank input yields ``[]``.
    """
    sentences = re.split(r'(?<=[.!?])\s+', text.strip())
    return [s for s in sentences if s]


# ----------------------
# Aggressive Post-Processing
# ----------------------
_CONTRACTIONS = {
    "do not": "don't",
    "cannot": "can't",
    "will not": "won't",
    "I am": "I'm",
    "is not": "isn't",
    "are not": "aren't",
    "did not": "didn't",
    "it is": "it's",
    "does not": "doesn't",
    "have not": "haven't",
    "has not": "hasn't",
}

# Compiled once at import time. The patterns are applied one after another,
# in dict order, because some phrases overlap ("it is not") and the result
# depends on which contraction is tried first.
_CONTRACTION_PATTERNS = [
    (re.compile(rf'\b{re.escape(phrase)}\b', flags=re.IGNORECASE), short)
    for phrase, short in _CONTRACTIONS.items()
]

_IDIOMS = (
    "at the end of the day",
    "to be honest",
    "as a matter of fact",
    "for what it's worth",
    "in a nutshell",
    "the bottom line is",
    "all things considered",
)

_TRANSITIONS = (
    "Interestingly,",
    "In fact,",
    "To be clear,",
    "As a result,",
    "For example,",
    "On the other hand,",
    "In other words,",
)


def _contract(text):
    """Replace long forms with contractions, keeping a leading capital."""
    for pattern, short in _CONTRACTION_PATTERNS:
        def _swap(match, short=short):
            # The match is case-insensitive, so "Do not" at the start of a
            # sentence must become "Don't", not "don't".
            if match.group(0)[0].isupper():
                return short[0].upper() + short[1:]
            return short

        text = pattern.sub(_swap, text)
    return text


def postprocess_text(text):
    """Apply randomised "humanising" tweaks to *text* and return the result.

    Steps, in order:
      1. Contract common long forms ("do not" -> "don't"), preserving the
         capitalisation of the first letter.
      2. With probability 0.3, append a random idiom followed by a period.
      3. With probability 0.3, prepend a random transition phrase.
      4. With probability 0.2 (and only for texts longer than three words),
         lower-case one randomly chosen interior word to mimic human error.

    The output is non-deterministic unless the caller seeds ``random``.
    """
    text = _contract(text)

    if random.random() < 0.3:
        text += " " + random.choice(_IDIOMS) + "."
    if random.random() < 0.3:
        text = random.choice(_TRANSITIONS) + " " + text

    # Randomly lower-case a word to mimic human error
    if random.random() < 0.2:
        words = text.split()
        if len(words) > 3:
            # Never touch the first or last word.
            idx = random.randint(1, len(words) - 2)
            words[idx] = words[idx].lower()
            text = ' '.join(words)
    return text


# ----------------------
# Multi-Model, Multi-Pass Paraphrasing
# ----------------------
def pegasus_paraphrase(sentence):
    """Paraphrase one sentence with the Pegasus model.

    A random entry of ``PEGASUS_PROMPTS`` is prepended to the sentence, the
    result is truncated to 60 tokens, and the top beam (5 beams) is decoded.
    Falls back to returning *sentence* unchanged if decoding yields nothing.

    NOTE(review): the prompt is fed to the model as ordinary input text;
    confirm this checkpoint actually follows instructions rather than
    paraphrasing the prompt into the output.
    """
    prompt = random.choice(PEGASUS_PROMPTS)
    full_prompt = f"{prompt} {sentence}"
    batch = pegasus_tokenizer(
        [full_prompt],
        truncation=True,
        padding='longest',
        max_length=60,
        return_tensors="pt",
    ).to(device)
    outputs = pegasus_model.generate(
        **batch,
        max_length=60,
        num_beams=5,
        num_return_sequences=1,
        temperature=1.0,
    )
    tgt_text = pegasus_tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return tgt_text[0] if tgt_text else sentence


# UI tone label -> index into T5_PROMPTS. The order mirrors the prompt list:
# formal/academic, casual, friendly, detector-evasion.
_TONE_TO_T5_PROMPT_INDEX = {
    "Academic": 0,
    "Casual": 1,
    "Friendly": 2,
    "Stealth": 3,
}


def _t5_prompt_for(tone):
    """Return the T5 prompt for *tone*, or a random one if tone is unknown."""
    idx = _TONE_TO_T5_PROMPT_INDEX.get(tone)
    if idx is not None and idx < len(T5_PROMPTS):
        return T5_PROMPTS[idx]
    return random.choice(T5_PROMPTS)


def t5_paraphrase(sentence, tone=None):
    """Paraphrase one sentence with the T5 model using sampling.

    Args:
        sentence: Text to rewrite.
        tone: Optional UI tone label ("Academic", "Casual", "Friendly",
            "Stealth"). When given and recognised, the matching entry of
            ``T5_PROMPTS`` is used; otherwise a random prompt is chosen,
            which is the behaviour callers get when omitting the argument.

    Returns:
        The decoded paraphrase (sampled, so non-deterministic).
    """
    prompt = _t5_prompt_for(tone) + " " + sentence
    input_ids = t5_tokenizer.encode(
        prompt, return_tensors="pt", max_length=256, truncation=True
    ).to(device)
    outputs = t5_model.generate(
        input_ids,
        do_sample=True,
        top_k=120,
        top_p=0.95,
        temperature=0.7,
        repetition_penalty=1.2,
        max_length=256,
        num_return_sequences=1,
    )
    return t5_tokenizer.decode(outputs[0], skip_special_tokens=True)


# ----------------------
# Feedback Loop with AI Detector
# ----------------------
def check_ai_score(text):
    """Estimate the probability that *text* is AI-generated.

    Returns:
        ``(probability, None)`` on success, where probability is in [0, 1]
        (0.5 if the detector returns no recognised label), or
        ``(None, error_message)`` if the detector raised.
    """
    try:
        # Truncate to the model's maximum length; without this, long inputs
        # make the pipeline raise instead of returning a score.
        result = ai_detector(text, truncation=True)
        for r in result:
            # NOTE(review): the LABEL_0/LABEL_1 fallback assumes index 1 is
            # the machine-generated class; confirm against the model's
            # id2label mapping.
            if r['label'] in ['LABEL_1', 'Fake']:
                return r['score'], None
            elif r['label'] in ['LABEL_0', 'Real']:
                return 1.0 - r['score'], None
        return 0.5, None
    except Exception as e:
        # Callers rely on the (None, message) contract rather than on an
        # exception, so the failure is reported, not re-raised.
        return None, f"AI detection error: {str(e)}"


# ----------------------
# Main Humanizer Pipeline
# ----------------------
def _paraphrase_pass(text, tone):
    """Run every sentence of *text* through Pegasus, then T5, then post-process."""
    rewritten = [
        t5_paraphrase(pegasus_paraphrase(sent), tone)
        for sent in split_sentences(text)
    ]
    return postprocess_text(' '.join(rewritten))


def humanize_pipeline(text, tone, max_feedback_loops=2):
    """Rewrite *text* and re-paraphrase while the detector still flags it.

    Args:
        text: Input text to humanise.
        tone: UI tone label, forwarded to the T5 prompt selection.
        max_feedback_loops: Maximum number of extra paraphrase passes after
            the first one.

    Returns:
        The rewritten text. The loop ends early once the detector's AI
        probability drops below 0.5, or if the detector cannot produce a
        score (re-paraphrasing without a signal only degrades the text).
    """
    processed = _paraphrase_pass(text, tone)
    for _ in range(max_feedback_loops):
        ai_prob, _err = check_ai_score(processed)
        if ai_prob is None or ai_prob < 0.5:
            break  # Considered human, or no score available to act on
        processed = _paraphrase_pass(processed, tone)
    return processed


# ----------------------
# Semantic Similarity Function
# ----------------------
def semantic_similarity(text1, text2):
    """Return the cosine similarity of the two texts' sentence embeddings."""
    emb1 = similarity_model.encode(text1, convert_to_tensor=True)
    emb2 = similarity_model.encode(text2, convert_to_tensor=True)
    return util.pytorch_cos_sim(emb1, emb2).item()


# ----------------------
# Humanization Score & Rating
# ----------------------
def humanization_score(sim, ai_prob):
    """Combine dissimilarity and detector result into a single score.

    The score is the equal-weight mean of ``1 - sim`` and ``1 - ai_prob``.

    NOTE(review): this rewards *low* similarity to the source, so a
    meaning-preserving paraphrase (sim near 1) cannot exceed about 0.5 and
    will always rate "Still AI-like". Confirm that this is intended.
    """
    return (1.0 - sim) * 0.5 + (1.0 - ai_prob) * 0.5


def humanization_rating(score):
    """Map a humanization score to a labelled rating string.

    Below 0.7 is "Still AI-like", below 0.85 is "Acceptable", otherwise
    "Highly Humanized". The score is shown with two decimals.
    """
    if score < 0.7:
        return f"⚠️ Still AI-like ({score:.2f})"
    elif score < 0.85:
        return f"👍 Acceptable ({score:.2f})"
    else:
        return f"✅ Highly Humanized ({score:.2f})"


# ----------------------
# Main Processing Function
# ----------------------
def process(text, tone):
    """Gradio callback: humanise *text* and report detector/similarity stats.

    Args:
        text: Raw text from the input box.
        tone: Selected tone label from the dropdown.

    Returns:
        A 6-tuple matching the click handler's outputs:
        ``(humanized_text, ai_score_markdown, similarity, rating_markdown,
        humanization_score_percent, hidden_text)``. Blank input returns empty
        values; detector or paraphrasing failures return the error message in
        place of the score text or the output text respectively.
    """
    if not text or not text.strip():
        return "", "", 0.0, "", 0.0, ""

    pre_ai_prob, pre_err = check_ai_score(text)
    if pre_ai_prob is None:
        # pre_err already carries an "AI detection error:" prefix.
        return "", pre_err, 0.0, "", 0.0, ""

    try:
        # Generate 3 versions for user choice
        outputs = [humanize_pipeline(text, tone) for _ in range(3)]
    except Exception as e:
        return f"[Paraphrasing error: {str(e)}]", "", 0.0, "", 0.0, ""

    # Pick the most human-like version (lowest ai_prob). Candidates the
    # detector could not score are skipped.
    best = None
    best_ai_prob = None
    for out in outputs:
        post_ai_prob, _err = check_ai_score(out)
        if post_ai_prob is None:
            continue
        if best_ai_prob is None or post_ai_prob < best_ai_prob:
            best = out
            best_ai_prob = post_ai_prob

    if best is None:
        # No candidate could be scored: fall back to the first one and
        # report the worst case.
        best = outputs[0]
        best_ai_prob = 1.0
        sim = semantic_similarity(text, best)
        best_score = 0.0
    else:
        sim = semantic_similarity(text, best)
        best_score = humanization_score(sim, best_ai_prob)

    rating = humanization_rating(best_score)
    ai_score_str = (
        f"Pre: {100*(1-pre_ai_prob):.1f}% human | "
        f"Post: {100*(1-best_ai_prob):.1f}% human"
    )
    return (
        best,
        ai_score_str,
        sim,
        rating,
        best_score * 100,
        "",
    )


# ----------------------
# Gradio UI
# ----------------------
# Kept at column 0 so the Markdown is never mistaken for an indented code
# block, whatever dedenting the component does or does not apply.
_HEADER_MD = """
# 🧠 AI Humanizer
Rewrite AI text to sound 100% human Made by Taha
"""

_FOOTER_MD = """
Made by Taha | Free for unlimited use | Optimized for students and creators
"""

custom_theme = grthemes.Base(
    primary_hue="blue",
    secondary_hue="blue",
    neutral_hue="slate",
)

with gr.Blocks(theme=custom_theme, title="AI Humanizer - Made by Taha") as demo:
    gr.Markdown(_HEADER_MD, elem_id="header")
    with gr.Row():
        # Left column: input text, tone choice and the trigger button.
        with gr.Column():
            text_in = gr.Textbox(
                label="Paste AI-generated text here",
                lines=8,
                placeholder="Paste your text...",
                elem_id="input-box",
            )
            tone = gr.Dropdown(
                ["Academic", "Casual", "Friendly", "Stealth"],
                value="Stealth",
                label="Tone Selector",
            )
            btn = gr.Button("Humanize", elem_id="humanize-btn")
        # Right column: rewritten text and the metrics returned by process().
        with gr.Column():
            text_out = gr.Textbox(
                label="Humanized Output",
                lines=8,
                interactive=False,
                elem_id="output-box",
            )
            ai_scores = gr.Markdown("", elem_id="ai-scores")
            sim_score = gr.Number(
                label="Similarity (0=very different, 1=very similar)",
                interactive=False,
            )
            rating = gr.Markdown("", elem_id="rating")
            human_score = gr.Number(label="Humanization Score (%)", interactive=False)
    # process() returns six values; the sixth goes to this hidden textbox.
    hidden_out = gr.Textbox(visible=False)
    btn.click(
        process,
        inputs=[text_in, tone],
        outputs=[text_out, ai_scores, sim_score, rating, human_score, hidden_out],
        api_name="humanize",
    )
    gr.Markdown(_FOOTER_MD, elem_id="footer")

demo.launch()