ai-text-humanizer / humanizer_simple.py
SidddhantJain
Grason app was built for ai detection and humanizer
850a7ff
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import random
import re
import warnings
warnings.filterwarnings("ignore")
class SimpleHumanizer:
def __init__(self):
# Load a reliable T5 model for paraphrasing
try:
self.model_name = "Vamsi/T5_Paraphrase_Paws"
self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, use_fast=False)
self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
print("βœ… Model loaded successfully")
except Exception as e:
print(f"❌ Error loading model: {e}")
self.tokenizer = None
self.model = None
def add_variations(self, text):
"""Add simple variations to make text more natural"""
# Common academic phrase variations
replacements = {
"shows that": ["demonstrates that", "indicates that", "reveals that", "suggests that"],
"results in": ["leads to", "causes", "produces", "generates"],
"due to": ["because of", "owing to", "as a result of", "on account of"],
"in order to": ["to", "so as to", "with the aim of", "for the purpose of"],
"as well as": ["and", "along with", "together with", "in addition to"],
"therefore": ["thus", "hence", "consequently", "as a result"],
"however": ["nevertheless", "nonetheless", "on the other hand", "yet"],
"furthermore": ["moreover", "additionally", "in addition", "what is more"],
"significant": ["notable", "considerable", "substantial", "important"],
"important": ["crucial", "vital", "essential", "key"],
"analyze": ["examine", "investigate", "study", "assess"],
"demonstrate": ["show", "illustrate", "reveal", "display"],
"utilize": ["use", "employ", "apply", "implement"]
}
result = text
for original, alternatives in replacements.items():
if original in result.lower():
replacement = random.choice(alternatives)
# Replace with case matching
pattern = re.compile(re.escape(original), re.IGNORECASE)
result = pattern.sub(replacement, result, count=1)
return result
def vary_sentence_structure(self, text):
"""Simple sentence structure variations"""
sentences = text.split('.')
varied = []
for sentence in sentences:
sentence = sentence.strip()
if not sentence:
continue
# Add some variety to sentence starters
if random.random() < 0.3:
starters = ["Notably, ", "Importantly, ", "Significantly, ", "Interestingly, "]
if not any(sentence.startswith(s.strip()) for s in starters):
sentence = random.choice(starters) + sentence.lower()
varied.append(sentence)
return '. '.join(varied) + '.'
def paraphrase_text(self, text):
"""Paraphrase using T5 model"""
if not self.model or not self.tokenizer:
return text
try:
# Split long text into chunks
max_length = 400
if len(text) > max_length:
sentences = text.split('.')
chunks = []
current_chunk = ""
for sentence in sentences:
if len(current_chunk + sentence) < max_length:
current_chunk += sentence + "."
else:
if current_chunk:
chunks.append(current_chunk.strip())
current_chunk = sentence + "."
if current_chunk:
chunks.append(current_chunk.strip())
paraphrased_chunks = []
for chunk in chunks:
para = self._paraphrase_chunk(chunk)
paraphrased_chunks.append(para)
return " ".join(paraphrased_chunks)
else:
return self._paraphrase_chunk(text)
except Exception as e:
print(f"Paraphrasing error: {e}")
return text
def _paraphrase_chunk(self, text):
"""Paraphrase a single chunk"""
try:
# Prepare input
input_text = f"paraphrase: {text}"
input_ids = self.tokenizer.encode(
input_text,
return_tensors="pt",
max_length=512,
truncation=True
)
# Generate paraphrase
with torch.no_grad():
outputs = self.model.generate(
input_ids=input_ids,
max_length=min(len(text.split()) + 50, 512),
num_beams=5,
num_return_sequences=1,
temperature=1.3,
top_k=50,
top_p=0.95,
do_sample=True,
early_stopping=True,
repetition_penalty=1.2
)
# Decode result
paraphrased = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
# Clean up the result
paraphrased = paraphrased.strip()
if paraphrased and len(paraphrased) > 10:
return paraphrased
else:
return text
except Exception as e:
print(f"Chunk paraphrasing error: {e}")
return text
# Initialize humanizer
humanizer = SimpleHumanizer()
def humanize_text(input_text, complexity="Medium"):
"""Main humanization function"""
if not input_text or not input_text.strip():
return "Please enter some text to humanize."
try:
# Step 1: Paraphrase the text
result = humanizer.paraphrase_text(input_text)
# Step 2: Add variations based on complexity
if complexity in ["Medium", "High"]:
result = humanizer.add_variations(result)
if complexity == "High":
result = humanizer.vary_sentence_structure(result)
# Step 3: Clean up formatting
result = re.sub(r'\s+', ' ', result)
result = re.sub(r'\s+([.!?,:;])', r'\1', result)
# Ensure proper sentence capitalization
sentences = result.split('. ')
formatted_sentences = []
for i, sentence in enumerate(sentences):
sentence = sentence.strip()
if sentence:
# Capitalize first letter
sentence = sentence[0].upper() + sentence[1:] if len(sentence) > 1 else sentence.upper()
formatted_sentences.append(sentence)
result = '. '.join(formatted_sentences)
# Final cleanup
if not result.endswith('.') and not result.endswith('!') and not result.endswith('?'):
result += '.'
return result
except Exception as e:
print(f"Humanization error: {e}")
return f"Error processing text: {str(e)}"
# Create Gradio interface
demo = gr.Interface(
fn=humanize_text,
inputs=[
gr.Textbox(
lines=10,
placeholder="Paste your AI-generated or robotic text here...",
label="Input Text",
info="Enter the text you want to humanize"
),
gr.Radio(
choices=["Low", "Medium", "High"],
value="Medium",
label="Humanization Complexity",
info="Low: Basic paraphrasing | Medium: + Vocabulary variations | High: + Structure changes"
)
],
outputs=gr.Textbox(
label="Humanized Output",
lines=10,
show_copy_button=True
),
title="πŸ€–βž‘οΈπŸ‘¨ AI Text Humanizer (Simple)",
description="""
**Transform robotic AI text into natural, human-like writing**
This tool uses advanced paraphrasing techniques to make AI-generated text sound more natural and human-like.
Perfect for academic papers, essays, reports, and any content that needs to pass AI detection tools.
**Features:**
βœ… Advanced T5-based paraphrasing
βœ… Vocabulary diversification
βœ… Sentence structure optimization
βœ… Academic tone preservation
βœ… Natural flow enhancement
""",
examples=[
[
"The implementation of machine learning algorithms in data processing systems demonstrates significant improvements in efficiency and accuracy metrics.",
"Medium"
],
[
"Artificial intelligence technologies are increasingly being utilized across various industries to enhance operational capabilities and drive innovation.",
"High"
]
],
theme="soft"
)
if __name__ == "__main__":
demo.launch(
share=False,
server_name="0.0.0.0",
server_port=7861,
debug=True
)