import gradio as gr
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import random
import re
import warnings

warnings.filterwarnings("ignore")


class BatchHumanizer:
    def __init__(self):
        try:
            self.model_name = "Vamsi/T5_Paraphrase_Paws"
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, use_fast=False)
            self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
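            # Note: the tokenizer and weights above are downloaded from the
            # Hugging Face Hub on first run and cached locally for later runs.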
print("β Batch Humanizer model loaded successfully") | |
except Exception as e: | |
print(f"β Error loading model: {e}") | |
self.tokenizer = None | |
self.model = None | |

    def humanize_single_text(self, text, strength="medium"):
        """Humanize a single piece of text"""
        if not self.model or not self.tokenizer:
            return self.fallback_humanize(text)
        try:
            # Paraphrase using T5
            input_text = f"paraphrase: {text}"
            input_ids = self.tokenizer.encode(
                input_text,
                return_tensors="pt",
                max_length=512,
                truncation=True
            )
            # Adjust parameters based on strength
            if strength == "light":
                temp, top_p = 1.1, 0.9
            elif strength == "heavy":
                temp, top_p = 1.5, 0.95
            else:  # medium
                temp, top_p = 1.3, 0.92
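            # Rough intent of these settings: higher temperature and top_p mean more
            # aggressive rewording. With do_sample=True and num_beams > 1, transformers
            # runs beam-search multinomial sampling, so outputs vary between runs.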
            with torch.no_grad():
                outputs = self.model.generate(
                    input_ids=input_ids,
                    max_length=min(len(text.split()) + 50, 512),
                    num_beams=5,
                    temperature=temp,
                    top_p=top_p,
                    do_sample=True,
                    early_stopping=True,
                    repetition_penalty=1.2
                )
            result = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            # Additional humanization
            if strength in ["medium", "heavy"]:
                result = self.add_natural_variations(result)
            return self.clean_text(result) if result and len(result) > 10 else text
        except Exception as e:
            print(f"Error humanizing text: {e}")
            return self.fallback_humanize(text)

    def fallback_humanize(self, text):
        """Simple fallback humanization when the model is unavailable"""
        # Basic word replacements
        replacements = {
            "utilize": "use", "demonstrate": "show", "facilitate": "help",
            "optimize": "improve", "implement": "apply", "generate": "create",
            "therefore": "thus", "however": "yet", "furthermore": "also"
        }
        result = text
        for old, new in replacements.items():
            result = re.sub(r'\b' + old + r'\b', new, result, flags=re.IGNORECASE)
        return result
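    # Example of the fallback path above: "Researchers utilize complex methods, however
    # results vary" comes back as roughly "Researchers use complex methods, yet results
    # vary". Replacements are whole-word and case-insensitive, and always insert the
    # lowercase form.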

    def add_natural_variations(self, text):
        """Add natural language variations"""
        # Academic connectors
        connectors = [
            "Moreover", "Furthermore", "Additionally", "In contrast",
            "Similarly", "Consequently", "Nevertheless", "Notably"
        ]
        sentences = text.split('.')
        varied = []
        for i, sentence in enumerate(sentences):
            sentence = sentence.strip()
            if not sentence:
                continue
            # Sometimes add connectors
            if i > 0 and random.random() < 0.2:
                connector = random.choice(connectors)
                sentence = f"{connector}, {sentence.lower()}"
            varied.append(sentence)
        return '. '.join(varied) + '.' if varied else text
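    # Roughly one in five sentences (after the first) gets a connector prepended in
    # the method above, e.g. "The results improve accuracy" -> "Moreover, the results
    # improve accuracy". The lower() call also lowercases proper nouns; clean_text
    # below only re-capitalizes sentence starts.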

    def clean_text(self, text):
        """Clean and format text"""
        # Remove extra spaces
        text = re.sub(r'\s+', ' ', text)
        text = re.sub(r'\s+([.!?,:;])', r'\1', text)
        # Capitalize sentences
        sentences = text.split('. ')
        formatted = []
        for sentence in sentences:
            sentence = sentence.strip()
            if sentence:
                sentence = sentence[0].upper() + sentence[1:] if len(sentence) > 1 else sentence.upper()
                formatted.append(sentence)
        result = '. '.join(formatted)
        if not result.endswith(('.', '!', '?')):
            result += '.'
        return result


# Initialize humanizer
batch_humanizer = BatchHumanizer()
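# Standalone usage example (outside the Gradio UI); exact output depends on sampling:
#   batch_humanizer.humanize_single_text(
#       "The methodology utilizes comprehensive data collection.", "light"
#   )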


def process_text_input(text_input, strength):
    """Process single text input"""
    if not text_input or not text_input.strip():
        return "Please enter some text to humanize."
    return batch_humanizer.humanize_single_text(text_input, strength.lower())


def process_file_upload(file, strength):
    """Process an uploaded file"""
    if file is None:
        return "Please upload a file.", None
    try:
        # Read the file
        if file.name.endswith('.txt'):
            with open(file.name, 'r', encoding='utf-8') as f:
                content = f.read()
            # Split into paragraphs for processing
            paragraphs = [p.strip() for p in content.split('\n\n') if p.strip()]
            humanized_paragraphs = []
            for para in paragraphs:
                if len(para) > 50:  # Only process substantial paragraphs
                    humanized = batch_humanizer.humanize_single_text(para, strength.lower())
                    humanized_paragraphs.append(humanized)
                else:
                    humanized_paragraphs.append(para)
            result = '\n\n'.join(humanized_paragraphs)
            # Save to a new file
            output_filename = file.name.replace('.txt', '_humanized.txt')
            with open(output_filename, 'w', encoding='utf-8') as f:
                f.write(result)
            return result, output_filename
        elif file.name.endswith('.csv'):
            df = pd.read_csv(file.name)
            # Use the 'text' column if present, otherwise fall back to the first column
            text_column = 'text' if 'text' in df.columns else df.columns[0]
            # Humanize each text entry
            df['humanized'] = df[text_column].apply(
                lambda x: batch_humanizer.humanize_single_text(str(x), strength.lower()) if pd.notna(x) else x
            )
            # Save to a new CSV
            output_filename = file.name.replace('.csv', '_humanized.csv')
            df.to_csv(output_filename, index=False)
            return f"Processed {len(df)} entries. Check the 'humanized' column.", output_filename
        else:
            return "Unsupported file format. Please upload .txt or .csv files.", None
    except Exception as e:
        return f"Error processing file: {str(e)}", None


# Create Gradio interface with tabs
with gr.Blocks(theme="soft", title="AI Text Humanizer Pro") as demo:
    gr.Markdown("""
# AI Text Humanizer Pro
**Advanced tool to transform robotic AI-generated text into natural, human-like writing**

Perfect for:
- Academic papers and essays
- Research reports
- Business documents
- Professional content
- Bypassing AI detection tools
""")
    with gr.Tabs():
        # Single Text Tab
        with gr.TabItem("Single Text"):
            gr.Markdown("### Humanize Individual Text")
            with gr.Row():
                with gr.Column(scale=2):
                    text_input = gr.Textbox(
                        lines=12,
                        placeholder="Paste your AI-generated text here...",
                        label="Input Text",
                        info="Enter the text you want to humanize"
                    )
                    strength_single = gr.Radio(
                        choices=["Light", "Medium", "Heavy"],
                        value="Medium",
                        label="Humanization Strength"
                    )
                    process_btn = gr.Button("Humanize Text", variant="primary")
                with gr.Column(scale=2):
                    text_output = gr.Textbox(
                        lines=12,
                        label="Humanized Output",
                        show_copy_button=True
                    )
            # Examples
            gr.Examples(
                examples=[
                    ["The implementation of artificial intelligence algorithms demonstrates significant improvements in computational efficiency and accuracy metrics across various benchmark datasets.", "Medium"],
                    ["Machine learning models exhibit superior performance characteristics when evaluated against traditional statistical approaches in predictive analytics applications.", "Heavy"],
                    ["The research methodology utilized in this study involves comprehensive data collection and analysis procedures to ensure robust and reliable results.", "Light"]
                ],
                inputs=[text_input, strength_single],
                outputs=text_output,
                fn=process_text_input
            )
        # Batch Processing Tab
        with gr.TabItem("Batch Processing"):
            gr.Markdown("### Process Files in Batch")
            gr.Markdown("Upload .txt or .csv files to humanize multiple texts at once")
            with gr.Row():
                with gr.Column():
                    file_input = gr.File(
                        label="Upload File (.txt or .csv)",
                        file_types=[".txt", ".csv"]
                    )
                    strength_batch = gr.Radio(
                        choices=["Light", "Medium", "Heavy"],
                        value="Medium",
                        label="Humanization Strength"
                    )
                    process_file_btn = gr.Button("Process File", variant="primary")
                with gr.Column():
                    file_output = gr.Textbox(
                        lines=10,
                        label="Processing Results",
                        show_copy_button=True
                    )
                    download_file = gr.File(
                        label="Download Processed File",
                        # Keep the component visible: the click handler only sets its
                        # value, so a hidden component would never expose the download.
                        visible=True
                    )
        # Settings Tab
        with gr.TabItem("Settings & Info"):
            gr.Markdown("""
### How it works:
1. **Light Humanization**: Basic paraphrasing with minimal changes
2. **Medium Humanization**: Paraphrasing + vocabulary variations
3. **Heavy Humanization**: All techniques + sentence structure changes

### Features:
- Advanced T5-based paraphrasing
- Natural vocabulary diversification
- Sentence structure optimization
- Academic tone preservation
- Batch file processing
- Multiple output formats

### Supported Formats:
- **Text files (.txt)**: Processes paragraph by paragraph
- **CSV files (.csv)**: Adds a 'humanized' column with processed text

### Tips for best results:
- Use complete sentences and paragraphs
- Avoid very short fragments
- Choose appropriate humanization strength
- Review output for context accuracy
""")

    # Event handlers
    process_btn.click(
        fn=process_text_input,
        inputs=[text_input, strength_single],
        outputs=text_output
    )
    process_file_btn.click(
        fn=process_file_upload,
        inputs=[file_input, strength_batch],
        outputs=[file_output, download_file]
    )
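
    # process_file_upload returns a (status_text, output_path) pair: the text goes to
    # file_output and the path to download_file, which Gradio serves as a download link.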


if __name__ == "__main__":
    demo.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7862,
        debug=True
    )