# AI Text Humanizer — Hugging Face Space app (Gradio + T5 paraphraser)
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import random
import re
import warnings
warnings.filterwarnings("ignore")
class SimpleHumanizer:
    """Paraphrase text with a T5 model and apply light lexical/structural
    variation so machine-generated prose reads more naturally.

    If the model cannot be loaded, the instance degrades gracefully:
    ``paraphrase_text`` becomes a pass-through and only the rule-based
    variation methods have any effect.
    """

    def __init__(self):
        # Load a T5 checkpoint fine-tuned for paraphrasing (PAWS dataset).
        try:
            self.model_name = "Vamsi/T5_Paraphrase_Paws"
            # use_fast=False: stick with the slow SentencePiece tokenizer —
            # presumably to match the checkpoint's original tokenization; verify.
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, use_fast=False)
            self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
            print("Model loaded successfully")
        except Exception as e:
            # No model available: downstream methods check for None and no-op.
            print(f"Error loading model: {e}")
            self.tokenizer = None
            self.model = None

    def add_variations(self, text):
        """Swap common academic phrases for a random synonym (one swap per phrase).

        Matching is case-insensitive; only the first occurrence of each phrase
        is replaced, so repeated phrases keep some of the original wording.
        """
        replacements = {
            "shows that": ["demonstrates that", "indicates that", "reveals that", "suggests that"],
            "results in": ["leads to", "causes", "produces", "generates"],
            "due to": ["because of", "owing to", "as a result of", "on account of"],
            "in order to": ["to", "so as to", "with the aim of", "for the purpose of"],
            "as well as": ["and", "along with", "together with", "in addition to"],
            "therefore": ["thus", "hence", "consequently", "as a result"],
            "however": ["nevertheless", "nonetheless", "on the other hand", "yet"],
            "furthermore": ["moreover", "additionally", "in addition", "what is more"],
            "significant": ["notable", "considerable", "substantial", "important"],
            "important": ["crucial", "vital", "essential", "key"],
            "analyze": ["examine", "investigate", "study", "assess"],
            "demonstrate": ["show", "illustrate", "reveal", "display"],
            "utilize": ["use", "employ", "apply", "implement"],
        }
        result = text
        for original, alternatives in replacements.items():
            if original in result.lower():
                replacement = random.choice(alternatives)
                # Case-insensitive substitution of the first match only.
                pattern = re.compile(re.escape(original), re.IGNORECASE)
                result = pattern.sub(replacement, result, count=1)
        return result

    def vary_sentence_structure(self, text):
        """Randomly (30% chance) prepend a discourse marker to each sentence.

        Sentences are split naively on '.'; when a starter is added the original
        sentence is lowercased so the new opener carries the capital.
        """
        varied = []
        for sentence in text.split('.'):
            sentence = sentence.strip()
            if not sentence:
                continue
            if random.random() < 0.3:
                starters = ["Notably, ", "Importantly, ", "Significantly, ", "Interestingly, "]
                # Avoid stacking a starter onto a sentence that already has one.
                if not any(sentence.startswith(s.strip()) for s in starters):
                    sentence = random.choice(starters) + sentence.lower()
            varied.append(sentence)
        # Guard: empty/whitespace-only input would otherwise yield a lone '.'.
        if not varied:
            return text
        return '. '.join(varied) + '.'

    def paraphrase_text(self, text):
        """Paraphrase ``text`` with the T5 model, chunking long inputs.

        Returns the input unchanged when the model failed to load or when
        paraphrasing raises.
        """
        if not self.model or not self.tokenizer:
            return text
        try:
            # Chunk on sentence boundaries so each model call stays short.
            max_length = 400
            if len(text) <= max_length:
                return self._paraphrase_chunk(text)
            chunks = []
            current_chunk = ""
            for sentence in text.split('.'):
                if len(current_chunk + sentence) < max_length:
                    current_chunk += sentence + "."
                else:
                    if current_chunk:
                        chunks.append(current_chunk.strip())
                    current_chunk = sentence + "."
            if current_chunk:
                chunks.append(current_chunk.strip())
            return " ".join(self._paraphrase_chunk(chunk) for chunk in chunks)
        except Exception as e:
            print(f"Paraphrasing error: {e}")
            return text

    def _paraphrase_chunk(self, text):
        """Paraphrase a single chunk; fall back to the input on error or a
        degenerate (very short) generation."""
        try:
            # T5 paraphrase checkpoints expect a task prefix.
            input_text = f"paraphrase: {text}"
            input_ids = self.tokenizer.encode(
                input_text,
                return_tensors="pt",
                max_length=512,
                truncation=True,
            )
            with torch.no_grad():
                # NOTE(review): num_beams with do_sample=True mixes beam search
                # and sampling — kept as-is to preserve output behavior.
                outputs = self.model.generate(
                    input_ids=input_ids,
                    max_length=min(len(text.split()) + 50, 512),
                    num_beams=5,
                    num_return_sequences=1,
                    temperature=1.3,
                    top_k=50,
                    top_p=0.95,
                    do_sample=True,
                    early_stopping=True,
                    repetition_penalty=1.2,
                )
            paraphrased = self.tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
            # Reject suspiciously short generations and keep the original.
            if paraphrased and len(paraphrased) > 10:
                return paraphrased
            return text
        except Exception as e:
            print(f"Chunk paraphrasing error: {e}")
            return text
# Module-level singleton: constructing SimpleHumanizer loads the T5 model at
# import time, so the first startup may download weights and take a while.
humanizer = SimpleHumanizer()
def humanize_text(input_text, complexity="Medium"):
    """Humanize ``input_text`` through paraphrase + rule-based variation.

    Args:
        input_text: Raw text to transform; empty/whitespace input gets a
            help message instead of processing.
        complexity: "Low" (paraphrase only), "Medium" (+ vocabulary swaps),
            or "High" (+ sentence-structure variation).

    Returns:
        The transformed text, a help message for empty input, or an error
        message string if processing raises.
    """
    if not input_text or not input_text.strip():
        return "Please enter some text to humanize."
    try:
        # Step 1: model-based paraphrase (pass-through if model not loaded).
        result = humanizer.paraphrase_text(input_text)
        # Step 2: variation passes, gated by requested complexity.
        if complexity in ("Medium", "High"):
            result = humanizer.add_variations(result)
        if complexity == "High":
            result = humanizer.vary_sentence_structure(result)
        # Step 3: collapse whitespace and drop spaces before punctuation.
        result = re.sub(r'\s+', ' ', result)
        result = re.sub(r'\s+([.!?,:;])', r'\1', result)
        # Re-capitalize sentences (splitting on '. ' is naive but cheap).
        formatted_sentences = []
        for sentence in result.split('. '):
            sentence = sentence.strip()
            if not sentence:
                continue
            if len(sentence) > 1:
                sentence = sentence[0].upper() + sentence[1:]
            else:
                sentence = sentence.upper()
            formatted_sentences.append(sentence)
        result = '. '.join(formatted_sentences)
        # Ensure the output ends with terminal punctuation.
        if not result.endswith(('.', '!', '?')):
            result += '.'
        return result
    except Exception as e:
        # Surface the failure to the UI instead of crashing the app.
        print(f"Humanization error: {e}")
        return f"Error processing text: {str(e)}"
# Gradio UI: text box in, humanized text out, with a complexity selector.
# NOTE(review): the original title/description contained mojibake-garbled
# emoji ("π€β‘οΈπ¨", "β"); replaced with clean equivalents.
demo = gr.Interface(
    fn=humanize_text,
    inputs=[
        gr.Textbox(
            lines=10,
            placeholder="Paste your AI-generated or robotic text here...",
            label="Input Text",
            info="Enter the text you want to humanize",
        ),
        gr.Radio(
            choices=["Low", "Medium", "High"],
            value="Medium",
            label="Humanization Complexity",
            info="Low: Basic paraphrasing | Medium: + Vocabulary variations | High: + Structure changes",
        ),
    ],
    outputs=gr.Textbox(
        label="Humanized Output",
        lines=10,
        show_copy_button=True,
    ),
    title="AI Text Humanizer (Simple)",
    description="""
**Transform robotic AI text into natural, human-like writing**

This tool uses advanced paraphrasing techniques to make AI-generated text sound more natural and human-like.
Perfect for academic papers, essays, reports, and any content that needs a more natural tone.

**Features:**
- Advanced T5-based paraphrasing
- Vocabulary diversification
- Sentence structure optimization
- Academic tone preservation
- Natural flow enhancement
""",
    examples=[
        [
            "The implementation of machine learning algorithms in data processing systems demonstrates significant improvements in efficiency and accuracy metrics.",
            "Medium",
        ],
        [
            "Artificial intelligence technologies are increasingly being utilized across various industries to enhance operational capabilities and drive innovation.",
            "High",
        ],
    ],
    theme="soft",
)
if __name__ == "__main__":
    # Bind to all interfaces (container-friendly); port 7861 avoids clashing
    # with Gradio's default 7860 if another Space runs locally.
    demo.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7861,
        debug=True,
    )