import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import random
import re
import warnings
warnings.filterwarnings("ignore")

class SimpleHumanizer:
    def __init__(self):
        # Load a T5 paraphrasing model (downloaded from the Hugging Face Hub on first run)
        try:
            self.model_name = "Vamsi/T5_Paraphrase_Paws"
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, use_fast=False)
            self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
            print("βœ… Model loaded successfully")
        except Exception as e:
            print(f"❌ Error loading model: {e}")
            self.tokenizer = None
            self.model = None
    
    def add_variations(self, text):
        """Add simple variations to make text more natural"""
        # Common academic phrase variations
        replacements = {
            "shows that": ["demonstrates that", "indicates that", "reveals that", "suggests that"],
            "results in": ["leads to", "causes", "produces", "generates"],
            "due to": ["because of", "owing to", "as a result of", "on account of"],
            "in order to": ["to", "so as to", "with the aim of", "for the purpose of"],
            "as well as": ["and", "along with", "together with", "in addition to"],
            "therefore": ["thus", "hence", "consequently", "as a result"],
            "however": ["nevertheless", "nonetheless", "on the other hand", "yet"],
            "furthermore": ["moreover", "additionally", "in addition", "what is more"],
            "significant": ["notable", "considerable", "substantial", "important"],
            "important": ["crucial", "vital", "essential", "key"],
            "analyze": ["examine", "investigate", "study", "assess"],
            "demonstrate": ["show", "illustrate", "reveal", "display"],
            "utilize": ["use", "employ", "apply", "implement"]
        }
        
        result = text
        for original, alternatives in replacements.items():
            if original in result.lower():
                replacement = random.choice(alternatives)
                # Case-insensitive replacement; keep the capitalization of the matched phrase
                pattern = re.compile(re.escape(original), re.IGNORECASE)
                match = pattern.search(result)
                if match and match.group(0)[0].isupper():
                    replacement = replacement[0].upper() + replacement[1:]
                result = pattern.sub(replacement, result, count=1)
        
        return result
    
    def vary_sentence_structure(self, text):
        """Simple sentence structure variations"""
        # Naive split on periods; '?'/'!' endings and abbreviations are not handled
        sentences = text.split('.')
        varied = []
        
        for sentence in sentences:
            sentence = sentence.strip()
            if not sentence:
                continue
                
            # Add some variety to sentence starters
            if random.random() < 0.3:
                starters = ["Notably, ", "Importantly, ", "Significantly, ", "Interestingly, "]
                if not any(sentence.startswith(s.strip()) for s in starters):
                    # Lowercase only the first letter so proper nouns keep their casing
                    sentence = random.choice(starters) + sentence[0].lower() + sentence[1:]
            
            varied.append(sentence)
        
        return '. '.join(varied) + '.'
    
    def paraphrase_text(self, text):
        """Paraphrase using T5 model"""
        if not self.model or not self.tokenizer:
            return text
        
        try:
            # Split long text into chunks (limit is in characters, not tokens)
            max_length = 400
            if len(text) > max_length:
                sentences = text.split('.')
                chunks = []
                current_chunk = ""
                
                for sentence in sentences:
                    if len(current_chunk + sentence) < max_length:
                        current_chunk += sentence + "."
                    else:
                        if current_chunk:
                            chunks.append(current_chunk.strip())
                        current_chunk = sentence + "."
                
                if current_chunk:
                    chunks.append(current_chunk.strip())
                
                paraphrased_chunks = []
                for chunk in chunks:
                    para = self._paraphrase_chunk(chunk)
                    paraphrased_chunks.append(para)
                
                return " ".join(paraphrased_chunks)
            else:
                return self._paraphrase_chunk(text)
                
        except Exception as e:
            print(f"Paraphrasing error: {e}")
            return text
    
    def _paraphrase_chunk(self, text):
        """Paraphrase a single chunk"""
        try:
            # Prepare input
            input_text = f"paraphrase: {text}"
            input_ids = self.tokenizer.encode(
                input_text, 
                return_tensors="pt", 
                max_length=512, 
                truncation=True
            )
            
            # Generate paraphrase; beam search is combined with sampling for more varied
            # output, and max_length is a rough token budget based on the input word count
            with torch.no_grad():
                outputs = self.model.generate(
                    input_ids=input_ids,
                    max_length=min(len(text.split()) + 50, 512),
                    num_beams=5,
                    num_return_sequences=1,
                    temperature=1.3,
                    top_k=50,
                    top_p=0.95,
                    do_sample=True,
                    early_stopping=True,
                    repetition_penalty=1.2
                )
            
            # Decode result
            paraphrased = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            
            # Clean up the result
            paraphrased = paraphrased.strip()
            if paraphrased and len(paraphrased) > 10:
                return paraphrased
            else:
                return text
                
        except Exception as e:
            print(f"Chunk paraphrasing error: {e}")
            return text

# Initialize humanizer
humanizer = SimpleHumanizer()
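
# Optional smoke test (a sketch, commented out so it does not slow startup): running the
# pipeline once here would confirm the model downloaded and loaded before the Gradio UI
# starts. The sample sentence is illustrative only, not part of the app's data.
# if humanizer.model is not None:
#     print(humanizer.paraphrase_text("The study shows that the proposed method is effective."))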

def humanize_text(input_text, complexity="Medium"):
    """Main humanization function"""
    if not input_text or not input_text.strip():
        return "Please enter some text to humanize."
    
    try:
        # Step 1: Paraphrase the text
        result = humanizer.paraphrase_text(input_text)
        
        # Step 2: Add variations based on complexity
        if complexity in ["Medium", "High"]:
            result = humanizer.add_variations(result)
        
        if complexity == "High":
            result = humanizer.vary_sentence_structure(result)
        
        # Step 3: Clean up formatting
        result = re.sub(r'\s+', ' ', result)
        result = re.sub(r'\s+([.!?,:;])', r'\1', result)
        
        # Ensure proper sentence capitalization
        sentences = result.split('. ')
        formatted_sentences = []
        for sentence in sentences:
            sentence = sentence.strip()
            if sentence:
                # Capitalize first letter
                sentence = sentence[0].upper() + sentence[1:] if len(sentence) > 1 else sentence.upper()
                formatted_sentences.append(sentence)
        
        result = '. '.join(formatted_sentences)
        
        # Final cleanup
        if not result.endswith(('.', '!', '?')):
            result += '.'
        
        return result
        
    except Exception as e:
        print(f"Humanization error: {e}")
        return f"Error processing text: {str(e)}"

# Create Gradio interface
demo = gr.Interface(
    fn=humanize_text,
    inputs=[
        gr.Textbox(
            lines=10,
            placeholder="Paste your AI-generated or robotic text here...",
            label="Input Text",
            info="Enter the text you want to humanize"
        ),
        gr.Radio(
            choices=["Low", "Medium", "High"],
            value="Medium",
            label="Humanization Complexity",
            info="Low: Basic paraphrasing | Medium: + Vocabulary variations | High: + Structure changes"
        )
    ],
    outputs=gr.Textbox(
        label="Humanized Output",
        lines=10,
        show_copy_button=True
    ),
    title="πŸ€–βž‘οΈπŸ‘¨ AI Text Humanizer (Simple)",
    description="""
    **Transform robotic AI text into natural, human-like writing**
    
    This tool combines T5-based paraphrasing with rule-based vocabulary and sentence-structure
    variations to make AI-generated text read more naturally. Suited to academic papers, essays,
    reports, and any content that needs to pass AI detection tools.
    
    **Features:**
    ✅ Advanced T5-based paraphrasing  
    ✅ Vocabulary diversification  
    ✅ Sentence structure optimization  
    ✅ Academic tone preservation  
    ✅ Natural flow enhancement  
    """,
    examples=[
        [
            "The implementation of machine learning algorithms in data processing systems demonstrates significant improvements in efficiency and accuracy metrics.",
            "Medium"
        ],
        [
            "Artificial intelligence technologies are increasingly being utilized across various industries to enhance operational capabilities and drive innovation.",
            "High"
        ]
    ],
    theme="soft"
)

if __name__ == "__main__":
    demo.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7861,
        debug=True
    )
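
# Programmatic use (a minimal sketch; "app" below assumes this file is saved as app.py,
# which is an assumption rather than anything the script itself specifies):
#
#   from app import humanize_text
#   print(humanize_text("The system utilizes neural networks to analyze large datasets.", "High"))
#
# demo.launch() above is only needed for the web interface.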