# AI Text Humanizer — Hugging Face Space app (Gradio + T5 paraphraser)
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import random
import re
import warnings
warnings.filterwarnings("ignore")
class SimpleHumanizer:
    """Paraphrase text with a T5 model and apply light lexical/structural
    variation so machine-generated prose reads more naturally.

    If the model cannot be loaded, the instance degrades gracefully:
    ``paraphrase_text`` becomes a pass-through and only the rule-based
    variation methods have any effect.
    """

    def __init__(self):
        # Load a T5 checkpoint fine-tuned for paraphrasing (PAWS dataset).
        try:
            self.model_name = "Vamsi/T5_Paraphrase_Paws"
            # use_fast=False: stick with the slow SentencePiece tokenizer —
            # presumably to match the checkpoint's original tokenization; verify.
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, use_fast=False)
            self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
            print("Model loaded successfully")
        except Exception as e:
            # No model available: downstream methods check for None and no-op.
            print(f"Error loading model: {e}")
            self.tokenizer = None
            self.model = None

    def add_variations(self, text):
        """Swap common academic phrases for a random synonym (one swap per phrase).

        Matching is case-insensitive; only the first occurrence of each phrase
        is replaced, so repeated phrases keep some of the original wording.
        """
        replacements = {
            "shows that": ["demonstrates that", "indicates that", "reveals that", "suggests that"],
            "results in": ["leads to", "causes", "produces", "generates"],
            "due to": ["because of", "owing to", "as a result of", "on account of"],
            "in order to": ["to", "so as to", "with the aim of", "for the purpose of"],
            "as well as": ["and", "along with", "together with", "in addition to"],
            "therefore": ["thus", "hence", "consequently", "as a result"],
            "however": ["nevertheless", "nonetheless", "on the other hand", "yet"],
            "furthermore": ["moreover", "additionally", "in addition", "what is more"],
            "significant": ["notable", "considerable", "substantial", "important"],
            "important": ["crucial", "vital", "essential", "key"],
            "analyze": ["examine", "investigate", "study", "assess"],
            "demonstrate": ["show", "illustrate", "reveal", "display"],
            "utilize": ["use", "employ", "apply", "implement"],
        }
        result = text
        for original, alternatives in replacements.items():
            if original in result.lower():
                replacement = random.choice(alternatives)
                # Case-insensitive substitution of the first match only.
                pattern = re.compile(re.escape(original), re.IGNORECASE)
                result = pattern.sub(replacement, result, count=1)
        return result

    def vary_sentence_structure(self, text):
        """Randomly (30% chance) prepend a discourse marker to each sentence.

        Sentences are split naively on '.'; when a starter is added the original
        sentence is lowercased so the new opener carries the capital.
        """
        varied = []
        for sentence in text.split('.'):
            sentence = sentence.strip()
            if not sentence:
                continue
            if random.random() < 0.3:
                starters = ["Notably, ", "Importantly, ", "Significantly, ", "Interestingly, "]
                # Avoid stacking a starter onto a sentence that already has one.
                if not any(sentence.startswith(s.strip()) for s in starters):
                    sentence = random.choice(starters) + sentence.lower()
            varied.append(sentence)
        # Guard: empty/whitespace-only input would otherwise yield a lone '.'.
        if not varied:
            return text
        return '. '.join(varied) + '.'

    def paraphrase_text(self, text):
        """Paraphrase ``text`` with the T5 model, chunking long inputs.

        Returns the input unchanged when the model failed to load or when
        paraphrasing raises.
        """
        if not self.model or not self.tokenizer:
            return text
        try:
            # Chunk on sentence boundaries so each model call stays short.
            max_length = 400
            if len(text) <= max_length:
                return self._paraphrase_chunk(text)
            chunks = []
            current_chunk = ""
            for sentence in text.split('.'):
                if len(current_chunk + sentence) < max_length:
                    current_chunk += sentence + "."
                else:
                    if current_chunk:
                        chunks.append(current_chunk.strip())
                    current_chunk = sentence + "."
            if current_chunk:
                chunks.append(current_chunk.strip())
            return " ".join(self._paraphrase_chunk(chunk) for chunk in chunks)
        except Exception as e:
            print(f"Paraphrasing error: {e}")
            return text

    def _paraphrase_chunk(self, text):
        """Paraphrase a single chunk; fall back to the input on error or a
        degenerate (very short) generation."""
        try:
            # T5 paraphrase checkpoints expect a task prefix.
            input_text = f"paraphrase: {text}"
            input_ids = self.tokenizer.encode(
                input_text,
                return_tensors="pt",
                max_length=512,
                truncation=True,
            )
            with torch.no_grad():
                # NOTE(review): num_beams with do_sample=True mixes beam search
                # and sampling — kept as-is to preserve output behavior.
                outputs = self.model.generate(
                    input_ids=input_ids,
                    max_length=min(len(text.split()) + 50, 512),
                    num_beams=5,
                    num_return_sequences=1,
                    temperature=1.3,
                    top_k=50,
                    top_p=0.95,
                    do_sample=True,
                    early_stopping=True,
                    repetition_penalty=1.2,
                )
            paraphrased = self.tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
            # Reject suspiciously short generations and keep the original.
            if paraphrased and len(paraphrased) > 10:
                return paraphrased
            return text
        except Exception as e:
            print(f"Chunk paraphrasing error: {e}")
            return text
# Module-level singleton: constructing SimpleHumanizer loads the T5 model at
# import time, so the first startup may download weights and take a while.
humanizer = SimpleHumanizer()
def humanize_text(input_text, complexity="Medium"):
    """Humanize ``input_text`` through paraphrase + rule-based variation.

    Args:
        input_text: Raw text to transform; empty/whitespace input gets a
            help message instead of processing.
        complexity: "Low" (paraphrase only), "Medium" (+ vocabulary swaps),
            or "High" (+ sentence-structure variation).

    Returns:
        The transformed text, a help message for empty input, or an error
        message string if processing raises.
    """
    if not input_text or not input_text.strip():
        return "Please enter some text to humanize."
    try:
        # Step 1: model-based paraphrase (pass-through if model not loaded).
        result = humanizer.paraphrase_text(input_text)
        # Step 2: variation passes, gated by requested complexity.
        if complexity in ("Medium", "High"):
            result = humanizer.add_variations(result)
        if complexity == "High":
            result = humanizer.vary_sentence_structure(result)
        # Step 3: collapse whitespace and drop spaces before punctuation.
        result = re.sub(r'\s+', ' ', result)
        result = re.sub(r'\s+([.!?,:;])', r'\1', result)
        # Re-capitalize sentences (splitting on '. ' is naive but cheap).
        formatted_sentences = []
        for sentence in result.split('. '):
            sentence = sentence.strip()
            if not sentence:
                continue
            if len(sentence) > 1:
                sentence = sentence[0].upper() + sentence[1:]
            else:
                sentence = sentence.upper()
            formatted_sentences.append(sentence)
        result = '. '.join(formatted_sentences)
        # Ensure the output ends with terminal punctuation.
        if not result.endswith(('.', '!', '?')):
            result += '.'
        return result
    except Exception as e:
        # Surface the failure to the UI instead of crashing the app.
        print(f"Humanization error: {e}")
        return f"Error processing text: {str(e)}"
# Gradio UI: text box in, humanized text out, with a complexity selector.
# NOTE(review): the original title/description contained mojibake-garbled
# emoji ("π€β‘οΈπ¨", "β"); replaced with clean equivalents.
demo = gr.Interface(
    fn=humanize_text,
    inputs=[
        gr.Textbox(
            lines=10,
            placeholder="Paste your AI-generated or robotic text here...",
            label="Input Text",
            info="Enter the text you want to humanize",
        ),
        gr.Radio(
            choices=["Low", "Medium", "High"],
            value="Medium",
            label="Humanization Complexity",
            info="Low: Basic paraphrasing | Medium: + Vocabulary variations | High: + Structure changes",
        ),
    ],
    outputs=gr.Textbox(
        label="Humanized Output",
        lines=10,
        show_copy_button=True,
    ),
    title="AI Text Humanizer (Simple)",
    description="""
**Transform robotic AI text into natural, human-like writing**

This tool uses advanced paraphrasing techniques to make AI-generated text sound more natural and human-like.
Perfect for academic papers, essays, reports, and any content that needs a more natural tone.

**Features:**
- Advanced T5-based paraphrasing
- Vocabulary diversification
- Sentence structure optimization
- Academic tone preservation
- Natural flow enhancement
""",
    examples=[
        [
            "The implementation of machine learning algorithms in data processing systems demonstrates significant improvements in efficiency and accuracy metrics.",
            "Medium",
        ],
        [
            "Artificial intelligence technologies are increasingly being utilized across various industries to enhance operational capabilities and drive innovation.",
            "High",
        ],
    ],
    theme="soft",
)
if __name__ == "__main__":
    # Bind to all interfaces (container-friendly); port 7861 avoids clashing
    # with Gradio's default 7860 if another Space runs locally.
    demo.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7861,
        debug=True,
    )