"""Gradio app that paraphrases the visible text of an HTML snippet.

Each alphabetic word is replaced with a randomly chosen WordNet synonym
while the surrounding markup is left in place.
"""

import random

import gradio as gr
import nltk
from bs4 import BeautifulSoup
from bs4.element import PreformattedString
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
from nltk.tokenize.treebank import TreebankWordDetokenizer

# Fetch the tokenizer model and the WordNet corpus used below.
# NOTE(review): recent NLTK releases may look for the 'punkt_tab' resource
# instead of 'punkt' -- confirm against the installed NLTK version.
nltk.download('punkt')
nltk.download('wordnet')

# Text inside these elements is code rather than prose, so it is left alone.
_NON_PROSE_PARENTS = frozenset({'script', 'style'})


def paraphrase_text(text):
    """Return *text* with each alphabetic word swapped for a WordNet synonym.

    Args:
        text: Plain text to paraphrase. ``None`` or an empty string yields
            an empty string.

    Returns:
        The paraphrased text. Tokens that are not purely alphabetic
        (punctuation, numbers, ...) and words with no synonyms are kept
        unchanged. The synonym is picked with ``random.choice``, so the
        result differs between calls.
    """
    if not text:
        return ''

    paraphrased_tokens = []
    for token in word_tokenize(text):
        if not token.isalpha():
            # Punctuation, numbers and mixed tokens pass through untouched.
            paraphrased_tokens.append(token)
            continue

        lowered = token.lower()
        # Compare case-insensitively so a capitalised word is not "replaced"
        # by its own lowercase form, and turn WordNet's underscore-joined
        # multi-word lemmas into normal spaced phrases.
        synonyms = [
            lemma.name().replace('_', ' ')
            for syn in wordnet.synsets(token)
            for lemma in syn.lemmas()
            if lemma.name().lower() != lowered
        ]
        paraphrased_tokens.append(random.choice(synonyms) if synonyms else token)

    # Detokenize rather than ' '.join so punctuation is re-attached to the
    # preceding word and Treebank-style quote tokens are turned back into
    # ordinary quotes.
    return TreebankWordDetokenizer().detokenize(paraphrased_tokens)


def paraphrase_html(html_text):
    """Paraphrase the prose text nodes of an HTML document.

    Args:
        html_text: HTML source as a string. ``None`` is treated as empty.

    Returns:
        The HTML serialised back to a string, with prose text nodes
        paraphrased by :func:`paraphrase_text`. Comments, doctype/CDATA and
        similar special nodes, the contents of ``<script>``/``<style>``, and
        whitespace-only nodes are left exactly as they were.
    """
    soup = BeautifulSoup(html_text or '', 'html.parser')

    # find_all returns a materialised result list, so replacing nodes in the
    # tree while looping over it is safe.
    for node in soup.find_all(string=True):
        # Comment, CData, Doctype, Declaration and ProcessingInstruction all
        # derive from PreformattedString; replacing them with a plain string
        # would turn them into visible text.
        if isinstance(node, PreformattedString):
            continue

        parent = node.parent
        if parent is not None and parent.name in _NON_PROSE_PARENTS:
            continue

        original = str(node)
        core = original.strip()
        if not core:
            # Whitespace-only node: keep it so the document layout survives.
            continue

        # Re-attach the node's own leading/trailing whitespace, which the
        # tokenizer would otherwise discard.
        lead_len = len(original) - len(original.lstrip())
        lead = original[:lead_len]
        trail = original[lead_len + len(core):]
        node.replace_with(lead + paraphrase_text(core) + trail)

    return str(soup)


# NOTE(review): gr.inputs / gr.outputs are the legacy Gradio namespaces; newer
# Gradio releases expose gr.Textbox / gr.HTML instead -- confirm against the
# Gradio version this app is pinned to before changing.
inputs = gr.inputs.Textbox(label="Enter HTML text to paraphrase")
outputs = gr.outputs.HTML(label="Paraphrased HTML")
title = "HTML Paraphraser"
description = "Enter HTML text and get a paraphrased version in HTML format."
# The example text keeps its surrounding line breaks, written as escapes so
# the literal stays on one line.
examples = [["\nThis is some HTML text to paraphrase.\n"]]

gr.Interface(
    paraphrase_html,
    inputs,
    outputs,
    title=title,
    description=description,
    examples=examples,
).launch()