Spaces:
Sleeping
Sleeping
| import os | |
| from dotenv import load_dotenv, find_dotenv | |
| from serpapi import GoogleSearch | |
| import json | |
| import gradio as gr | |
| import openai | |
| from langchain.chat_models import ChatOpenAI | |
| from langchain.document_loaders import UnstructuredURLLoader | |
| from langchain.docstore.document import Document | |
| from langchain.chains import SequentialChain | |
| from langchain.chains.llm import LLMChain | |
| from langchain.prompts import PromptTemplate | |
| from langchain.chains.summarize import load_summarize_chain | |
| from unstructured.cleaners.core import remove_punctuation,clean,clean_extra_whitespace | |
| _ = load_dotenv(find_dotenv()) # read local .env file | |
| # ============== UTILITY FUNCTIONS ============== | |
def generate_trend(date_str: str):
    """Fetch Google's daily trending searches for Indonesia on a given date.

    Args:
        date_str: Date in ``yyyymmdd`` form, passed straight to SerpAPI's
            ``google_trends_trending_now`` engine.

    Returns:
        A tuple ``(trends, res)`` where ``trends`` is a list of at most the
        top 10 trending query strings and ``res`` is the corresponding list
        of raw SerpAPI search dicts (each carries the query plus its
        related articles, consumed later by ``fetch_article_urls``).

    Raises:
        KeyError: if ``SERP_API_KEY`` is unset or the API response lacks
            the expected ``daily_searches`` structure.
    """
    params = {
        'api_key': os.environ['SERP_API_KEY'],
        'engine': 'google_trends_trending_now',
        'hl': 'id',   # interface language: Indonesian
        'geo': 'ID',  # region: Indonesia
        'date': date_str,
        'frequency': 'daily'
    }
    results = GoogleSearch(params).get_dict()
    # Slicing never raises on short lists, so the original explicit
    # length check was redundant — [:10] alone caps the result at 10.
    res = results['daily_searches'][0]['searches'][:10]
    # NOTE(review): the original loop reused the name `search`, shadowing
    # the GoogleSearch client above; a comprehension avoids that.
    trends = [entry['query'] for entry in res]
    return trends, res
def fetch_article_urls(res_dict, selected_topic: str):
    """Return the article links attached to ``selected_topic``.

    Scans the SerpAPI trend entries in ``res_dict`` for the one whose
    'query' equals the topic and collects each of its articles' 'link'
    values. Returns an empty list when the topic is not found.
    """
    match = next(
        (entry for entry in res_dict if entry.get('query') == selected_topic),
        None,
    )
    if match is None:
        # Selected topic absent from the trend data.
        return []
    return [article['link'] for article in match['articles']]
def extract_article(url):
    """Given an URL, return a langchain Document for further processing.

    Loads the page element-by-element, keeps only elements whose metadata
    category is "NarrativeText", and joins their cleaned text into a
    single Document tagged with the source URL.
    """
    loader = UnstructuredURLLoader(
        urls=[url],
        mode="elements",
        post_processors=[clean, remove_punctuation, clean_extra_whitespace],
    )
    narrative_parts = [
        element.page_content
        for element in loader.load()
        if element.metadata['category'] == "NarrativeText"
    ]
    return Document(
        page_content=" ".join(narrative_parts),
        metadata={"source": url},
    )
| # ============== UTILITY FUNCTIONS ============== | |
| # ============== GRADIO FUNCTIONS ============== | |
def dropdown_trend(year_txt, month_txt, date_txt):
    """Gradio handler: refresh the trends dropdown for the entered date.

    Builds the ``yyyymmdd`` string SerpAPI expects. Month and day are
    zero-padded (so a user typing "7" gets "07") and stray whitespace is
    stripped — the raw concatenation previously produced an invalid date
    string for unpadded input.

    Returns the dropdown update (choices = top trends) and the raw trend
    data destined for the hidden JSON state component.
    """
    date_str = (
        year_txt.strip()
        + month_txt.strip().zfill(2)
        + date_txt.strip().zfill(2)
    )
    trends, res = generate_trend(date_str)
    return gr.Dropdown.update(choices=trends), res
def generate(topic, trends_dic):
    """Gradio handler: turn a selected trend topic into tweet drafts.

    Pipeline:
      1. look up the articles attached to *topic* in *trends_dic* (raw
         SerpAPI trend data) and extract their narrative text,
      2. refine-summarize the articles (LangChain "refine" chain),
      3. translate the summary to Indonesian,
      4. draft three tweet-thread versions as a JSON array.

    Returns:
        A 4-element list ``[summary_with_sources, draft1, draft2, draft3]``
        matching the four Gradio output components wired to this handler.

    Raises:
        json.JSONDecodeError: if the LLM reply is not valid JSON.
        KeyError: if ``OPENAI_API_KEY`` is unset.
    """
    article_urls = fetch_article_urls(trends_dic, topic)
    article_url_str = "\n- ".join(article_urls)
    article_docs = [extract_article(url) for url in article_urls]

    openai.api_key = os.environ['OPENAI_API_KEY']
    OpenAIModel = "gpt-3.5-turbo"
    # Low temperature: summarization/translation should stay faithful.
    llm = ChatOpenAI(model=OpenAIModel, temperature=0.1)

    summarize_prompt_template = """Write a concise summary of the following Indonesian articles:
{text}
CONCISE SUMMARY:
"""
    prompt = PromptTemplate.from_template(summarize_prompt_template)

    refine_template = (
        "Your job is to produce a final summary\n"
        "We have provided an existing summary up to a certain point: {existing_answer}\n"
        "We have the opportunity to refine the existing summary"
        "(only if needed) with some more context below.\n"
        "------------\n"
        "{text}\n"
        "------------\n"
        "If the context isn't useful, return the original summary."
    )
    refine_prompt = PromptTemplate.from_template(refine_template)
    summarize_chain = load_summarize_chain(
        llm=llm,
        chain_type="refine",
        question_prompt=prompt,
        refine_prompt=refine_prompt,
        return_intermediate_steps=True,
        input_key="input_documents",
        output_key="summarize_output",
        verbose=False
    )

    translate_prompt_template = """Translate this following text to Indonesian:
{summarize_output}
"""
    translate_prompt = PromptTemplate.from_template(translate_prompt_template)
    translate_chain = LLMChain(
        llm=llm,
        prompt=translate_prompt,
        output_key="translated_summary",
        verbose=True
    )

    # Higher temperature: tweet drafting benefits from creativity.
    llm_2 = ChatOpenAI(model=OpenAIModel, temperature=0.8)
    tweet_prompt_template = """Generate a list of three varied versions of Twitter post sequences. Each version has 3 to 10 coherent threads. \
The topic of the post is as follows:
{translated_summary}
You are required to write it in Indonesian. Keep it fun to read by adding some emojis and supporting hashtags (just if you think it's necessary).
Output it as an array with 3 JSON items format with the following keys:
- version: <version 1/2/3>,
- tweet: <the tweet, each thread separated by the number of the sequence and new line char>
"""
    tweet_prompt = PromptTemplate.from_template(tweet_prompt_template)
    tweet_chain = LLMChain(
        llm=llm_2,
        prompt=tweet_prompt,
        output_key="output_text",
        verbose=True
    )

    sequential_chain = SequentialChain(
        chains=[summarize_chain, translate_chain, tweet_chain],
        input_variables=["input_documents"],
        output_variables=["translated_summary", "output_text"],
        verbose=True
    )
    # NOTE: distinct name — the original reused `res` for both the chain
    # result and the loop variable below, shadowing it mid-function.
    chain_res = sequential_chain({"input_documents": article_docs})

    summary = chain_res['translated_summary'] + '\n\nSources:\n' + article_url_str
    generated_res = json.loads(chain_res['output_text'])
    drafts = [item.get('tweet') for item in generated_res]
    # Bug fix: the click handler is wired to exactly 4 outputs
    # (summary + 3 drafts); pad/truncate so an LLM reply with fewer or
    # more than 3 versions cannot break the UI.
    drafts = (drafts + ['', '', ''])[:3]
    return [summary] + drafts
| # ============== GRADIO FUNCTIONS ============== | |
# Placeholder shown in the dropdown before any trend fetch has run.
options = ['Do the Browse Trend first']

with gr.Blocks() as demo:
    gr.Markdown(
        """
# π°π₯ Trending News Article-based Tweet (π) Generator
Don't know a current trend? Have no resources to do a research? But you wanna gain a traffic to your Twitter a.k.a π? This is a perfect solution for you!
With a single click, you will get the top 10 most-searched topic in Google Search on specific date. Select one of them, we'll fetch some articles related to your selected topic.
Finally, foala! You get three drafts of tweet that you can simply copy-paste to your Twitter/π!
Psst, for now it will take around **~2 minutes** from fetching several articles related to selected topic until we generate the tweet drafts. We'll improve it soon!
"""
    )
    with gr.Row():
        # Left column: date input, trend browsing, and the generate trigger.
        with gr.Column(scale=1):
            with gr.Row():
                year_box = gr.Textbox(label="year (yyyy)")
                month_box = gr.Textbox(label="month (mm)")
                day_box = gr.Textbox(label="date (dd)")
            browse_btn = gr.Button("1. Browse Trend")
            trend_dropdown = gr.Dropdown(options, label="Top 10 trends")
            # Hidden state: raw SerpAPI trend payload passed to generate().
            trend_state = gr.JSON(visible=False)
            gen_btn = gr.Button("2. Generate now!", variant='primary')
        # Right column: summary plus three copyable tweet drafts.
        with gr.Column(scale=1):
            summary_box = gr.Textbox(label='Trend Summary')
            with gr.Tab("Draft 1"):
                draft_1 = gr.Textbox(lines=10, show_copy_button=True)
            with gr.Tab("Draft 2"):
                draft_2 = gr.Textbox(lines=10, show_copy_button=True)
            with gr.Tab("Draft 3"):
                draft_3 = gr.Textbox(lines=10, show_copy_button=True)

    browse_btn.click(
        dropdown_trend,
        inputs=[year_box, month_box, day_box],
        outputs=[trend_dropdown, trend_state],
    )
    gen_btn.click(
        generate,
        inputs=[trend_dropdown, trend_state],
        outputs=[summary_box, draft_1, draft_2, draft_3],
    )

demo.launch(debug=True)