padlet-summary / app.py
hlydecker's picture
Update app.py
cdb3883
import gradio as gr
import subprocess
import json
import requests
import re
import pandas as pd
import openai
from bs4 import BeautifulSoup
def strip_html_tags(html_text):
    """Return the plain-text content of an HTML fragment.

    Args:
        html_text: HTML markup as a string. ``None`` or an empty string is
            accepted and yields an empty string.

    Returns:
        The text of the markup with every tag removed, as produced by
        BeautifulSoup's ``get_text()``.
    """
    # Missing or empty markup has no text to extract; return early instead of
    # handing a non-string value to the parser.
    if not html_text:
        return ""
    # Use BeautifulSoup to parse and clean HTML content
    return BeautifulSoup(html_text, 'html.parser').get_text()
def html_posts_to_table(html_posts):
    """Collect "Subject: ..." and "Body Text: ..." entries into a table.

    Args:
        html_posts: Text containing ``Subject: <text>`` and
            ``Body Text: <text>`` entries, each running to the end of its
            line.

    Returns:
        A DataFrame with 'Subject' and 'Body Text' columns, filled in order
        of appearance in the text.

    Raises:
        ValueError: Raised by pandas when the number of subject entries
            differs from the number of body-text entries.
    """
    # ``.`` never matches a newline, so ``(.*)`` captures the rest of the line
    # whether or not a trailing newline follows. Requiring "\n" (as before)
    # silently dropped an entry sitting on the last, unterminated line.
    subjects = re.findall(r"Subject: (.*)", html_posts)
    body_texts = re.findall(r"Body Text: (.*)", html_posts)
    return pd.DataFrame({
        'Subject': subjects,
        'Body Text': body_texts,
    })
def _status_frame(message):
    """Build the one-row frame api_call uses to report a status message."""
    return pd.DataFrame({"subject": [message], "content": [""]})


def api_call(board_id, api_key):
    """Fetch the posts of a Padlet board and return them as a DataFrame.

    Runs ``curl`` against the Padlet v1 boards endpoint (requesting posts and
    sections) and extracts every included item of type "post".

    Args:
        board_id: Identifier of the board, interpolated into the request URL.
        api_key: Padlet API key, sent in the ``X-Api-Key`` header.

    Returns:
        A DataFrame with "subject" and "content" columns, one row per post
        that has a non-empty subject; "content" is the post's ``bodyHtml``
        with HTML tags stripped. When the request fails, the response cannot
        be parsed, or no such post exists, a single-row frame is returned
        whose "subject" holds the status message and whose "content" is "".
    """
    # NOTE(review): the API key is passed as a command-line argument, so it is
    # visible to other local users in the process list for the duration of
    # the call.
    curl_command = [
        # --fail makes curl exit non-zero on HTTP errors (>= 400) so that a
        # rejected key or unknown board is reported as a fetch error instead
        # of being mistaken for a board without posts.
        'curl', '-s', '--fail', '--request', 'GET',
        '--url', f"https://api.padlet.dev/v1/boards/{board_id}?include=posts%2Csections",
        '--header', f"X-Api-Key: {api_key}",
        '--header', 'accept: application/vnd.api+json'
    ]
    try:
        response = subprocess.check_output(curl_command, universal_newlines=True)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing or non-executable curl binary.
        return _status_frame("Error: Unable to fetch data using cURL.")

    try:
        response_data = json.loads(response)
    except json.JSONDecodeError:
        return _status_frame("Error: Unable to parse the Padlet API response.")

    # Extract the contents of all posts, stripping HTML tags from bodyHtml.
    # Every level is guarded with ``or`` because a JSON null arrives as None
    # and would otherwise break the chained lookups.
    included = response_data.get("included") if isinstance(response_data, dict) else None
    post_contents = []
    for post in included or []:
        if not isinstance(post, dict) or post.get("type") != "post":
            continue
        content = (post.get("attributes") or {}).get("content") or {}
        subject = content.get("subject", "")
        if not subject:
            continue
        body_html = content.get("bodyHtml") or ""
        post_contents.append({"subject": subject, "content": strip_html_tags(body_html)})

    if post_contents:
        return pd.DataFrame(post_contents)
    return _status_frame("No post contents found.")
def create_post(subject, post_content, board_id, api_key):
    """Create a post on a Padlet board via the Padlet v1 API.

    Args:
        subject: Subject line of the new post.
        post_content: Body text of the new post.
        board_id: Identifier of the board to post to, interpolated into the
            request URL.
        api_key: Padlet API key, sent in the ``X-Api-Key`` header.

    Returns:
        "Post created successfully." when curl exits with status 0, otherwise
        a message starting with "Error: Unable to create post - ".
    """
    payload = json.dumps({
        "data": {
            "type": "post",
            "attributes": {
                "content": {
                    "subject": subject,
                    "body": post_content
                }
            }
        }
    })
    # NOTE(review): the API key is passed as a command-line argument, so it is
    # visible to other local users in the process list for the duration of
    # the call.
    curl_command = [
        # --fail makes curl exit non-zero on HTTP errors (>= 400); without it
        # a rejected request would still be reported as a success.
        'curl', '-s', '--fail', '--request', 'POST',
        '--url', f"https://api.padlet.dev/v1/boards/{board_id}/posts",
        '--header', f"X-Api-Key: {api_key}",
        '--header', 'accept: application/vnd.api+json',
        '--header', 'content-type: application/vnd.api+json',
        '--data', payload,
    ]
    try:
        subprocess.check_output(curl_command, universal_newlines=True)
    except subprocess.CalledProcessError as e:
        # str(e) embeds the whole command line, including the X-Api-Key
        # header, so only the exit status is reported.
        return f"Error: Unable to create post - curl exited with status {e.returncode}"
    except OSError as e:
        # Raised when the curl binary is missing or cannot be executed.
        return f"Error: Unable to create post - {e}"
    return "Post created successfully."
def posts_to_prompt(padlet_posts):
    """Flatten a posts table into a single prompt string.

    Args:
        padlet_posts: DataFrame with "subject" and "content" columns.

    Returns:
        One "<subject> <content>" entry per row, joined with ", ". An empty
        table yields an empty string.
    """
    # A plain join also works for a frame with no rows; the previous
    # row-wise ``apply(...).str.cat(...)`` raised AttributeError there because
    # ``apply`` on an empty frame hands back a DataFrame, which has no ``.str``.
    return ", ".join(
        f"{subject} {content}"
        for subject, content in zip(padlet_posts["subject"], padlet_posts["content"])
    )
def remove_html_tags(text):
    """Delete every ``<...>`` span from *text* and return the result.

    The match is non-greedy and confined to a single line, so each tag is
    removed on its own and a ``<`` with no ``>`` on the same line is kept.
    """
    tag_pattern = re.compile('<.*?>')
    return tag_pattern.sub('', text)
def summarize_padlet_posts(padlet_posts, openai_api_key, system_prompt):
    """Ask an OpenAI completion model to summarize a table of Padlet posts.

    Args:
        padlet_posts: DataFrame of posts, flattened with posts_to_prompt.
        openai_api_key: OpenAI API key used for this request.
        system_prompt: Instruction text placed before the posts in the prompt.

    Returns:
        The generated summary with leading newlines and HTML tags removed,
        or a string of the form "Error: <message>" when the request or the
        post-processing fails.
    """
    # Concatenate the padlet posts and put the instructions on their own line
    # in front of them.
    prompt = system_prompt + "\n" + posts_to_prompt(padlet_posts)
    try:
        # "text-davinci-003" has been retired by OpenAI and requests for it
        # are rejected; "gpt-3.5-turbo-instruct" is the documented
        # replacement on the completions endpoint, selected via ``model=``.
        # NOTE(review): ``openai.Completion`` is the pre-1.0 SDK interface -
        # confirm the pinned openai version still provides it.
        response = openai.Completion.create(
            model="gpt-3.5-turbo-instruct",
            prompt=prompt,
            max_tokens=1000,  # Limit response length for concise summaries
            api_key=openai_api_key,
            temperature=0.5,  # Adjust temperature as needed
        )
        # Extract the summary, removing leading newlines and HTML tags
        summary = response.choices[0].text.lstrip('\n')
        return remove_html_tags(summary)
    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the app.
        return f"Error: {str(e)}"
def _fetch_status_message(posts):
    """Return api_call's status message if *posts* is a status frame, else None.

    api_call reports failures in-band as a single row with an empty
    "content" whose "subject" is "No post contents found." or starts with
    "Error: ". A real board consisting of exactly one such post would be
    treated the same way.
    """
    if len(posts) != 1:
        return None
    row = posts.iloc[0]
    subject = str(row["subject"])
    is_status = subject == "No post contents found." or subject.startswith("Error: ")
    if row["content"] == "" and is_status:
        return subject
    return None


def summarize_padlets(input_board_id, output_board_id, padlet_api, openai_api, system_prompt):
    """Summarize one Padlet board and publish the summary on another.

    Args:
        input_board_id: Board whose posts are fetched and summarized.
        output_board_id: Board that receives a post titled "Summary".
        padlet_api: Padlet API key used for both boards.
        openai_api: OpenAI API key used for the summarization request.
        system_prompt: Instruction text prepended to the posts.

    Returns:
        The summary text. If fetching the posts or generating the summary
        failed, the corresponding status/error message is returned instead
        and nothing is posted to the output board.
    """
    posts_to_summarize = api_call(input_board_id, padlet_api)

    # Do not spend an OpenAI request on (or publish a "summary" of) a
    # fetch-failure placeholder.
    fetch_status = _fetch_status_message(posts_to_summarize)
    if fetch_status is not None:
        return fetch_status

    summary = summarize_padlet_posts(posts_to_summarize, openai_api, system_prompt)
    # summarize_padlet_posts reports failures as "Error: ..." strings; show
    # them to the user but keep them off the output board.
    if summary.startswith("Error: "):
        return summary

    create_post("Summary", summary, output_board_id, padlet_api)
    return summary
# Gradio UI: five text inputs (board IDs, API keys, system prompt) wired to
# summarize_padlets, with the resulting summary shown in a text box.
# The components use ``gr.Textbox`` with ``value=`` because the older
# ``gr.inputs`` / ``gr.outputs`` namespaces and their ``default=`` keyword are
# deprecated in Gradio 3 and no longer exist in Gradio 4.
iface = gr.Interface(
    fn=summarize_padlets,
    inputs=[
        gr.Textbox(label="Input Board ID"),
        gr.Textbox(label="Output Board ID"),
        # Password-type boxes keep the keys masked on screen.
        gr.Textbox(label="Padlet API Key", type="password"),
        gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk.."),
        gr.Textbox(label="System Prompt", value="You are an AI assistant tasked with summarizing the main points of the following Padlet posts. Please provide a concise summary of the posts based on their content."),
    ],
    outputs=gr.Textbox(label="Summary"),
    live=False,  # Set to True to show the result without clicking a button
    title="Padlet Summarization",
    description="Summarize Padlet posts and create a summary post on another board using OpenAI GPT3.5.",
)
# Run the Gradio interface
iface.launch()