Spaces:

Abdullraffayy
/

jfk_assassination_records_app

Runtime error

App Files Files Community

jfk_assassination_records_app / app.py

3v324v23

Auto-deploy from GitHub

4418e3c 4 months ago

raw

history blame

6.1 kB

	from fastapi import FastAPI,HTTPException
	from pydantic import BaseModel
	from fastapi.responses import FileResponse
	import gradio as gr
	from entity_recognition import extract_entities # Import entity extraction function
	#from entity_recognition import generate_word_cloud
	from wordcloud import WordCloud
	from summarization import summarizer
	from utils import list_files,process_file

	# import threading
	TEXT_FOLDER = "jfk_text"
	app = FastAPI()

	# Request Model
	class TextRequest(BaseModel):
	text: str
	import pygraphviz as pgv
	import re

	def generate_mermaid_mindmap(text):
	entities = extract_entities(text)
	print("Extracted Entities:", entities)

	# Create a directed graph
	G = pgv.AGraph(directed=True, rankdir="TB", bgcolor="white")

	# Add root node
	G.add_node("Document", shape="ellipse", style="filled", fillcolor="lightblue", label="Document")

	# Keep track of node names to ensure uniqueness
	node_counter = {}

	for category, values in entities.items():
	# Sanitize category name for the node identifier
	safe_category = re.sub(r'[^a-zA-Z0-9_]', '', category)
	if not safe_category or safe_category.startswith('.'):
	safe_category = "Category_" + str(hash(category) % 10000)

	# Add category node
	G.add_node(safe_category, shape="box", style="filled", fillcolor="lightgreen", label=category)
	G.add_edge("Document", safe_category)

	for value in values:
	# Clean up the value
	cleaned_value = value.strip().rstrip(')').lstrip(',')
	if not cleaned_value:
	cleaned_value = "Unknown"

	# Truncate long values for readability (max 50 characters)
	if len(cleaned_value) > 50:
	cleaned_value = cleaned_value[:47] + "..."

	# Sanitize value name for the node identifier
	safe_value = re.sub(r'[^a-zA-Z0-9_]', '', cleaned_value)
	if not safe_value:
	safe_value = "Value_" + str(hash(cleaned_value) % 10000)

	# Ensure unique node name
	node_key = safe_value
	node_counter[node_key] = node_counter.get(node_key, 0) + 1
	if node_counter[node_key] > 1:
	safe_value = f"{safe_value}_{node_counter[node_key]}"

	# Add value node
	G.add_node(safe_value, shape="ellipse", style="filled", fillcolor="lightyellow", label=cleaned_value)
	G.add_edge(safe_category, safe_value)

	# Render the graph to a PNG file
	output_path = "mindmap.png"
	G.draw(output_path, format="png", prog="dot") # 'dot' is the layout engine

	return output_path
	@app.post("/summarize")
	def summarize_text(request: TextRequest):
	chunks = [request.text[i:i+500] for i in range(0, len(request.text), 500)]
	summaries = []
	for chunk in chunks:
	try:
	summary = summarizer(
	chunk,
	max_length=130,
	min_length=30,
	do_sample=False,
	truncation=True # Explicitly enable truncation
	)
	summaries.append(summary[0]['summary_text'])
	except Exception as e:
	raise HTTPException(status_code=500, detail=f"Summarization error: {str(e)}")
	return {"summary": " ".join(summaries)}

	# Entity Recognition Endpoint
	@app.post("/entities")
	def extract_entities_endpoint(request: TextRequest):
	return {"entities": extract_entities(request.text)}

	# Word Cloud Generation Endpoint
	@app.post("/wordcloud")
	def generate_word_cloud(request: TextRequest):
	wordcloud = WordCloud(width=800, height=800,max_font_size=40, min_font_size=10, background_color="white").generate(request.text)
	img_path = "wordcloud.png"
	wordcloud.to_file(img_path)
	return FileResponse(img_path, media_type="image/png", filename="wordcloud.png")


	# Gradio UI
	with gr.Blocks() as iface:
	gr.Markdown("File Selector")
	gr.Markdown("Choose a file and process it for summarization, entity recognition, and word cloud generation.")

	# File selection & process button
	with gr.Row():
	file_dropdown = gr.Dropdown(choices=list_files(), label=" Select a File", interactive=True)
	process_button = gr.Button(" Process")

	# First Row (Original Text & Summary)
	with gr.Row():
	full_doc_text = gr.Textbox(label=" Full Document")
	output_summary = gr.Textbox(label=" Summarized Text")

	# Second Row (Entities & Word Cloud)
	with gr.Row():
	output_entities = gr.JSON(label=" Entities")
	output_wordcloud = gr.Image(label=" Word Cloud")
	with gr.Row():
	generate_mindmap_button = gr.Button("Generate Mind Map")
	output_mindmap = gr.Image(label="Mind Map") # Use HTML instead of Textbox

	generate_mindmap_button.click(
	fn=generate_mermaid_mindmap,
	inputs=full_doc_text,
	outputs=output_mindmap
	)


	# # Mind Map Generation
	# with gr.Row():
	# generate_mindmap_button = gr.Button("Generate Mind Map")
	# output_mindmap = gr.Image(label="Mind Map")
	# Mind Map Generation Section
	# with gr.Row():
	# generate_mindmap_button = gr.Button("Generate Mind Map")
	# output_mindmap = gr.HTML(label="Mind Map")

	# Process selected file
	process_button.click(
	fn=process_file,
	inputs=file_dropdown,
	outputs=[full_doc_text, output_summary, output_entities, output_wordcloud]
	)

	# # Connect mind map button to function (MOVE THIS INSIDE `with gr.Blocks()`)
	# generate_mindmap_button.click(
	# fn=generate_mind_map,
	# inputs=full_doc_text, # Use the full document text
	# outputs=output_mindmap
	# )
	# generate_mindmap_button.click(
	# fn=generate_mermaid_mindmap,
	# inputs=full_doc_text,
	# outputs=output_mindmap
	# )
	# Launch Gradio app
	if __name__ == "__main__":
	iface.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)