import os | |
from entity_recognition import extract_entities | |
from pydantic import BaseModel | |
from wordcloud import WordCloud | |
# Folder locations used by the helpers in this module
TEXT_FOLDER = "jfk_text"  # source documents; list_files() picks the .md files from here
SUMMARY_FOLDER = "summaryoutput"  # get_summary() looks for summary_<file_name> here
MINDMAP_FOLDER = "mindmap_output"  # not referenced by the active code visible here
WORDCLOUD_FOLDER = "wordcloud_output"  # process_file() writes wordcloud_<file_name>.png here
# Request model: a pydantic model with one string field.
# (Documented with comments rather than a docstring so the model's
# generated schema is left exactly as it was.)
class TextRequest(BaseModel):
    # The only field; it has no default value.
    text: str
def list_files():
    """Return the names of the Markdown (.md) files found in TEXT_FOLDER.

    Returns:
        A list of file names (not full paths) ending in ".md", in the order
        ``os.listdir`` yields them. An empty list is returned when
        TEXT_FOLDER does not exist.
    """
    # Guard clause: nothing to list when the folder is missing.
    if not os.path.exists(TEXT_FOLDER):
        return []

    markdown_names = []
    for entry in os.listdir(TEXT_FOLDER):
        if entry.endswith(".md"):
            markdown_names.append(entry)
    return markdown_names
def read_file(file_path):
    """Return the entire content of ``file_path`` as text.

    Args:
        file_path: Path of the file to open.

    Returns:
        The file's content, decoded as UTF-8.
    """
    with open(file_path, mode="r", encoding="utf-8") as handle:
        contents = handle.read()
    return contents
def get_summary(file_name):
    """Return the stored summary for ``file_name``, if there is one.

    The summary is looked up as ``summary_<file_name>`` inside
    SUMMARY_FOLDER.

    Args:
        file_name: Name of the document whose summary is wanted.

    Returns:
        The summary file's text, or the string "Summary not found." when no
        such path exists.
    """
    candidate = os.path.join(SUMMARY_FOLDER, "summary_{}".format(file_name))
    # Guard clause: fall back to the placeholder text when nothing is stored.
    if not os.path.exists(candidate):
        return "Summary not found."
    return read_file(candidate)
def process_file(file_name):
    """Load a document from TEXT_FOLDER and build the outputs derived from it.

    Args:
        file_name: Name of a file inside TEXT_FOLDER.

    Returns:
        A 4-tuple ``(text, summary, entities, wordcloud_path)``:

        * ``text`` - the file's full content,
        * ``summary`` - the result of ``get_summary(file_name)``,
        * ``entities`` - ``{"entities": extract_entities(text)}``,
        * ``wordcloud_path`` - path of the PNG written under
          WORDCLOUD_FOLDER.

        Any exception raised while processing is caught and reported in a
        tuple of the SAME length:
        ``(error_msg, error_msg, {"entities": {}}, None)``, where
        ``error_msg`` is ``"Error: <exception text>"``.
    """
    try:
        # 1. Validate input and paths
        if not file_name:
            raise FileNotFoundError("Invalid file selection")
        source_path = os.path.join(TEXT_FOLDER, file_name)
        if not os.path.exists(source_path):
            raise FileNotFoundError("Invalid file selection")

        # 2. Read file
        text = read_file(source_path)

        # 3. Prepare the output location
        os.makedirs(WORDCLOUD_FOLDER, exist_ok=True)
        wordcloud_path = os.path.join(WORDCLOUD_FOLDER, f"wordcloud_{file_name}.png")

        # 4. Create visualizations
        wc = WordCloud(width=800, height=400, background_color="white").generate(text)
        wc.to_file(wordcloud_path)

        return (
            text,
            get_summary(file_name),
            {"entities": extract_entities(text)},
            wordcloud_path,
        )
    except Exception as e:
        # Deliberately broad: failures are reported through the return value
        # instead of propagating to the caller.
        error_msg = f"Error: {e}"
        # Must mirror the 4-element success tuple. The previous version also
        # returned a fifth "<div>...</div>" element (left over from a variant
        # that produced mind-map HTML), so the tuple length changed exactly
        # when an error occurred.
        return error_msg, error_msg, {"entities": {}}, None
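# NOTE: The commented-out blocks that follow are alternative versions of
# process_file; none of them is executed.
# def process_file(file_name):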
# try: | |
# # 1. Validate input and paths | |
# if not file_name or not os.path.exists(os.path.join(TEXT_FOLDER, file_name)): | |
# raise FileNotFoundError("Invalid file selection") | |
# # 2. Read file | |
# text = read_file(os.path.join(TEXT_FOLDER, file_name)) | |
# # 3. Generate outputs | |
# wordcloud_path = os.path.join(WORDCLOUD_FOLDER, f"wordcloud_{file_name}.png") | |
# os.makedirs(WORDCLOUD_FOLDER, exist_ok=True) | |
# # 4. Create visualizations | |
# wc = WordCloud(width=800, height=400, background_color="white").generate(text) | |
# wc.to_file(wordcloud_path) | |
# # 5. Generate mind map HTML | |
# mindmap_html = generate_mind_map(text) | |
# return ( | |
# text, | |
# get_summary(file_name), | |
# {"entities": extract_entities(text)}, | |
# wordcloud_path, # Word Cloud image path | |
# mindmap_html # Mind Map HTML content | |
# ) | |
# except Exception as e: | |
# error_msg = f"Error: {str(e)}" | |
# return error_msg, error_msg, {"entities": {}}, None, f"<div>{error_msg}</div>" | |
# def process_file(file_name):
# """Process file and return all outputs including mind map.""" | |
# try: | |
# if not file_name: # Check if file_name is empty | |
# raise ValueError("No file selected") | |
# file_path = os.path.join(TEXT_FOLDER, file_name) | |
# if not os.path.exists(file_path): | |
# raise FileNotFoundError(f"File {file_name} not found in {TEXT_FOLDER}") | |
# text = read_file(file_path) | |
# return ( | |
# text, # Full text | |
# get_summary(file_name), # Summary | |
# {"entities": extract_entities(text)}, # Entities | |
# generate_word_cloud(text, os.path.join(WORDCLOUD_FOLDER, f"wordcloud_{file_name}.png")), # Word Cloud | |
# generate_mind_map(text) # Mind Map (returns HTML) | |
# ) | |
# except Exception as e: | |
# error_msg = f"Error: {str(e)}" | |
# return error_msg, error_msg, {"entities": {}}, None, "<div>Error generating visualization</div>" | |
# return summary, entities, wordcloud_path | |
# from entity_recognition import extract_entities | |
# from wordcloud import WordCloud | |
# from summarization import summarizer | |
# def process_file(filename): | |
# file_path = f"your_data_folder/{filename}" # Update this to the correct file path | |
# try: | |
# with open(file_path, "r", encoding="utf-8") as f: | |
# text = f.read() | |
# # Summarize the text | |
# chunks = [text[i:i+500] for i in range(0, len(text), 500)] | |
# summaries = [] | |
# for chunk in chunks: | |
# summary = summarizer(chunk, max_length=130, min_length=30, do_sample=False, truncation=True) | |
# summaries.append(summary[0]['summary_text']) | |
# # Extract entities | |
# entities = extract_entities(text) | |
# # Generate word cloud | |
# wordcloud = WordCloud(width=800, height=600, max_font_size=40, min_font_size=10, background_color="white").generate(text) | |
# img_path = f"wordcloud_output/wordcloud_{filename}.png" # Ensure the path is valid | |
# wordcloud.to_file(img_path) | |
# return text, " ".join(summaries), entities, img_path # ✅ Returning exactly 4 values | |
# except Exception as e: | |
# return f"Error processing file: {str(e)}", "", {}, "" |