File size: 5,486 Bytes
2339301
 
 
4418e3c
2339301
 
 
4418e3c
 
2339301
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4418e3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2339301
4418e3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2339301
4418e3c
2339301
4418e3c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import os
from entity_recognition import extract_entities
from pydantic import BaseModel
from wordcloud import WordCloud
# Define paths
# All folders are relative paths, so they resolve against the process's
# current working directory at the time they are used.
TEXT_FOLDER = "jfk_text"  # source Markdown documents (listed by list_files, read by process_file)
SUMMARY_FOLDER = "summaryoutput"  # holds "summary_<file_name>" files looked up by get_summary
MINDMAP_FOLDER = "mindmap_output"  # NOTE(review): not referenced by the active code visible here - confirm it is still needed
WORDCLOUD_FOLDER = "wordcloud_output"  # destination for word-cloud PNGs written by process_file

# Request model
class TextRequest(BaseModel):
    """Pydantic model describing a request that carries one text payload."""
    text: str  # required string field; the raw text supplied by the client

def list_files(folder=None):
    """List the Markdown (.md) file names found in a folder.

    Args:
        folder: Directory to scan. Defaults to ``TEXT_FOLDER`` when omitted
            or ``None``, which preserves the original zero-argument call.

    Returns:
        A sorted list of entry names ending in ``.md``. An empty list is
        returned when the folder does not exist or is not a directory.
    """
    if folder is None:
        folder = TEXT_FOLDER

    # isdir (rather than exists) so that a regular file sitting at this path
    # yields an empty list instead of making os.listdir raise.
    if not os.path.isdir(folder):
        return []

    # os.listdir returns names in arbitrary order; sort for a stable listing.
    return sorted(name for name in os.listdir(folder) if name.endswith(".md"))

def read_file(file_path):
    """Return the entire contents of ``file_path`` decoded as UTF-8 text."""
    with open(file_path, encoding="utf-8") as handle:
        contents = handle.read()
    return contents

def get_summary(file_name):
    """Return the stored summary text for ``file_name``.

    The summary is looked up as ``summary_<file_name>`` inside
    ``SUMMARY_FOLDER``. When no such file exists, the placeholder string
    "Summary not found." is returned instead.
    """
    candidate = os.path.join(SUMMARY_FOLDER, "summary_{}".format(file_name))

    # Guard clause: bail out with the placeholder when there is nothing to read.
    if not os.path.exists(candidate):
        return "Summary not found."

    return read_file(candidate)

def process_file(file_name):
    """Load a document and produce its text, summary, entities and word cloud.

    Args:
        file_name: Name of a file inside ``TEXT_FOLDER``.

    Returns:
        A 4-tuple ``(text, summary, entities, wordcloud_path)``:

        * ``text`` - the document contents,
        * ``summary`` - the result of ``get_summary(file_name)``,
        * ``entities`` - ``{"entities": extract_entities(text)}``,
        * ``wordcloud_path`` - path of the PNG written under
          ``WORDCLOUD_FOLDER``.

        If anything fails, the tuple is
        ``(error_msg, error_msg, {"entities": {}}, None)``. It has the same
        length as the success result, so callers can unpack both the same way.
    """
    try:
        # 1. Validate input and paths
        if not file_name:
            raise FileNotFoundError("Invalid file selection")
        source_path = os.path.join(TEXT_FOLDER, file_name)
        if not os.path.exists(source_path):
            raise FileNotFoundError("Invalid file selection")

        # 2. Read file
        text = read_file(source_path)

        # 3. Prepare the output location
        os.makedirs(WORDCLOUD_FOLDER, exist_ok=True)
        wordcloud_path = os.path.join(WORDCLOUD_FOLDER, f"wordcloud_{file_name}.png")

        # 4. Create the word-cloud image
        wc = WordCloud(width=800, height=400, background_color="white").generate(text)
        wc.to_file(wordcloud_path)

        return (
            text,
            get_summary(file_name),
            {"entities": extract_entities(text)},
            wordcloud_path,
        )
    except Exception as e:
        # Deliberately broad: every failure is reported to the caller as a
        # message rather than raised. The original returned a fifth element
        # here (an HTML snippet left over from the removed mind-map output),
        # which did not match the 4-value success result.
        error_msg = f"Error: {str(e)}"
        return error_msg, error_msg, {"entities": {}}, None


# def process_file(file_name):
#     try:
#         # 1. Validate input and paths
#         if not file_name or not os.path.exists(os.path.join(TEXT_FOLDER, file_name)):
#             raise FileNotFoundError("Invalid file selection")
        
#         # 2. Read file
#         text = read_file(os.path.join(TEXT_FOLDER, file_name))
        
#         # 3. Generate outputs
#         wordcloud_path = os.path.join(WORDCLOUD_FOLDER, f"wordcloud_{file_name}.png")
#         os.makedirs(WORDCLOUD_FOLDER, exist_ok=True)
        
#         # 4. Create visualizations
#         wc = WordCloud(width=800, height=400, background_color="white").generate(text)
#         wc.to_file(wordcloud_path)
        
#         # 5. Generate mind map HTML
#         mindmap_html = generate_mind_map(text)
        
#         return (
#             text,
#             get_summary(file_name),
#             {"entities": extract_entities(text)},
#             wordcloud_path,  # Word Cloud image path
#             mindmap_html     # Mind Map HTML content
#         )
#     except Exception as e:
#         error_msg = f"Error: {str(e)}"
#         return error_msg, error_msg, {"entities": {}}, None, f"<div>{error_msg}</div>"    
# # def process_file(file_name):
#     """Process file and return all outputs including mind map."""
#     try:
#         if not file_name:  # Check if file_name is empty
#             raise ValueError("No file selected")
        
#         file_path = os.path.join(TEXT_FOLDER, file_name)
#         if not os.path.exists(file_path):
#             raise FileNotFoundError(f"File {file_name} not found in {TEXT_FOLDER}")
        
#         text = read_file(file_path)
#         return (
#             text,  # Full text
#             get_summary(file_name),  # Summary
#             {"entities": extract_entities(text)},  # Entities
#             generate_word_cloud(text, os.path.join(WORDCLOUD_FOLDER, f"wordcloud_{file_name}.png")),  # Word Cloud
#             generate_mind_map(text)  # Mind Map (returns HTML)
#         )
#     except Exception as e:
#         error_msg = f"Error: {str(e)}"
#         return error_msg, error_msg, {"entities": {}}, None, "<div>Error generating visualization</div>"
#     return summary, entities, wordcloud_path
# from entity_recognition import extract_entities
# from wordcloud import WordCloud
# from summarization import summarizer
# def process_file(filename):
#     file_path = f"your_data_folder/{filename}"  # Update this to the correct file path
#     try:
#         with open(file_path, "r", encoding="utf-8") as f:
#             text = f.read()

#         # Summarize the text
#         chunks = [text[i:i+500] for i in range(0, len(text), 500)]
#         summaries = []
#         for chunk in chunks:
#             summary = summarizer(chunk, max_length=130, min_length=30, do_sample=False, truncation=True)
#             summaries.append(summary[0]['summary_text'])

#         # Extract entities
#         entities = extract_entities(text)

#         # Generate word cloud
#         wordcloud = WordCloud(width=800, height=600, max_font_size=40, min_font_size=10, background_color="white").generate(text)
#         img_path = f"wordcloud_output/wordcloud_{filename}.png"  # Ensure the path is valid
#         wordcloud.to_file(img_path)

#         return text, " ".join(summaries), entities, img_path  # ✅ Returning exactly 4 values

#     except Exception as e:
#         return f"Error processing file: {str(e)}", "", {}, ""