File size: 14,637 Bytes
40288c3
2a65456
40288c3
 
0fc0825
2a65456
40288c3
 
 
 
0fc0825
40288c3
 
 
 
 
0fc0825
40288c3
 
2a65456
40288c3
 
16f7329
40288c3
2a65456
40288c3
 
2a65456
40288c3
669c6aa
40288c3
 
 
 
 
 
 
 
2a65456
2412db3
40288c3
 
 
 
2412db3
358023a
40288c3
2a65456
 
fc33512
358023a
 
 
 
a5ee69f
40288c3
2a65456
 
 
 
40288c3
358023a
2a65456
40288c3
802da48
40288c3
2a65456
40288c3
 
2a65456
 
40288c3
 
 
 
 
8e8e631
 
40288c3
 
2a65456
 
40288c3
 
 
 
 
 
2a65456
 
 
 
a5ee69f
2247680
 
62f6318
 
358023a
 
2247680
358023a
 
40288c3
 
a5ee69f
 
 
40288c3
 
 
 
358023a
a5ee69f
358023a
a5ee69f
 
40288c3
 
2a65456
 
a5ee69f
 
 
 
 
 
 
2a65456
a5ee69f
 
358023a
a5ee69f
 
2a65456
40288c3
 
 
2a65456
612ea81
 
 
 
 
 
 
 
40288c3
 
2a65456
612ea81
 
 
 
 
 
 
 
2a65456
40288c3
9676032
53c2c48
5094360
40288c3
 
2a65456
40288c3
358023a
 
96add56
358023a
fe3716e
358023a
fc33512
 
92493a8
53c2c48
358023a
 
62f6318
2db9d56
2a65456
1d1002c
 
40288c3
62f6318
35fadc1
 
40288c3
2a65456
cf97558
 
62f6318
 
a5ee69f
96add56
2a65456
358023a
 
 
1e189c2
2a65456
231c167
1e189c2
358023a
40288c3
7d1ed6a
2a65456
a5ee69f
358023a
a5ee69f
96add56
612ea81
 
 
 
2a65456
612ea81
358023a
40288c3
2a65456
62f6318
 
 
40288c3
2a65456
612ea81
2a65456
612ea81
2a65456
358023a
2a65456
40288c3
 
2a65456
 
62f6318
 
358023a
40288c3
2a65456
 
62f6318
 
612ea81
2a65456
62f6318
 
 
 
 
40288c3
2a65456
 
62f6318
358023a
2a65456
40288c3
2a65456
 
 
 
 
40288c3
 
2a65456
 
 
 
 
 
 
 
 
40288c3
2a65456
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40288c3
 
2a65456
 
40288c3
2a65456
 
 
 
 
40288c3
 
2a65456
40288c3
 
 
2a65456
 
 
 
 
 
 
 
 
 
40288c3
2a65456
40288c3
 
2a65456
 
 
40288c3
 
2a65456
 
a5ee69f
358023a
 
2a65456
40288c3
96add56
7d1ed6a
62f6318
 
 
 
 
 
 
 
 
 
 
40288c3
358023a
 
2a65456
 
40288c3
 
fe3716e
 
358023a
 
40288c3
358023a
53c2c48
2e32e9e
358023a
2a65456
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
import json
import logging
import math
from datetime import datetime, timedelta

import gradio as gr
import pandas as pd
from deep_translator import GoogleTranslator
from gradio_calendar import Calendar

from gematria import calculate_gematria, strip_diacritics
from utils import (
    date_to_words,
    translate_date_to_words,
    process_json_files
)

# --- Constants ---
# Default path of the text file read by load_forbidden_names(), one name per line.
FORBIDDEN_NAMES_FILE = "c.txt"
# Default selection of the language dropdowns; date words are only translated
# when the chosen date language differs from this value.
DEFAULT_LANGUAGE = 'english'

# Module-level logger used by the helpers below; logging is configured at INFO level.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)


# --- Helper Functions ---

def create_language_dropdown(label: str, default_value: str = DEFAULT_LANGUAGE, show_label: bool = True) -> gr.Dropdown:
    """Build a language-selection dropdown from the translator's language list.

    Args:
        label (str): Caption displayed for the dropdown.
        default_value (str, optional): Initially selected language name.
            Defaults to ``DEFAULT_LANGUAGE``.
        show_label (bool, optional): Whether the caption is rendered. Defaults to True.

    Returns:
        gr.Dropdown: Dropdown whose choices are the keys of the mapping returned
        by ``GoogleTranslator().get_supported_languages(as_dict=True)``.
    """
    supported = GoogleTranslator().get_supported_languages(as_dict=True)
    language_names = list(supported)
    return gr.Dropdown(choices=language_names, label=label, value=default_value, show_label=show_label)


def calculate_gematria_sum(text: str, date_words: str) -> int:
    """Return the Gematria value of ``text`` and ``date_words`` joined by one space.

    Diacritics are stripped from the joined phrase before it is evaluated.
    Both the joined phrase and the resulting sum are logged at INFO level.
    """
    phrase = "{} {}".format(text, date_words)
    logger.info(f"Combined input for Gematria: {phrase}")
    normalized = strip_diacritics(phrase)
    total = calculate_gematria(normalized)
    logger.info(f"Gematria sum: {total}")
    return total


def perform_els_search(start: int, end: int, step: int, rounds: int, length: int, tlang: str,
                       strip_spaces: bool, strip_in_braces: bool, strip_diacritics: bool, average_combine: bool,
                       search_word_yiddish: str, date_words: str) -> list:
    """Run the ELS search and label every result with the given date words.

    Filtering by ``search_word_yiddish`` is currently switched off: the word is
    only written to the debug log and every result returned by
    ``process_json_files`` is kept.

    Returns:
        list: One dict per ELS result with the keys 'Date', 'Book Result' and
        'Result'; an empty list when ``step`` or ``rounds`` equals 0.
    """
    logger.info("Starting ELS search...")
    logger.debug(f"Search word (Yiddish): {search_word_yiddish}")

    # Guard clause: a zero step or zero rounds means there is nothing to search.
    searchable = step != 0 and rounds != 0
    if not searchable:
        logger.info("Cannot search with step 0 or rounds 0")
        return []

    raw_results = process_json_files(start, end, step, rounds, length, tlang, strip_spaces,
                                     strip_in_braces, strip_diacritics, average_combine,
                                     translate_results=False)

    # The word filter (search_word_yiddish contained in 'els_result_text') is
    # disabled, so every raw result is converted into an output row.
    rows = []
    for entry in raw_results:
        logger.debug(f"Searching result: {entry}")
        row = {
            'Date': date_words,
            'Book Result': entry['els_result_text'],
            'Result': entry.get('translated_text', ''),
        }
        rows.append(row)
    return rows


def generate_json_dump(start: int, end: int, step: int, rounds: int, length: int, tlang: str,
                       strip_spaces: bool, strip_in_braces: bool, strip_diacritics_chk: bool,
                       search_phrase: str, results_df: pd.DataFrame, search_word: str,
                       start_date: datetime, end_date: datetime) -> str:
    """Serialize the search configuration, date range and results as JSON text.

    Returns:
        str: A 4-space indented, non-ASCII-preserving JSON document with the
        top-level keys "Configuration", "DateRange" and "Results". The results
        are the rows of ``results_df`` as a list of records.
    """
    iso_day = "%Y-%m-%d"
    payload = {}
    payload["Configuration"] = {
        "Start Book": start,
        "End Book": end,
        "Step": step,
        "Rounds": rounds,
        "Length": length,
        "Target Language": tlang,
        "Strip Spaces": strip_spaces,
        "Strip Text in Braces": strip_in_braces,
        "Strip Diacritics": strip_diacritics_chk,
        "Search Phrase": search_phrase,
        "Search Word": search_word,
    }
    payload["DateRange"] = {
        "StartDate": start_date.strftime(iso_day),
        "EndDate": end_date.strftime(iso_day),
    }
    # Round-trip through pandas' JSON writer so the cells become plain JSON values.
    records_json = results_df.to_json(orient='records', force_ascii=False)
    payload["Results"] = json.loads(records_json)

    logger.info(f"Generated JSON dump: {payload}")
    return json.dumps(payload, indent=4, ensure_ascii=False)


def download_json_file(config_json: str, step: int, rounds: int,
                       strip_spaces: bool, strip_in_braces: bool, strip_diacritics_chk: bool) -> str:
    """Write the JSON config to a descriptively named file and return its path.

    The file name is ``step-<step>-rounds-<rounds>`` followed by one tag per
    enabled strip option ("-stSp", "-stBr", "-stDc") and the ".json" extension.
    The file is written relative to the current working directory.
    """
    option_tags = (
        (strip_spaces, "-stSp"),
        (strip_in_braces, "-stBr"),
        (strip_diacritics_chk, "-stDc"),
    )
    suffix = "".join(tag for enabled, tag in option_tags if enabled)
    file_path = f"step-{step}-rounds-{rounds}{suffix}.json"
    with open(file_path, "w", encoding='utf-8') as handle:
        handle.write(config_json)
    logger.info(f"Downloaded JSON file to: {file_path}")
    return file_path


# --- Forbidden Names Functions ---

def load_forbidden_names(filename: str = FORBIDDEN_NAMES_FILE) -> list:
    """Load the forbidden names from a UTF-8 text file, one name per line.

    Surrounding whitespace is stripped from every line. Blank lines are skipped
    so that an empty entry can never be matched as a forbidden name.

    Args:
        filename (str, optional): Path of the names file.
            Defaults to ``FORBIDDEN_NAMES_FILE``.

    Returns:
        list: The non-empty names in file order, or an empty list when the
        file does not exist.
    """
    try:
        with open(filename, "r", encoding='utf-8') as f:
            stripped_lines = (line.strip() for line in f)
            return [name for name in stripped_lines if name]
    except FileNotFoundError:
        # A missing list is tolerated: the app keeps running without forbidden
        # names. Report it through the module logger like the other helpers.
        logger.error(f"Forbidden names file '{filename}' not found.")
        return []


def check_name_similarity(name: str, forbidden_names: list, threshold: int = 80) -> bool:
    """Report whether ``name`` is similar to any entry of ``forbidden_names``.

    The comparison is case-insensitive and uses ``fuzz.ratio``. The first entry
    whose ratio reaches ``threshold`` is logged and ends the search.

    Args:
        name (str): Text to check.
        forbidden_names (list): Names to compare against.
        threshold (int, optional): Minimum ratio counted as a match. Defaults to 80.

    Returns:
        bool: True when a forbidden name matches, otherwise False.
    """
    # Imported here, as in the original, so the dependency is only needed when
    # the check actually runs.
    from fuzzywuzzy import fuzz

    # Lower-case the candidate once instead of once per forbidden name.
    lowered_name = name.lower()
    for forbidden_name in forbidden_names:
        if fuzz.ratio(lowered_name, forbidden_name.lower()) >= threshold:
            # Use the module logger (not the root logger) like the rest of the file.
            logger.info(f"Forbidden word {forbidden_name} detected in: {name}")
            return True
    return False


# --- Gradio UI ---

# Build the Gradio interface; components created inside this context belong to `app`.
with gr.Blocks() as app:
    # Row 1: date range, language for the date words, and the search word.
    with gr.Row():
        start_date = Calendar(type="datetime", label="1. Select Start Date")
        end_date = Calendar(type="datetime", label="2. Select End Date")
        date_language_input = create_language_dropdown("3. Date Word Language", default_value=DEFAULT_LANGUAGE)
        search_word = gr.Textbox(label="4. Search Word")

    # Row 2: the name/topic whose Gematria value drives the ELS step.
    with gr.Row():
        gematria_text = gr.Textbox(label="5. Name and/or Topic", value="Hans Albert Einstein")
        gematria_btn = gr.Button("6. Calculate Journal Sum")

    gematria_result = gr.Number(label="Journal Sum")
    # TODO: the journal sum shown here is wrong, because the selected language
    # name (e.g. "english") is added to it initially.
    # TODO: this only affects the interface field(s), not the result searching.

    # Row 3: book range, step and rounds configuration.
    with gr.Row():
        start = gr.Number(label="Start Book", value=1)
        end = gr.Number(label="End Book", value=39)
        step = gr.Number(label="Jump Width (Steps) for ELS")
        rounds = gr.Number(label="Rounds through Books", value=1)
        # Hidden field holding the unrounded step that the "Steps / 2" and
        # "Steps * 2" buttons operate on.
        float_step = gr.Number(visible=False, value=1)
        half_step_btn = gr.Button("Steps / 2")
        double_step_btn = gr.Button("Steps * 2")

        # The two round numbers that are joined into the "Combined Rounds" textbox.
        with gr.Column():
            round_x = gr.Number(label="Round (x)", value=1)
            round_y = gr.Number(label="Round (y)", value=-1)

        average_combine_chk = gr.Checkbox(label="Average-Combine Combined Rounds", value=False)
        # NOTE(review): this checkbox is not passed to any event handler in this section.
        mirror_book_numbers = gr.Checkbox(label="Mirror book numbers for negative rounds (axis=book 20)", value=False)

        rounds_combination = gr.Textbox(label="Combined Rounds", value="1,-1")

    # Row 4: result length, translation target and text-stripping options.
    with gr.Row():
        length = gr.Number(label="Result Length (0=inf)", value=0)
        tlang = create_language_dropdown("Target Language for Translation", default_value=DEFAULT_LANGUAGE)
        strip_spaces = gr.Checkbox(label="Strip Spaces from Books", value=True)
        strip_in_braces = gr.Checkbox(label="Strip Text in Braces from Books", value=True)
        strip_diacritics_chk = gr.Checkbox(label="Strip Diacritics from Books", value=True)
        # NOTE(review): the acknowledgment is displayed but not passed to any event handler in this section.
        acknowledgment_chk = gr.Checkbox(
            label="The User hereby accepts that the User will not harm or stalk anyone with this information, or bet on any of this information, in any regards.",
            value=True
        )

    translate_btn = gr.Button("7. Search with ELS")

    # Output area: results table, JSON text and the downloadable JSON file.
    results_output = gr.Dataframe(headers=['Date', 'Book Result', 'Result'], label="Results")
    json_output = gr.Textbox(label="JSON Configuration Output")
    json_download_btn = gr.Button("Prepare .json for Download")
    json_file = gr.File(label="Download Config JSON", file_count="single")

    # --- Load Forbidden Names ---

    # Loaded once while the UI is built; calculate_journal_sum reads this list.
    forbidden_names = load_forbidden_names()


    # --- Event Handlers ---

    def update_rounds_combination(round_x: int, round_y: int) -> str:
        """Return the text for the combined-rounds box: both rounds truncated to int, as "x,y"."""
        as_integers = (int(round_x), int(round_y))
        return ",".join(str(value) for value in as_integers)


    def calculate_journal_sum(text: str, date_words: str) -> tuple:
        """Compute the journal sum and return it three times, one per bound output.

        Returns ``(0, 0, 0)`` when ``text`` or ``date_words`` resembles one of
        the forbidden names; ``text`` is checked first.
        """
        for candidate in (text, date_words):
            if check_name_similarity(candidate, forbidden_names):
                return 0, 0, 0
        total = calculate_gematria_sum(text, date_words)
        return (total,) * 3


    def update_step_half(float_step: float) -> tuple:
        """Halve the step: return (half rounded up to an integer, exact half)."""
        halved = float_step / 2
        return math.ceil(halved), halved


    def update_step_double(float_step: float) -> tuple:
        """Double the step: return (double rounded up to an integer, exact double)."""
        doubled = float_step * 2
        return math.ceil(doubled), doubled


    # Keep the "Combined Rounds" textbox in sync: a change to either round_x or
    # round_y rewrites it from the current values of both fields.
    round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
    round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)


    def handle_json_download(config_json: str, step: int, rounds: int, strip_spaces: bool,
                             strip_in_braces: bool, strip_diacritics_chk: bool) -> str:
        """Write the JSON config to disk via ``download_json_file`` and return the file path."""
        saved_path = download_json_file(
            config_json=config_json,
            step=step,
            rounds=rounds,
            strip_spaces=strip_spaces,
            strip_in_braces=strip_in_braces,
            strip_diacritics_chk=strip_diacritics_chk,
        )
        return saved_path


    def perform_search_and_create_json(start_date: datetime, end_date: datetime, date_language_input: str,
                                       search_word: str, start: int, end: int, step: int, rounds: int, length: int,
                                       tlang: str, strip_spaces: bool, strip_in_braces: bool,
                                       strip_diacritics_chk: bool,
                                       gematria_text: str, average_combine: bool) -> tuple:
        """Run one ELS search per distinct date in the range and build the JSON report.

        For every day from ``start_date`` to ``end_date`` (inclusive) the date is
        rendered as words, the journal sum of ``gematria_text`` plus those words
        becomes the ELS step, and the first search result for that day is kept.
        The kept results are translated from Yiddish into ``tlang`` and dumped
        to JSON together with the configuration.

        The incoming ``step`` argument is not used: the step is recomputed for
        every date, and the sum of all computed steps is what is written to the
        "Step" entry of the JSON configuration.

        Returns:
            tuple: ``(config_json, results_dataframe)`` on success, or
            ``(message, None)`` when a date is missing or nothing was found.
        """
        # Without both dates the range loop below cannot run; report it instead
        # of failing on the comparison.
        if start_date is None or end_date is None:
            return "Please select both a start date and an end date.", None

        # Translate the search word to Yiddish ONLY ONCE (outside the loop).
        # An empty word is not sent to the translator; presumably some
        # deep_translator versions reject empty input -- TODO confirm.
        search_word_yiddish = ""
        if search_word and search_word.strip():
            translator_yi = GoogleTranslator(source='auto', target='yi')
            search_word_yiddish = translator_yi.translate(search_word)

        # The language check does not change between dates, so evaluate it once.
        needs_translation = date_language_input.lower() != DEFAULT_LANGUAGE

        all_results = []
        seen_dates = set()  # Date-word strings that were already processed
        total_steps = 0
        one_day = timedelta(days=1)

        current_date = start_date
        while current_date <= end_date:
            # Only translate if the date language is not English.
            if needs_translation:
                date_words_output = translate_date_to_words(current_date, date_language_input)
            else:
                date_words_output = date_to_words(current_date.strftime("%Y-%m-%d"))

            # Skip dates whose wording has already been processed.
            if date_words_output in seen_dates:
                current_date += one_day
                continue
            seen_dates.add(date_words_output)

            # The journal sum of name + date words is the ELS step for this date
            # (0 when a forbidden name is detected, which yields no results).
            journal_sum, _, _ = calculate_journal_sum(gematria_text, date_words_output)
            total_steps += journal_sum

            filtered_results = perform_els_search(start, end, journal_sum, rounds, length, tlang, strip_spaces,
                                                  strip_in_braces, strip_diacritics_chk, average_combine,
                                                  search_word_yiddish,
                                                  date_words_output)

            # Only keep the first result for each date.
            if filtered_results:
                all_results.append(filtered_results[0])

            current_date += one_day

        if not all_results:
            return "No results found.", None

        # Drop rows that repeat the same (date, book result) pair, keeping the first.
        seen_rows = set()
        deduplicated_results = []
        for result in all_results:
            row_key = (result['Date'], result['Book Result'])
            if row_key not in seen_rows:
                seen_rows.add(row_key)
                deduplicated_results.append(result)
        df = pd.DataFrame(deduplicated_results)

        # Translate the 'Book Result' column to the target language.
        translator = GoogleTranslator(source='yi', target=tlang)
        df['Result'] = df['Book Result'].apply(translator.translate)

        config_json = generate_json_dump(start, end, total_steps, rounds, length, tlang, strip_spaces,
                                         strip_in_braces, strip_diacritics_chk, gematria_text, df, search_word,
                                         start_date, end_date)
        return config_json, df


    # "Calculate Journal Sum": fills the visible sum, the step and the hidden float step.
    # NOTE(review): the second input is the language dropdown, so its value is
    # what calculate_journal_sum receives as `date_words` here.
    gematria_btn.click(
        calculate_journal_sum,
        inputs=[gematria_text, date_language_input],
        outputs=[gematria_result, step, float_step]
    )

    # "Steps / 2": halves the hidden float step and shows the rounded-up value as the step.
    half_step_btn.click(
        update_step_half,
        inputs=[float_step],
        outputs=[step, float_step]
    )

    # "Steps * 2": doubles the hidden float step and shows the rounded-up value as the step.
    double_step_btn.click(
        update_step_double,
        inputs=[float_step],
        outputs=[step, float_step]
    )

    # "Search with ELS": the handler's `rounds` parameter receives the
    # "Combined Rounds" textbox value, not the `rounds` number field.
    translate_btn.click(
        perform_search_and_create_json,
        inputs=[start_date, end_date, date_language_input, search_word, start, end, step, rounds_combination, length,
                tlang, strip_spaces,
                strip_in_braces, strip_diacritics_chk, gematria_text, average_combine_chk],
        outputs=[json_output, results_output]
    )

    # "Prepare .json for Download": writes the JSON text to a file; the file
    # name is built from the `step` and `rounds` number fields.
    json_download_btn.click(
        handle_json_download,
        inputs=[json_output, step, rounds, strip_spaces, strip_in_braces, strip_diacritics_chk],
        outputs=[json_file]
    )

# Launch the app only when this file is executed as a script (not on import).
if __name__ == "__main__":
    app.launch(share=False)