Spaces:
Runtime error
Runtime error
import json | |
import logging | |
import math | |
from datetime import datetime, timedelta | |
import gradio as gr | |
import pandas as pd | |
from deep_translator import GoogleTranslator | |
from gradio_calendar import Calendar | |
from gematria import calculate_gematria, strip_diacritics | |
from utils import ( | |
date_to_words, | |
translate_date_to_words, | |
process_json_files | |
) | |
# --- Constants --- | |
FORBIDDEN_NAMES_FILE = "c.txt" | |
DEFAULT_LANGUAGE = 'english' | |
logger = logging.getLogger(__name__) | |
logging.basicConfig(level=logging.INFO) | |
# --- Helper Functions --- | |
def create_language_dropdown(label: str, default_value: str = DEFAULT_LANGUAGE, show_label: bool = True) -> gr.Dropdown:
    """Build a Gradio dropdown for picking a translation language.

    Args:
        label (str): Caption of the dropdown.
        default_value (str, optional): Language selected initially.
            Defaults to DEFAULT_LANGUAGE ('english').
        show_label (bool, optional): Whether the caption is shown. Defaults to True.

    Returns:
        gr.Dropdown: Dropdown whose choices are the keys of the mapping
        returned by ``GoogleTranslator().get_supported_languages(as_dict=True)``.
    """
    supported_languages = GoogleTranslator().get_supported_languages(as_dict=True)
    language_choices = list(supported_languages)
    return gr.Dropdown(
        label=label,
        choices=language_choices,
        value=default_value,
        show_label=show_label,
    )
def calculate_gematria_sum(text: str, date_words: str) -> int:
    """Return the Gematria value of ``text`` and ``date_words`` joined by one space.

    Diacritics are stripped from the joined phrase before it is scored.
    The joined phrase and the resulting sum are both logged at INFO level.
    """
    phrase = f"{text} {date_words}"
    logger.info(f"Combined input for Gematria: {phrase}")
    normalized_phrase = strip_diacritics(phrase)
    total = calculate_gematria(normalized_phrase)
    logger.info(f"Gematria sum: {total}")
    return total
def perform_els_search(start: int, end: int, step: int, rounds: int, length: int, tlang: str,
                       strip_spaces: bool, strip_in_braces: bool, strip_diacritics: bool, average_combine: bool,
                       search_word_yiddish: str, date_words: str) -> list:
    """Run the ELS search and turn every hit into a result-table row.

    NOTE: filtering by ``search_word_yiddish`` is currently disabled. The
    word is only written to the debug log and every search result is
    returned. The former filter was
    ``search_word_yiddish in result['els_result_text']``.

    Args:
        start, end, step, rounds, length, tlang, strip_spaces, strip_in_braces,
        strip_diacritics, average_combine: Forwarded unchanged to
            ``process_json_files`` (called with ``translate_results=False``).
            Inside this function ``strip_diacritics`` is the boolean flag, not
            the helper of the same name imported at module level.
        search_word_yiddish (str): Search word, already translated to Yiddish.
        date_words (str): Date text stored in the 'Date' field of every row.

    Returns:
        list: Dicts with the keys 'Date', 'Book Result' and 'Result'. Empty
        when ``step`` or ``rounds`` equals 0.
    """
    logger.info("Starting ELS search...")
    logger.debug(f"Search word (Yiddish): {search_word_yiddish}")  # No translation here
    if step == 0 or rounds == 0:
        logger.info("Cannot search with step 0 or rounds 0")
        return []

    results = process_json_files(start, end, step, rounds, length, tlang, strip_spaces,
                                 strip_in_braces, strip_diacritics, average_combine,
                                 translate_results=False)

    rows = []
    for result in results:
        logger.debug(f"Searching result: {result}")
        if 'els_result_text' not in result:
            # Skip malformed entries instead of failing the whole search
            # with a KeyError.
            continue
        rows.append({
            'Date': date_words,
            'Book Result': result['els_result_text'],
            'Result': result.get('translated_text', '')
        })
    return rows
def generate_json_dump(start: int, end: int, step: int, rounds: int, length: int, tlang: str,
                       strip_spaces: bool, strip_in_braces: bool, strip_diacritics_chk: bool,
                       search_phrase: str, results_df: pd.DataFrame, search_word: str,
                       start_date: datetime, end_date: datetime) -> str:
    """Serialize the search configuration, date range and results to JSON.

    The returned document has three top-level keys: "Configuration" (the
    search parameters), "DateRange" (start and end formatted as YYYY-MM-DD)
    and "Results" (the rows of ``results_df`` as a list of records).

    Returns:
        str: JSON text indented by 4 spaces, with non-ASCII characters kept
        unescaped.
    """
    date_format = "%Y-%m-%d"

    configuration = {
        "Start Book": start,
        "End Book": end,
        "Step": step,
        "Rounds": rounds,
        "Length": length,
        "Target Language": tlang,
        "Strip Spaces": strip_spaces,
        "Strip Text in Braces": strip_in_braces,
        "Strip Diacritics": strip_diacritics_chk,
        "Search Phrase": search_phrase,
        "Search Word": search_word,
    }
    date_range = {
        "StartDate": start_date.strftime(date_format),
        "EndDate": end_date.strftime(date_format),
    }
    # Round-trip through pandas' own JSON writer so the records contain
    # only plain JSON-compatible values.
    records = json.loads(results_df.to_json(orient='records', force_ascii=False))

    result = {
        "Configuration": configuration,
        "DateRange": date_range,
        "Results": records,
    }
    logger.info(f"Generated JSON dump: {result}")
    return json.dumps(result, indent=4, ensure_ascii=False)
def download_json_file(config_json: str, step: int, rounds: int,
                       strip_spaces: bool, strip_in_braces: bool, strip_diacritics_chk: bool) -> str:
    """Write ``config_json`` to a descriptively named .json file.

    The file name is ``step-<step>-rounds-<rounds>`` followed by one short
    tag per enabled strip option ("-stSp", "-stBr", "-stDc") and ".json".
    The path is relative, so the file is created in the current working
    directory.

    Returns:
        str: Path of the written file.
    """
    tagged_options = (
        (strip_spaces, "-stSp"),
        (strip_in_braces, "-stBr"),
        (strip_diacritics_chk, "-stDc"),
    )
    filename_suffix = "".join(tag for enabled, tag in tagged_options if enabled)
    file_path = f"step-{step}-rounds-{rounds}{filename_suffix}.json"

    with open(file_path, "w", encoding='utf-8') as handle:
        handle.write(config_json)

    logger.info(f"Downloaded JSON file to: {file_path}")
    return file_path
# --- Forbidden Names Functions --- | |
def load_forbidden_names(filename: str = FORBIDDEN_NAMES_FILE) -> list:
    """Load the forbidden names, one per line, from ``filename``.

    Surrounding whitespace is stripped from every line. Blank lines are
    skipped so they do not end up as empty-string entries in the result.

    Args:
        filename (str, optional): UTF-8 text file to read. Defaults to
            FORBIDDEN_NAMES_FILE.

    Returns:
        list: The non-empty names, in file order. An empty list when the file
        does not exist (the problem is logged rather than raised).
    """
    try:
        with open(filename, "r", encoding='utf-8') as f:
            stripped_lines = (line.strip() for line in f)
            return [name for name in stripped_lines if name]
    except FileNotFoundError:
        # Report through the module logger like the rest of the file.
        logger.error(f"Error: Forbidden names file '{filename}' not found.")
        return []
def check_name_similarity(name: str, forbidden_names: list, threshold: int = 80) -> bool:
    """Report whether ``name`` is similar to any forbidden name.

    The comparison is case-insensitive and uses ``fuzz.ratio`` from
    fuzzywuzzy. The first entry whose ratio reaches ``threshold`` is logged
    and ends the search.

    Args:
        name (str): Text to check.
        forbidden_names (list): Names to compare against.
        threshold (int, optional): Minimum ratio counted as a match.
            Defaults to 80.

    Returns:
        bool: True if any forbidden name reaches the threshold, else False.
    """
    if not forbidden_names:
        # Nothing to compare against, so skip the fuzzywuzzy import too.
        return False

    from fuzzywuzzy import fuzz

    lowered_name = name.lower()  # loop-invariant, compute once
    for forbidden_name in forbidden_names:
        if fuzz.ratio(lowered_name, forbidden_name.lower()) >= threshold:
            logger.info(f"Forbidden word {forbidden_name} detected in: {name}")
            return True
    return False
# --- Gradio UI --- | |
# Top-level Gradio layout: every component below is created inside this Blocks context.
with gr.Blocks() as app: | |
# Inputs 1-4: date range, language of the date words, and the search word.
with gr.Row(): | |
start_date = Calendar(type="datetime", label="1. Select Start Date") | |
end_date = Calendar(type="datetime", label="2. Select End Date") | |
date_language_input = create_language_dropdown("3. Date Word Language", default_value=DEFAULT_LANGUAGE) | |
search_word = gr.Textbox(label="4. Search Word") | |
# Inputs 5-6: name/topic text and the button that computes its journal sum.
with gr.Row(): | |
gematria_text = gr.Textbox(label="5. Name and/or Topic", value="Hans Albert Einstein") | |
gematria_btn = gr.Button("6. Calculate Journal Sum") | |
gematria_result = gr.Number(label="Journal Sum") | |
# TODO: the journal sum shown here is wrong, because "english" is added to it initially.
# TODO: this only affects the interface field(s), not the result searching.
# ELS search parameters: book range, step width and rounds.
with gr.Row(): | |
start = gr.Number(label="Start Book", value=1) | |
end = gr.Number(label="End Book", value=39) | |
step = gr.Number(label="Jump Width (Steps) for ELS") | |
rounds = gr.Number(label="Rounds through Books", value=1) | |
# Hidden field holding the un-rounded step that the "Steps / 2" and "Steps * 2" buttons read and update.
float_step = gr.Number(visible=False, value=1) | |
half_step_btn = gr.Button("Steps / 2") | |
double_step_btn = gr.Button("Steps * 2") | |
with gr.Column(): | |
round_x = gr.Number(label="Round (x)", value=1) | |
round_y = gr.Number(label="Round (y)", value=-1) | |
average_combine_chk = gr.Checkbox(label="Average-Combine Combined Rounds", value=False) | |
mirror_book_numbers = gr.Checkbox(label="Mirror book numbers for negative rounds (axis=book 20)", value=False) | |
# Rewritten as "x,y" whenever round_x or round_y changes; this textbox is what the search receives as rounds.
rounds_combination = gr.Textbox(label="Combined Rounds", value="1,-1") | |
# Result length, translation target and text-stripping options.
with gr.Row(): | |
length = gr.Number(label="Result Length (0=inf)", value=0) | |
tlang = create_language_dropdown("Target Language for Translation", default_value=DEFAULT_LANGUAGE) | |
strip_spaces = gr.Checkbox(label="Strip Spaces from Books", value=True) | |
strip_in_braces = gr.Checkbox(label="Strip Text in Braces from Books", value=True) | |
strip_diacritics_chk = gr.Checkbox(label="Strip Diacritics from Books", value=True) | |
acknowledgment_chk = gr.Checkbox( | |
label="The User hereby accepts that the User will not harm or stalk anyone with this information, or bet on any of this information, in any regards.", | |
value=True | |
) | |
# Search trigger and output widgets (result table, JSON text, downloadable file).
translate_btn = gr.Button("7. Search with ELS") | |
results_output = gr.Dataframe(headers=['Date', 'Book Result', 'Result'], label="Results") | |
json_output = gr.Textbox(label="JSON Configuration Output") | |
json_download_btn = gr.Button("Prepare .json for Download") | |
json_file = gr.File(label="Download Config JSON", file_count="single") | |
# --- Load Forbidden Names ---
# Loaded once while the UI is built; calculate_journal_sum reads this list.
forbidden_names = load_forbidden_names() | |
# --- Event Handlers --- | |
def update_rounds_combination(round_x: int, round_y: int) -> str:
    """Format the two round values as the "x,y" text of the Combined Rounds box.

    Each value is converted with ``int()`` first, so fractional inputs are
    truncated toward zero.
    """
    return ",".join(str(int(value)) for value in (round_x, round_y))
def calculate_journal_sum(text: str, date_words: str) -> tuple:
    """Compute the journal sum for ``text`` combined with ``date_words``.

    If either input is similar to a forbidden name, no sum is computed and
    zeros are returned instead.

    Returns:
        tuple: The same value three times, one per output component wired to
        this handler, or ``(0, 0, 0)`` for forbidden input.
    """
    for candidate in (text, date_words):
        if check_name_similarity(candidate, forbidden_names):
            return 0, 0, 0
    journal_sum = calculate_gematria_sum(text, date_words)
    return (journal_sum,) * 3
def update_step_half(float_step: float) -> tuple:
    """Halve the step.

    Returns:
        tuple: ``(step, float_step)``, where ``step`` is the halved value
        rounded up to an integer and ``float_step`` is the exact halved value.
    """
    halved = float_step / 2
    return math.ceil(halved), halved
def update_step_double(float_step: float) -> tuple:
    """Double the step.

    Returns:
        tuple: ``(step, float_step)``, where ``step`` is the doubled value
        rounded up to an integer and ``float_step`` is the exact doubled value.
    """
    doubled = float_step * 2
    return math.ceil(doubled), doubled
# Keep the Combined Rounds textbox in sync: a change to either round field
# re-renders it from the current values of both.
for round_input in (round_x, round_y):
    round_input.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
def handle_json_download(config_json: str, step: int, rounds: int, strip_spaces: bool,
                         strip_in_braces: bool, strip_diacritics_chk: bool) -> str:
    """Click handler that writes the JSON config to disk via ``download_json_file``.

    All arguments are passed through unchanged.

    Returns:
        str: Path of the written file, as returned by ``download_json_file``.
    """
    return download_json_file(
        config_json=config_json,
        step=step,
        rounds=rounds,
        strip_spaces=strip_spaces,
        strip_in_braces=strip_in_braces,
        strip_diacritics_chk=strip_diacritics_chk,
    )
def perform_search_and_create_json(start_date: datetime, end_date: datetime, date_language_input: str,
                                   search_word: str, start: int, end: int, step: int, rounds: int, length: int,
                                   tlang: str, strip_spaces: bool, strip_in_braces: bool,
                                   strip_diacritics_chk: bool,
                                   gematria_text: str, average_combine: bool) -> tuple:
    """Run one ELS search per day of the date range and build the JSON report.

    For every day from ``start_date`` to ``end_date`` (inclusive) the date is
    turned into words, the journal sum of ``gematria_text`` plus those words
    is used as the ELS step, and the first search result of that day is kept.
    The incoming ``step`` argument is not used for searching.

    NOTE(review): the click wiring passes the "Combined Rounds" textbox as
    ``rounds``, so it arrives as a string such as "1,-1" despite the ``int``
    annotation.

    Returns:
        tuple: ``(config_json, dataframe)`` when at least one result exists,
        otherwise ``("No results found.", None)``. The "Step" entry of the
        JSON configuration is the sum of all per-day steps.
    """
    # Translate the search word to Yiddish ONLY ONCE (not per date).
    search_word_yiddish = GoogleTranslator(source='auto', target='yi').translate(search_word)

    one_day = timedelta(days=1)
    per_date_hits = []
    processed_date_words = set()  # date texts that have already been searched
    total_steps = 0

    current_date = start_date
    while current_date <= end_date:
        date_words_output = date_to_words(current_date.strftime("%Y-%m-%d"))
        # Only translate if the date language is not English.
        if date_language_input.lower() != DEFAULT_LANGUAGE:
            date_words_output = translate_date_to_words(current_date, date_language_input)

        if date_words_output not in processed_date_words:
            processed_date_words.add(date_words_output)

            journal_sum, _, _ = calculate_journal_sum(gematria_text, date_words_output)
            total_steps += journal_sum

            matches = perform_els_search(start, end, journal_sum, rounds, length, tlang, strip_spaces,
                                         strip_in_braces, strip_diacritics_chk, average_combine,
                                         search_word_yiddish,  # already translated to Yiddish
                                         date_words_output)
            # Only the first result of each date is kept.
            if matches:
                per_date_hits.append(matches[0])

        current_date += one_day

    if not per_date_hits:
        return "No results found.", None

    # Drop rows that repeat the same (date, book result) pair.
    unique_rows = []
    seen_keys = set()
    for hit in per_date_hits:
        row_key = (hit['Date'], hit['Book Result'])
        if row_key in seen_keys:
            continue
        seen_keys.add(row_key)
        unique_rows.append(hit)
    df = pd.DataFrame(unique_rows)

    # Translate the 'Book Result' column to the target language.
    translator = GoogleTranslator(source='yi', target=tlang)
    df['Result'] = df['Book Result'].apply(translator.translate)

    config_json = generate_json_dump(start, end, total_steps, rounds, length, tlang, strip_spaces,
                                     strip_in_braces, strip_diacritics_chk, gematria_text, df, search_word,
                                     start_date, end_date)
    return config_json, df
# "Calculate Journal Sum": fills the visible sum, the step and the hidden float step.
# NOTE(review): the second input is the date-language dropdown, so
# calculate_journal_sum receives the language name as its date_words argument.
gematria_btn.click(
    fn=calculate_journal_sum,
    inputs=[gematria_text, date_language_input],
    outputs=[gematria_result, step, float_step],
)

# "Steps / 2" and "Steps * 2": both read the hidden float step and update it and the step.
half_step_btn.click(fn=update_step_half, inputs=[float_step], outputs=[step, float_step])
double_step_btn.click(fn=update_step_double, inputs=[float_step], outputs=[step, float_step])

# "Search with ELS": the input order must match the parameter order of
# perform_search_and_create_json; rounds_combination feeds its ``rounds`` parameter.
translate_btn.click(
    fn=perform_search_and_create_json,
    inputs=[
        start_date, end_date, date_language_input, search_word,
        start, end, step, rounds_combination, length,
        tlang, strip_spaces, strip_in_braces, strip_diacritics_chk,
        gematria_text, average_combine_chk,
    ],
    outputs=[json_output, results_output],
)

# "Prepare .json for Download": writes the JSON text to a file and offers it for download.
json_download_btn.click(
    fn=handle_json_download,
    inputs=[json_output, step, rounds, strip_spaces, strip_in_braces, strip_diacritics_chk],
    outputs=[json_file],
)
# Start the Gradio app only when this file is executed as a script, not when it is imported.
if __name__ == "__main__": | |
app.launch(share=False) | |