# Source: app.py, commit 8e8e631 (verified), "Update app.py", by neuralworm.
import json
import logging
import math
from datetime import datetime, timedelta
import gradio as gr
import pandas as pd
from deep_translator import GoogleTranslator
from gradio_calendar import Calendar
from gematria import calculate_gematria, strip_diacritics
from utils import (
date_to_words,
translate_date_to_words,
process_json_files
)
# --- Constants ---
FORBIDDEN_NAMES_FILE = "c.txt"
DEFAULT_LANGUAGE = 'english'
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
# --- Helper Functions ---
def create_language_dropdown(label: str, default_value: str = DEFAULT_LANGUAGE, show_label: bool = True) -> gr.Dropdown:
    """Build a Gradio dropdown offering the languages GoogleTranslator reports as supported.

    Args:
        label (str): Caption displayed for the dropdown.
        default_value (str, optional): Entry preselected in the dropdown.
            Defaults to ``DEFAULT_LANGUAGE``.
        show_label (bool, optional): Whether the caption is rendered. Defaults to True.

    Returns:
        gr.Dropdown: Dropdown whose choices are the keys of the mapping
        returned by ``get_supported_languages(as_dict=True)``.
    """
    supported = GoogleTranslator().get_supported_languages(as_dict=True)
    language_names = [*supported.keys()]
    dropdown = gr.Dropdown(
        choices=language_names,
        label=label,
        value=default_value,
        show_label=show_label,
    )
    return dropdown
def calculate_gematria_sum(text: str, date_words: str) -> int:
    """Return the Gematria value of ``text`` and ``date_words`` joined by one space.

    Diacritics are stripped from the joined string before it is handed to
    ``calculate_gematria``. Both the joined input and the resulting sum are
    logged at INFO level.
    """
    combined_input = "{} {}".format(text, date_words)
    logger.info(f"Combined input for Gematria: {combined_input}")

    without_diacritics = strip_diacritics(combined_input)
    sum_value = calculate_gematria(without_diacritics)
    logger.info(f"Gematria sum: {sum_value}")
    return sum_value
def perform_els_search(start: int, end: int, step: int, rounds: int, length: int, tlang: str,
                       strip_spaces: bool, strip_in_braces: bool, strip_diacritics: bool, average_combine: bool,
                       search_word_yiddish: str, date_words: str) -> list:
    """Run the ELS search and shape every returned entry into a result row.

    The search itself is delegated to ``process_json_files`` (called with
    ``translate_results=False``). Every entry it returns is kept: the
    ``search_word_yiddish`` argument is only logged at DEBUG level and is
    not used to filter the results.

    Note that the ``strip_diacritics`` parameter is a flag; inside this
    function it hides the imported ``strip_diacritics`` helper of the same
    name, which is not needed here.

    Returns:
        list: One dict per search entry with the keys ``'Date'`` (the
        ``date_words`` argument), ``'Book Result'`` (the entry's
        ``'els_result_text'``) and ``'Result'`` (the entry's
        ``'translated_text'``, or ``''`` when that key is absent). An empty
        list when ``step`` or ``rounds`` equals 0.
    """
    logger.info("Starting ELS search...")
    logger.debug(f"Search word (Yiddish): {search_word_yiddish}")

    nothing_to_search = step == 0 or rounds == 0
    if nothing_to_search:
        logger.info("Cannot search with step 0 or rounds 0")
        return []

    results = process_json_files(start, end, step, rounds, length, tlang, strip_spaces,
                                 strip_in_braces, strip_diacritics, average_combine,
                                 translate_results=False)

    def _as_row(result):
        # Log each entry before reading from it, then map it to the table columns.
        logger.debug(f"Searching result: {result}")
        return {
            'Date': date_words,
            'Book Result': result['els_result_text'],
            'Result': result.get('translated_text', ''),
        }

    return [_as_row(result) for result in results]
def generate_json_dump(start: int, end: int, step: int, rounds: int, length: int, tlang: str,
                       strip_spaces: bool, strip_in_braces: bool, strip_diacritics_chk: bool,
                       search_phrase: str, results_df: pd.DataFrame, search_word: str,
                       start_date: datetime, end_date: datetime) -> str:
    """Serialize the search settings, the date range and the result rows as JSON text.

    The returned document has three top-level keys: ``"Configuration"``
    (the search settings), ``"DateRange"`` (start and end formatted as
    ``YYYY-MM-DD``) and ``"Results"`` (the rows of ``results_df`` as a list
    of records). The assembled structure is logged at INFO level.

    Returns:
        str: The document pretty-printed with a 4-space indent; non-ASCII
        characters are written as-is rather than escaped.
    """
    config = {
        "Start Book": start, "End Book": end,
        "Step": step, "Rounds": rounds, "Length": length,
        "Target Language": tlang,
        "Strip Spaces": strip_spaces,
        "Strip Text in Braces": strip_in_braces,
        "Strip Diacritics": strip_diacritics_chk,
        "Search Phrase": search_phrase,
        "Search Word": search_word,
    }

    day_format = "%Y-%m-%d"
    date_range = {
        "StartDate": start_date.strftime(day_format),
        "EndDate": end_date.strftime(day_format),
    }

    # Round-trip through pandas' own JSON writer so the rows become plain Python records.
    records_text = results_df.to_json(orient='records', force_ascii=False)
    records = json.loads(records_text)

    result = {"Configuration": config, "DateRange": date_range, "Results": records}
    logger.info(f"Generated JSON dump: {result}")
    return json.dumps(result, indent=4, ensure_ascii=False)
def download_json_file(config_json: str, step: int, rounds: int,
                       strip_spaces: bool, strip_in_braces: bool, strip_diacritics_chk: bool) -> str:
    """Write the JSON text to a file named after the search settings and return its path.

    The file name is ``step-<step>-rounds-<rounds>`` followed by one tag per
    enabled option (``-stSp`` for stripped spaces, ``-stBr`` for stripped
    braces, ``-stDc`` for stripped diacritics) and the ``.json`` extension.
    The path is relative, so the file lands in the current working directory.

    Returns:
        str: The path of the file that was written.
    """
    option_tags = (
        (strip_spaces, "-stSp"),
        (strip_in_braces, "-stBr"),
        (strip_diacritics_chk, "-stDc"),
    )
    filename_suffix = "".join(tag for enabled, tag in option_tags if enabled)

    file_path = f"step-{step}-rounds-{rounds}{filename_suffix}.json"
    with open(file_path, "w", encoding='utf-8') as file:
        file.write(config_json)

    logger.info(f"Downloaded JSON file to: {file_path}")
    return file_path
# --- Forbidden Names Functions ---
def load_forbidden_names(filename: str = FORBIDDEN_NAMES_FILE) -> list:
    """Load the forbidden names, one per line, from ``filename``.

    Surrounding whitespace is stripped from every line and blank lines are
    skipped, so the returned list never contains empty entries.

    Args:
        filename: Path of the UTF-8 text file to read. Defaults to
            ``FORBIDDEN_NAMES_FILE``.

    Returns:
        list: The non-empty names in file order, or an empty list when the
        file does not exist (the problem is reported through the module
        logger).
    """
    try:
        with open(filename, "r", encoding='utf-8') as f:
            stripped_lines = (line.strip() for line in f)
            # A blank line is not a name; keeping it would feed an empty
            # string into the similarity check.
            return [name for name in stripped_lines if name]
    except FileNotFoundError:
        # Report through the module logger like the rest of this file.
        logger.error(f"Error: Forbidden names file '{filename}' not found.")
        return []
def check_name_similarity(name: str, forbidden_names: list, threshold: int = 80) -> bool:
    """Report whether ``name`` closely resembles any forbidden name.

    The comparison is case-insensitive (both sides are lower-cased) and uses
    ``fuzzywuzzy.fuzz.ratio``; a score greater than or equal to
    ``threshold`` counts as a match. The first match is logged at INFO level.

    Args:
        name: The text to check.
        forbidden_names: Names to compare against.
        threshold: Minimum ``fuzz.ratio`` score treated as a match.

    Returns:
        bool: True as soon as one forbidden name matches, otherwise False.
        Always False when ``forbidden_names`` is empty.
    """
    if not forbidden_names:
        # Nothing to compare against, so fuzzywuzzy is not needed at all.
        return False

    # Imported here because this file does not import fuzzywuzzy at module level.
    from fuzzywuzzy import fuzz

    lowered_name = name.lower()  # loop-invariant, computed once
    for forbidden_name in forbidden_names:
        if fuzz.ratio(lowered_name, forbidden_name.lower()) >= threshold:
            # Use the module logger, consistent with the rest of this file.
            logger.info(f"Forbidden word {forbidden_name} detected in: {name}")
            return True
    return False
# --- Gradio UI ---
# Builds the Gradio Blocks app: every input widget, the action buttons and the
# output widgets. The names bound here are the ones the event handlers and
# the .click()/.change() bindings further down refer to.
# NOTE(review): leading indentation is missing in this copy of the file, so
# which widgets belong to which gr.Row()/gr.Column() cannot be read off here.
with gr.Blocks() as app:
with gr.Row():
# Date range and the language in which each date is spelled out.
start_date = Calendar(type="datetime", label="1. Select Start Date")
end_date = Calendar(type="datetime", label="2. Select End Date")
date_language_input = create_language_dropdown("3. Date Word Language", default_value=DEFAULT_LANGUAGE)
search_word = gr.Textbox(label="4. Search Word")
with gr.Row():
gematria_text = gr.Textbox(label="5. Name and/or Topic", value="Hans Albert Einstein")
gematria_btn = gr.Button("6. Calculate Journal Sum")
gematria_result = gr.Number(label="Journal Sum")
# TODO: the displayed journal sum is wrong: the button passes the selected
# date language name (initially "english") where date words are expected,
# so that word is added to the sum.
# TODO: this only affects the interface field(s), not the result searching.
with gr.Row():
start = gr.Number(label="Start Book", value=1)
end = gr.Number(label="End Book", value=39)
step = gr.Number(label="Jump Width (Steps) for ELS")
# `rounds` is passed only to the JSON download handler; the search itself
# receives the `rounds_combination` textbox defined below.
rounds = gr.Number(label="Rounds through Books", value=1)
# Hidden field holding the un-rounded step that the "/ 2" and "* 2" buttons operate on.
float_step = gr.Number(visible=False, value=1)
half_step_btn = gr.Button("Steps / 2")
double_step_btn = gr.Button("Steps * 2")
with gr.Column():
# round_x / round_y feed the "Combined Rounds" textbox through their change events.
round_x = gr.Number(label="Round (x)", value=1)
round_y = gr.Number(label="Round (y)", value=-1)
average_combine_chk = gr.Checkbox(label="Average-Combine Combined Rounds", value=False)
# NOTE(review): mirror_book_numbers is not referenced by any handler visible in this file.
mirror_book_numbers = gr.Checkbox(label="Mirror book numbers for negative rounds (axis=book 20)", value=False)
rounds_combination = gr.Textbox(label="Combined Rounds", value="1,-1")
with gr.Row():
length = gr.Number(label="Result Length (0=inf)", value=0)
tlang = create_language_dropdown("Target Language for Translation", default_value=DEFAULT_LANGUAGE)
strip_spaces = gr.Checkbox(label="Strip Spaces from Books", value=True)
strip_in_braces = gr.Checkbox(label="Strip Text in Braces from Books", value=True)
strip_diacritics_chk = gr.Checkbox(label="Strip Diacritics from Books", value=True)
# NOTE(review): acknowledgment_chk is not referenced by any handler visible in this file.
acknowledgment_chk = gr.Checkbox(
label="The User hereby accepts that the User will not harm or stalk anyone with this information, or bet on any of this information, in any regards.",
value=True
)
translate_btn = gr.Button("7. Search with ELS")
# Output widgets: result table, JSON text, and the downloadable file.
results_output = gr.Dataframe(headers=['Date', 'Book Result', 'Result'], label="Results")
json_output = gr.Textbox(label="JSON Configuration Output")
json_download_btn = gr.Button("Prepare .json for Download")
json_file = gr.File(label="Download Config JSON", file_count="single")
# --- Load Forbidden Names ---
# Loaded once while the UI is being built; read by calculate_journal_sum.
forbidden_names = load_forbidden_names()
# --- Event Handlers ---
def update_rounds_combination(round_x: int, round_y: int) -> str:
    """Format the two round values as the text for the "Combined Rounds" box.

    Each value is converted with ``int()`` and the two are joined by a
    comma, e.g. ``(1, -1)`` becomes ``"1,-1"``.
    """
    whole_rounds = (int(round_x), int(round_y))
    return ",".join(map(str, whole_rounds))
def calculate_journal_sum(text: str, date_words: str) -> tuple:
    """Compute the journal sum for ``text`` plus ``date_words``, three times over.

    The same value is returned three times because the button bound to this
    handler writes it to three outputs (the sum display, the step field and
    the hidden float step). If either input resembles a forbidden name,
    all three values are 0 and no sum is calculated.

    Returns:
        tuple: ``(sum, sum, sum)``, or ``(0, 0, 0)`` for forbidden input.
    """
    # Check the name/topic first, then the date words; stop at the first hit.
    for candidate in (text, date_words):
        if check_name_similarity(candidate, forbidden_names):
            return 0, 0, 0

    sum_value = calculate_gematria_sum(text, date_words)
    return (sum_value,) * 3
def update_step_half(float_step: float) -> tuple:
    """Halve the un-rounded step.

    Returns:
        tuple: ``(rounded, exact)`` where ``exact`` is ``float_step / 2`` and
        ``rounded`` is ``exact`` rounded up to the next integer.
    """
    halved = float_step / 2
    return math.ceil(halved), halved
def update_step_double(float_step: float) -> tuple:
    """Double the un-rounded step.

    Returns:
        tuple: ``(rounded, exact)`` where ``exact`` is ``float_step * 2`` and
        ``rounded`` is ``exact`` rounded up to the next integer.
    """
    doubled = float_step * 2
    return math.ceil(doubled), doubled
# Keep the "Combined Rounds" textbox in sync: a change to either round_x or
# round_y re-renders it from the current values of both fields.
round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
def handle_json_download(config_json: str, step: int, rounds: int, strip_spaces: bool,
                         strip_in_braces: bool, strip_diacritics_chk: bool) -> str:
    """Click handler that writes the JSON text to disk via ``download_json_file``.

    Returns:
        str: The path returned by ``download_json_file``.
    """
    file_path = download_json_file(
        config_json=config_json,
        step=step,
        rounds=rounds,
        strip_spaces=strip_spaces,
        strip_in_braces=strip_in_braces,
        strip_diacritics_chk=strip_diacritics_chk,
    )
    return file_path
def perform_search_and_create_json(start_date: datetime, end_date: datetime, date_language_input: str,
                                   search_word: str, start: int, end: int, step: int, rounds: str, length: int,
                                   tlang: str, strip_spaces: bool, strip_in_braces: bool,
                                   strip_diacritics_chk: bool,
                                   gematria_text: str, average_combine: bool) -> tuple:
    """Run one ELS search per day in the date range and build the result table and JSON dump.

    For every day from ``start_date`` through ``end_date`` (inclusive) the
    date is turned into words, the journal sum of ``gematria_text`` plus
    those words is used as that day's ELS step, and the first search result
    of the day is kept. The kept rows are de-duplicated on
    ``(Date, Book Result)``, their ``'Book Result'`` text is translated into
    ``tlang``, and everything is serialized with ``generate_json_dump``.

    Args:
        start_date: First day to search; may be ``None`` when no date was chosen.
        end_date: Last day to search; may be ``None`` when no date was chosen.
        date_language_input: Language for the date words; anything other
            than ``DEFAULT_LANGUAGE`` (case-insensitive) goes through
            ``translate_date_to_words``.
        search_word: Word recorded in the JSON dump; its Yiddish translation
            is forwarded to ``perform_els_search``.
        start, end, length, tlang, strip_spaces, strip_in_braces,
        strip_diacritics_chk, average_combine: Forwarded unchanged to
            ``perform_els_search``.
        step: Not used; each day's step is that day's journal sum. Kept so
            the handler signature matches its input list.
        rounds: Forwarded unchanged to the search and to the JSON dump. The
            click binding in this file supplies the "Combined Rounds"
            textbox here, i.e. text such as ``"1,-1"``.
        gematria_text: Name/topic text the journal sum is calculated from.

    Returns:
        tuple: ``(config_json, dataframe)`` on success. ``(message, None)``
        when a date is missing or when no day produced a result.
    """
    # Comparing None dates in the loop below would raise TypeError.
    # NOTE(review): presumably the calendar yields None when nothing is picked - confirm.
    if start_date is None or end_date is None:
        return "Please select a start date and an end date.", None

    # Translate the search word to Yiddish only once, outside the per-day loop.
    # A blank word is passed through as '' instead of being sent to the
    # translator (a wasted request; some translator versions may also reject
    # blank input - unverified).
    if search_word and search_word.strip():
        search_word_yiddish = GoogleTranslator(source='auto', target='yi').translate(search_word)
    else:
        search_word_yiddish = ""

    use_default_language = date_language_input.lower() == DEFAULT_LANGUAGE
    one_day = timedelta(days=1)

    all_results = []
    seen_dates = set()  # date words already processed
    total_steps = 0     # sum of all per-day steps; reported as "Step" in the JSON dump

    current_date = start_date
    while current_date <= end_date:
        if use_default_language:
            date_words_output = date_to_words(current_date.strftime("%Y-%m-%d"))
        else:
            date_words_output = translate_date_to_words(current_date, date_language_input)

        if date_words_output not in seen_dates:
            seen_dates.add(date_words_output)

            # The journal sum of this day is the ELS step for this day.
            day_step, _, _ = calculate_journal_sum(gematria_text, date_words_output)
            total_steps += day_step

            filtered_results = perform_els_search(start, end, day_step, rounds, length, tlang, strip_spaces,
                                                  strip_in_braces, strip_diacritics_chk, average_combine,
                                                  search_word_yiddish,
                                                  date_words_output)
            # Only the first result of each day is kept.
            if filtered_results:
                all_results.append(filtered_results[0])

        current_date += one_day

    if not all_results:
        return "No results found.", None

    # De-duplicate on (Date, Book Result), keeping the first occurrence and the original order.
    unique_results = {}
    for result in all_results:
        unique_results.setdefault((result['Date'], result['Book Result']), result)
    df = pd.DataFrame(list(unique_results.values()))

    # Translate the 'Book Result' column to the target language.
    translator = GoogleTranslator(source='yi', target=tlang)
    df['Result'] = df['Book Result'].apply(translator.translate)

    config_json = generate_json_dump(start, end, total_steps, rounds, length, tlang, strip_spaces,
                                     strip_in_braces, strip_diacritics_chk, gematria_text, df, search_word,
                                     start_date, end_date)
    return config_json, df
# "Calculate Journal Sum": writes the same value to the sum display, the step
# field and the hidden float step.
# NOTE(review): the second input is the date-language dropdown, so the language
# name itself is used as the "date words" here (see the TODO next to the
# Journal Sum field).
gematria_btn.click(
calculate_journal_sum,
inputs=[gematria_text, date_language_input],
outputs=[gematria_result, step, float_step]
)
# "Steps / 2": halves the hidden float step and shows the rounded-up value in `step`.
half_step_btn.click(
update_step_half,
inputs=[float_step],
outputs=[step, float_step]
)
# "Steps * 2": doubles the hidden float step and shows the rounded-up value in `step`.
double_step_btn.click(
update_step_double,
inputs=[float_step],
outputs=[step, float_step]
)
# "Search with ELS": inputs are matched to the handler's parameters by position.
# The `rounds` parameter receives the "Combined Rounds" textbox
# (rounds_combination), not the `rounds` number field.
translate_btn.click(
perform_search_and_create_json,
inputs=[start_date, end_date, date_language_input, search_word, start, end, step, rounds_combination, length,
tlang, strip_spaces,
strip_in_braces, strip_diacritics_chk, gematria_text, average_combine_chk],
outputs=[json_output, results_output]
)
# "Prepare .json for Download": writes the current JSON text to a file and
# hands its path to the file widget. Uses the `rounds` number field for the name.
json_download_btn.click(
handle_json_download,
inputs=[json_output, step, rounds, strip_spaces, strip_in_braces, strip_diacritics_chk],
outputs=[json_file]
)
# Start the app only when the file is run as a script; no public share link.
if __name__ == "__main__":
app.launch(share=False)