book-of-souls-2-word-search

Runtime error

App Files Files Community

book-of-souls-2-word-search / app.py

neuralworm

Update app.py

8e8e631 verified 11 months ago

raw

history blame contribute delete

14.6 kB

	import json
	import logging
	import math
	from datetime import datetime, timedelta

	import gradio as gr
	import pandas as pd
	from deep_translator import GoogleTranslator
	from gradio_calendar import Calendar

	from gematria import calculate_gematria, strip_diacritics
	from utils import (
	date_to_words,
	translate_date_to_words,
	process_json_files
	)

	# --- Constants ---
	# Text file read by load_forbidden_names(); each line is taken as one forbidden name.
	FORBIDDEN_NAMES_FILE = "c.txt"
	# Default selection for the language dropdowns; also the value the date
	# language is compared against to decide whether date words need translating.
	DEFAULT_LANGUAGE = 'english'

	# Module-level logger; basicConfig sets the root logging level to INFO.
	logger = logging.getLogger(__name__)
	logging.basicConfig(level=logging.INFO)


	# --- Helper Functions ---

	def create_language_dropdown(label: str, default_value: str = DEFAULT_LANGUAGE, show_label: bool = True) -> gr.Dropdown:
	    """Build a language-selection dropdown from the translator's supported languages.

	    Args:
	        label (str): Caption shown for the dropdown.
	        default_value (str, optional): Initially selected language name.
	            Defaults to DEFAULT_LANGUAGE.
	        show_label (bool, optional): Whether the caption is displayed. Defaults to True.

	    Returns:
	        gr.Dropdown: Dropdown whose choices are the keys of the mapping returned by
	        ``GoogleTranslator().get_supported_languages(as_dict=True)``.
	    """
	    supported = GoogleTranslator().get_supported_languages(as_dict=True)
	    language_names = list(supported.keys())
	    dropdown = gr.Dropdown(
	        choices=language_names,
	        label=label,
	        value=default_value,
	        show_label=show_label,
	    )
	    return dropdown


	def calculate_gematria_sum(text: str, date_words: str) -> int:
	    """Return the Gematria value of ``text`` and ``date_words`` joined by a space.

	    The joined string is passed through ``strip_diacritics`` and the result is
	    handed to ``calculate_gematria``. Both the joined input and the resulting
	    sum are logged at INFO level.
	    """
	    combined_input = f"{text} {date_words}"
	    logger.info(f"Combined input for Gematria: {combined_input}")
	    normalized_input = strip_diacritics(combined_input)
	    total = calculate_gematria(normalized_input)
	    logger.info(f"Gematria sum: {total}")
	    return total


	def perform_els_search(start: int, end: int, step: int, rounds: int, length: int, tlang: str,
	        strip_spaces: bool, strip_in_braces: bool, strip_diacritics: bool, average_combine: bool,
	        search_word_yiddish: str, date_words: str) -> list:
	    """Run the ELS search and wrap every returned result in a results-table row.

	    The search itself is delegated to ``process_json_files`` with
	    ``translate_results=False``. Filtering by ``search_word_yiddish`` is
	    currently switched off: every returned result is kept and the word is only
	    written to the debug log.

	    Note that the ``strip_diacritics`` parameter is a flag forwarded to
	    ``process_json_files``; inside this function it shadows the imported
	    helper of the same name.

	    Returns:
	        list: One dict per result with the keys 'Date' (``date_words``),
	        'Book Result' (the result's 'els_result_text') and 'Result' (the
	        result's 'translated_text', or '' when absent). An empty list is
	        returned when ``step`` or ``rounds`` equals 0.
	    """
	    logger.info("Starting ELS search...")
	    logger.debug(f"Search word (Yiddish): {search_word_yiddish}")

	    if 0 in (step, rounds):
	        logger.info("Cannot search with step 0 or rounds 0")
	        return []

	    raw_results = process_json_files(start, end, step, rounds, length, tlang, strip_spaces,
	                                     strip_in_braces, strip_diacritics, average_combine,
	                                     translate_results=False)

	    rows = []
	    for hit in raw_results:
	        logger.debug(f"Searching result: {hit}")
	        # No search-word filter is applied here; every hit becomes a row.
	        row = {
	            'Date': date_words,
	            'Book Result': hit['els_result_text'],
	            'Result': hit.get('translated_text', ''),
	        }
	        rows.append(row)

	    return rows


	def generate_json_dump(start: int, end: int, step: int, rounds: int, length: int, tlang: str,
	        strip_spaces: bool, strip_in_braces: bool, strip_diacritics_chk: bool,
	        search_phrase: str, results_df: pd.DataFrame, search_word: str,
	        start_date: datetime, end_date: datetime) -> str:
	    """Serialize the search configuration, date range and results to a JSON string.

	    The output has three top-level keys: "Configuration" (the search
	    parameters), "DateRange" (start and end formatted as YYYY-MM-DD) and
	    "Results" (``results_df`` converted to a list of records). The assembled
	    structure is logged at INFO level before being dumped with 4-space
	    indentation and non-ASCII characters left unescaped.
	    """
	    date_range = {
	        "StartDate": start_date.strftime("%Y-%m-%d"),
	        "EndDate": end_date.strftime("%Y-%m-%d"),
	    }
	    # Round-trip through pandas' own JSON writer so the records are plain JSON types.
	    records = json.loads(results_df.to_json(orient='records', force_ascii=False))

	    payload = {
	        "Configuration": {
	            "Start Book": start,
	            "End Book": end,
	            "Step": step,
	            "Rounds": rounds,
	            "Length": length,
	            "Target Language": tlang,
	            "Strip Spaces": strip_spaces,
	            "Strip Text in Braces": strip_in_braces,
	            "Strip Diacritics": strip_diacritics_chk,
	            "Search Phrase": search_phrase,
	            "Search Word": search_word,
	        },
	        "DateRange": date_range,
	        "Results": records,
	    }
	    logger.info(f"Generated JSON dump: {payload}")
	    return json.dumps(payload, indent=4, ensure_ascii=False)


	def download_json_file(config_json: str, step: int, rounds: int,
	        strip_spaces: bool, strip_in_braces: bool, strip_diacritics_chk: bool) -> str:
	    """Write ``config_json`` to a descriptively named .json file and return its path.

	    The file name is ``step-<step>-rounds-<rounds>`` followed by one short tag
	    per enabled option ("-stSp", "-stBr", "-stDc", in that order) and the
	    ".json" extension. The file is written as UTF-8 relative to the current
	    working directory.
	    """
	    option_tags = (
	        (strip_spaces, "-stSp"),
	        (strip_in_braces, "-stBr"),
	        (strip_diacritics_chk, "-stDc"),
	    )
	    filename_suffix = "".join(tag for enabled, tag in option_tags if enabled)
	    file_path = f"step-{step}-rounds-{rounds}{filename_suffix}.json"

	    with open(file_path, "w", encoding='utf-8') as out_file:
	        out_file.write(config_json)

	    logger.info(f"Downloaded JSON file to: {file_path}")
	    return file_path


	# --- Forbidden Names Functions ---

	def load_forbidden_names(filename: str = FORBIDDEN_NAMES_FILE) -> list:
	    """Read the forbidden names from ``filename``, one per line.

	    Each line is stripped of surrounding whitespace. If the file does not
	    exist, an error message is printed and an empty list is returned.
	    """
	    try:
	        with open(filename, "r", encoding='utf-8') as names_file:
	            names = []
	            for raw_line in names_file:
	                names.append(raw_line.strip())
	    except FileNotFoundError:
	        print(f"Error: Forbidden names file '{filename}' not found.")
	        return []
	    return names


	def check_name_similarity(name: str, forbidden_names: list, threshold: int = 80) -> bool:
	    """Report whether ``name`` fuzzily matches any entry of ``forbidden_names``.

	    The comparison is case-insensitive and uses ``fuzzywuzzy.fuzz.ratio``; a
	    ratio of at least ``threshold`` counts as a match. The first match is
	    logged at INFO level through this module's logger.

	    Args:
	        name (str): Text to check. ``None`` is treated as an empty string.
	        forbidden_names (list): Names to compare against.
	        threshold (int, optional): Minimum ratio that counts as a match.
	            Defaults to 80.

	    Returns:
	        bool: True if any forbidden name reaches the threshold, else False.
	    """
	    if not forbidden_names:
	        # Nothing to compare against, so the fuzzywuzzy import is not needed.
	        return False

	    from fuzzywuzzy import fuzz

	    # Lower-case the candidate once instead of on every iteration.
	    candidate = (name or "").lower()
	    for forbidden_name in forbidden_names:
	        if fuzz.ratio(candidate, forbidden_name.lower()) >= threshold:
	            # Use the module logger (same object as logging.getLogger(__name__))
	            # rather than the root logger, matching the rest of the file.
	            logging.getLogger(__name__).info(f"Forbidden word {forbidden_name} detected in: {name}")
	            return True
	    return False


	# --- Gradio UI ---

	# Declarative layout of the Gradio interface. The components created here are
	# referenced by name when the event handlers are wired up further down.
	with gr.Blocks() as app:
	# Date range, date-word language and search word.
	with gr.Row():
	start_date = Calendar(type="datetime", label="1. Select Start Date")
	end_date = Calendar(type="datetime", label="2. Select End Date")
	date_language_input = create_language_dropdown("3. Date Word Language", default_value=DEFAULT_LANGUAGE)
	search_word = gr.Textbox(label="4. Search Word")

	# Text whose Gematria value (together with the date words) becomes the step.
	with gr.Row():
	gematria_text = gr.Textbox(label="5. Name and/or Topic", value="Hans Albert Einstein")
	gematria_btn = gr.Button("6. Calculate Journal Sum")

	gematria_result = gr.Number(label="Journal Sum")
	# TODO: the Journal Sum shown here is wrong, because the button handler is
	# TODO: given the date-language dropdown value (e.g. "english") as its date words;
	# TODO: this only affects the interface field(s), not the result searching.

	# Book range and ELS step controls.
	with gr.Row():
	start = gr.Number(label="Start Book", value=1)
	end = gr.Number(label="End Book", value=39)
	step = gr.Number(label="Jump Width (Steps) for ELS")
	rounds = gr.Number(label="Rounds through Books", value=1)
	# Hidden field holding the un-rounded step used by the halve/double buttons.
	float_step = gr.Number(visible=False, value=1)
	half_step_btn = gr.Button("Steps / 2")
	double_step_btn = gr.Button("Steps * 2")

	# The two round values that are combined into the "Combined Rounds" textbox.
	with gr.Column():
	round_x = gr.Number(label="Round (x)", value=1)
	round_y = gr.Number(label="Round (y)", value=-1)

	average_combine_chk = gr.Checkbox(label="Average-Combine Combined Rounds", value=False)
	# NOTE(review): not referenced by any event handler in the visible code.
	mirror_book_numbers = gr.Checkbox(label="Mirror book numbers for negative rounds (axis=book 20)", value=False)

	# This textbox (not the "Rounds through Books" number) is what the search button passes as rounds.
	rounds_combination = gr.Textbox(label="Combined Rounds", value="1,-1")

	# Result length, target language and text-stripping options.
	with gr.Row():
	length = gr.Number(label="Result Length (0=inf)", value=0)
	tlang = create_language_dropdown("Target Language for Translation", default_value=DEFAULT_LANGUAGE)
	strip_spaces = gr.Checkbox(label="Strip Spaces from Books", value=True)
	strip_in_braces = gr.Checkbox(label="Strip Text in Braces from Books", value=True)
	strip_diacritics_chk = gr.Checkbox(label="Strip Diacritics from Books", value=True)
	# NOTE(review): not referenced by any event handler in the visible code.
	acknowledgment_chk = gr.Checkbox(
	label="The User hereby accepts that the User will not harm or stalk anyone with this information, or bet on any of this information, in any regards.",
	value=True
	)

	translate_btn = gr.Button("7. Search with ELS")

	# Output widgets: results table, JSON dump, and the downloadable file.
	results_output = gr.Dataframe(headers=['Date', 'Book Result', 'Result'], label="Results")
	json_output = gr.Textbox(label="JSON Configuration Output")
	json_download_btn = gr.Button("Prepare .json for Download")
	json_file = gr.File(label="Download Config JSON", file_count="single")

	# --- Load Forbidden Names ---
	# Loaded once when this code runs; calculate_journal_sum reads this list.

	forbidden_names = load_forbidden_names()


	# --- Event Handlers ---

	def update_rounds_combination(round_x: int, round_y: int) -> str:
	    """Return the two round values as a comma-separated pair of integers.

	    Both values are truncated with ``int`` first, so ``(1, -1)`` becomes
	    ``"1,-1"``.
	    """
	    x_part = int(round_x)
	    y_part = int(round_y)
	    return ",".join(map(str, (x_part, y_part)))


	def calculate_journal_sum(text: str, date_words: str) -> tuple:
	    """Compute the journal sum and return it three times.

	    If either ``text`` or ``date_words`` is similar to a forbidden name,
	    ``(0, 0, 0)`` is returned instead. ``text`` is checked first and
	    ``date_words`` only if ``text`` passed.

	    Returns:
	        tuple: The Gematria sum of ``text`` and ``date_words`` repeated three
	        times, or three zeros when a forbidden name was detected.
	    """
	    is_forbidden = (
	        check_name_similarity(text, forbidden_names)
	        or check_name_similarity(date_words, forbidden_names)
	    )
	    if is_forbidden:
	        return 0, 0, 0

	    total = calculate_gematria_sum(text, date_words)
	    return total, total, total


	def update_step_half(float_step: float) -> tuple:
	    """Halve the step.

	    Returns:
	        tuple: ``(rounded, exact)`` where ``exact`` is ``float_step / 2`` and
	        ``rounded`` is ``exact`` rounded up to the next integer.
	    """
	    halved = float_step / 2
	    return math.ceil(halved), halved


	def update_step_double(float_step: float) -> tuple:
	    """Double the step.

	    Returns:
	        tuple: ``(rounded, exact)`` where ``exact`` is ``float_step * 2`` and
	        ``rounded`` is ``exact`` rounded up to the next integer.
	    """
	    doubled = float_step * 2
	    return math.ceil(doubled), doubled


	# Keep the "Combined Rounds" textbox in sync whenever either round field changes.
	for rounds_field in (round_x, round_y):
	    rounds_field.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)


	def handle_json_download(config_json: str, step: int, rounds: int, strip_spaces: bool,
	        strip_in_braces: bool, strip_diacritics_chk: bool) -> str:
	    """Event handler that forwards its arguments unchanged to ``download_json_file``.

	    Returns:
	        str: The path returned by ``download_json_file``.
	    """
	    saved_path = download_json_file(
	        config_json,
	        step,
	        rounds,
	        strip_spaces,
	        strip_in_braces,
	        strip_diacritics_chk,
	    )
	    return saved_path


	def perform_search_and_create_json(start_date: datetime, end_date: datetime, date_language_input: str,
	        search_word: str, start: int, end: int, step: int, rounds: int, length: int,
	        tlang: str, strip_spaces: bool, strip_in_braces: bool,
	        strip_diacritics_chk: bool,
	        gematria_text: str, average_combine: bool) -> tuple:
	    """Run one ELS search per day of the date range and build the JSON dump.

	    For every day from ``start_date`` to ``end_date`` (inclusive) the date is
	    turned into words, the journal sum of ``gematria_text`` plus those words
	    is used as the ELS step, and the first search result of that day is kept.
	    Days whose date words were already seen are skipped.

	    Args:
	        start_date (datetime): First day of the range.
	        end_date (datetime): Last day of the range (inclusive).
	        date_language_input (str): Language for the date words; anything other
	            than DEFAULT_LANGUAGE goes through ``translate_date_to_words``.
	        search_word (str): Translated to Yiddish once and passed on to
	            ``perform_els_search``; also recorded in the JSON dump.
	        start, end, rounds, length, tlang, strip_spaces, strip_in_braces,
	        strip_diacritics_chk, average_combine: Forwarded to
	            ``perform_els_search`` and/or ``generate_json_dump``.
	        step (int): Accepted for interface compatibility but not used; the
	            step is recomputed from the journal sum for each day.
	        gematria_text (str): Name/topic text used for the journal sum.

	    Returns:
	        tuple: ``(config_json, df)`` on success, where ``df`` has the columns
	        'Date', 'Book Result' and 'Result'. When there is nothing to show,
	        ``(message, None)`` is returned instead.
	    """
	    # NOTE(review): presumably the calendar yields None while no date is picked —
	    # confirm; without this guard the comparison below raises TypeError.
	    if start_date is None or end_date is None:
	        return "Please select both a start date and an end date.", None
	    if start_date > end_date:
	        # Empty range: same outcome as running the loop zero times, but
	        # without first making a translator call.
	        return "No results found.", None

	    # Translate the search word to Yiddish only once, and only if there is one.
	    search_word_yiddish = ""
	    if search_word:
	        search_word_yiddish = GoogleTranslator(source='auto', target='yi').translate(search_word)

	    use_default_language = date_language_input.lower() == DEFAULT_LANGUAGE
	    one_day = timedelta(days=1)
	    seen_dates = set()  # date words already processed
	    all_results = []
	    total_steps = 0

	    current_date = start_date
	    while current_date <= end_date:
	        # Produce the date words in exactly one way; the default-language
	        # wording is no longer computed just to be thrown away.
	        if use_default_language:
	            date_words_output = date_to_words(current_date.strftime("%Y-%m-%d"))
	        else:
	            date_words_output = translate_date_to_words(current_date, date_language_input)
	        current_date += one_day

	        if date_words_output in seen_dates:
	            continue
	        seen_dates.add(date_words_output)

	        journal_sum, _, _ = calculate_journal_sum(gematria_text, date_words_output)
	        total_steps += journal_sum

	        filtered_results = perform_els_search(start, end, journal_sum, rounds, length, tlang, strip_spaces,
	                                              strip_in_braces, strip_diacritics_chk, average_combine,
	                                              search_word_yiddish,
	                                              date_words_output)

	        # Only the first result of each day is kept.
	        if filtered_results:
	            all_results.append(filtered_results[0])

	    if not all_results:
	        return "No results found.", None

	    # Each kept row carries a distinct 'Date' (guaranteed by seen_dates), so
	    # no further de-duplication on (Date, Book Result) is needed.
	    df = pd.DataFrame(all_results)

	    # Translate the 'Book Result' column to the target language.
	    translator = GoogleTranslator(source='yi', target=tlang)
	    df['Result'] = df['Book Result'].apply(translator.translate)

	    # The summed per-day steps are recorded as "Step" in the dump.
	    config_json = generate_json_dump(start, end, total_steps, rounds, length, tlang, strip_spaces,
	                                     strip_in_braces, strip_diacritics_chk, gematria_text, df, search_word,
	                                     start_date, end_date)
	    return config_json, df


	# Button 6: compute the journal sum and copy it into the visible sum field,
	# the step field and the hidden float step.
	# NOTE(review): the second input is the date-language dropdown, so the
	# handler receives the language name where it expects date words.
	gematria_btn.click(
	calculate_journal_sum,
	inputs=[gematria_text, date_language_input],
	outputs=[gematria_result, step, float_step]
	)

	# "Steps / 2": halve the hidden float step and show the rounded-up value as the step.
	half_step_btn.click(
	update_step_half,
	inputs=[float_step],
	outputs=[step, float_step]
	)

	# "Steps * 2": double the hidden float step and show the rounded-up value as the step.
	double_step_btn.click(
	update_step_double,
	inputs=[float_step],
	outputs=[step, float_step]
	)

	# Button 7: run the search over the date range.
	# Note that the "Combined Rounds" textbox (a string such as "1,-1") is what
	# is passed in the handler's `rounds` position, not the `rounds` number field.
	translate_btn.click(
	perform_search_and_create_json,
	inputs=[start_date, end_date, date_language_input, search_word, start, end, step, rounds_combination, length,
	tlang, strip_spaces,
	strip_in_braces, strip_diacritics_chk, gematria_text, average_combine_chk],
	outputs=[json_output, results_output]
	)

	# Write the current JSON output to a file and offer it for download.
	# Here the `rounds` number field (not the combined-rounds textbox) is used for the file name.
	json_download_btn.click(
	handle_json_download,
	inputs=[json_output, step, rounds, strip_spaces, strip_in_braces, strip_diacritics_chk],
	outputs=[json_file]
	)

	# Start the Gradio server only when run as a script; no public share link is requested.
	if __name__ == "__main__":
	app.launch(share=False)