import json
import logging
import math
from datetime import datetime, timedelta

import gradio as gr
import pandas as pd
from deep_translator import GoogleTranslator
from gradio_calendar import Calendar

from gematria import calculate_gematria, strip_diacritics
from utils import (
    date_to_words,
    translate_date_to_words,
    process_json_files,
)

# --- Constants ---
FORBIDDEN_NAMES_FILE = "c.txt"
DEFAULT_LANGUAGE = 'english'

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)


# --- Helper Functions ---
def create_language_dropdown(label: str, default_value: str = DEFAULT_LANGUAGE,
                             show_label: bool = True) -> gr.Dropdown:
    """Creates a Gradio dropdown menu for language selection.

    The choices are the language names reported by
    ``GoogleTranslator().get_supported_languages(as_dict=True)``.

    Args:
        label (str): The label for the dropdown.
        default_value (str, optional): The initially selected language.
            Defaults to ``DEFAULT_LANGUAGE`` ('english').
        show_label (bool, optional): Whether to show the label. Defaults to True.

    Returns:
        gr.Dropdown: The Gradio dropdown component.
    """
    languages = GoogleTranslator().get_supported_languages(as_dict=True)
    return gr.Dropdown(
        choices=list(languages.keys()),
        label=label,
        value=default_value,
        show_label=show_label
    )


def calculate_gematria_sum(text: str, date_words: str) -> int:
    """Calculates the Gematria sum for a text and date words.

    The two inputs are joined with a single space, diacritics are stripped,
    and the result of ``calculate_gematria`` on that string is returned.
    """
    combined_input = f"{text} {date_words}"
    logger.info("Combined input for Gematria: %s", combined_input)
    sum_value = calculate_gematria(strip_diacritics(combined_input))
    logger.info("Gematria sum: %s", sum_value)
    return sum_value


def perform_els_search(start: int, end: int, step: int, rounds: int, length: int, tlang: str,
                       strip_spaces: bool, strip_in_braces: bool, strip_diacritics: bool,
                       average_combine: bool, search_word_yiddish: str, date_words: str) -> list:
    """Performs the ELS search and returns one row dict per usable result.

    Args:
        start, end: First and last book passed to ``process_json_files``.
        step: ELS jump width. A zero/empty step yields no results.
        rounds: Rounds specification. The UI passes the "Combined Rounds"
            text (e.g. ``"1,-1"``), so a string is accepted as well as a number.
        length, tlang, strip_spaces, strip_in_braces, strip_diacritics,
        average_combine: Forwarded unchanged to ``process_json_files``.
            (``strip_diacritics`` is a flag here and shadows the imported
            helper of the same name inside this function only.)
        search_word_yiddish: Search word; currently only logged, because
            filtering by search word is disabled and every result is kept.
        date_words: Value stored in the ``'Date'`` field of every row.

    Returns:
        list: Dicts with the keys ``'Date'``, ``'Book Result'`` and ``'Result'``.
    """
    logger.info("Starting ELS search...")
    logger.debug("Search word (Yiddish): %s", search_word_yiddish)

    # `rounds` may arrive as text from the UI, so also treat "0" as zero;
    # None/empty values are rejected instead of being passed on.
    if not step or not rounds or str(rounds).strip() == "0":
        logger.info("Cannot search with step 0 or rounds 0")
        return []

    results = process_json_files(start, end, step, rounds, length, tlang,
                                 strip_spaces, strip_in_braces, strip_diacritics,
                                 average_combine, translate_results=False)

    # Filtering on `search_word_yiddish` is intentionally disabled: every
    # result that carries an ELS text is returned.
    filtered_results = []
    for result in results:
        logger.debug("Searching result: %s", result)
        book_text = result.get('els_result_text')
        if book_text is None:
            # Nothing to show for entries without an ELS text.
            continue
        filtered_results.append({
            'Date': date_words,
            'Book Result': book_text,
            'Result': result.get('translated_text', '')
        })
    return filtered_results


def generate_json_dump(start: int, end: int, step: int, rounds: int, length: int, tlang: str,
                       strip_spaces: bool, strip_in_braces: bool, strip_diacritics_chk: bool,
                       search_phrase: str, results_df: pd.DataFrame, search_word: str,
                       start_date: datetime, end_date: datetime) -> str:
    """Generates the JSON dump with configuration, date range, and results.

    Returns:
        str: Pretty-printed (4-space indent, non-ASCII preserved) JSON with the
        top-level keys ``"Configuration"``, ``"DateRange"`` and ``"Results"``.
        ``"Results"`` holds the rows of ``results_df`` as records.
    """
    config = {
        "Start Book": start,
        "End Book": end,
        "Step": step,
        "Rounds": rounds,
        "Length": length,
        "Target Language": tlang,
        "Strip Spaces": strip_spaces,
        "Strip Text in Braces": strip_in_braces,
        "Strip Diacritics": strip_diacritics_chk,
        "Search Phrase": search_phrase,
        "Search Word": search_word
    }
    result = {
        "Configuration": config,
        "DateRange": {
            "StartDate": start_date.strftime("%Y-%m-%d"),
            "EndDate": end_date.strftime("%Y-%m-%d")
        },
        # Round-trip through pandas' JSON writer so DataFrame values become
        # plain JSON-serialisable Python objects.
        "Results": json.loads(results_df.to_json(orient='records', force_ascii=False))
    }
    logger.info("Generated JSON dump: %s", result)
    return json.dumps(result, indent=4, ensure_ascii=False)


def download_json_file(config_json: str, step: int, rounds: int,
                       strip_spaces: bool, strip_in_braces: bool, strip_diacritics_chk: bool) -> str:
    """Writes the JSON config to a descriptively named file and returns its path.

    The file name is ``step-<step>-rounds-<rounds>`` followed by ``-stSp``,
    ``-stBr`` and ``-stDc`` for each enabled strip option, plus ``.json``.
    The file is written relative to the current working directory.
    """
    filename_suffix = ""
    if strip_spaces:
        filename_suffix += "-stSp"
    if strip_in_braces:
        filename_suffix += "-stBr"
    if strip_diacritics_chk:
        filename_suffix += "-stDc"
    file_path = f"step-{step}-rounds-{rounds}{filename_suffix}.json"
    with open(file_path, "w", encoding='utf-8') as file:
        file.write(config_json)
    logger.info("Downloaded JSON file to: %s", file_path)
    return file_path


# --- Forbidden Names Functions ---
def load_forbidden_names(filename: str = FORBIDDEN_NAMES_FILE) -> list:
    """Loads forbidden names from the specified file, one name per line.

    Surrounding whitespace is stripped and blank lines are ignored. A missing
    file is logged and treated as an empty list.
    """
    try:
        with open(filename, "r", encoding='utf-8') as f:
            return [name for name in (line.strip() for line in f) if name]
    except FileNotFoundError:
        logger.warning("Error: Forbidden names file '%s' not found.", filename)
        return []


def check_name_similarity(name: str, forbidden_names: list, threshold: int = 80) -> bool:
    """Checks if a name is similar to any forbidden name.

    Comparison is case-insensitive and uses ``fuzz.ratio``; a ratio greater
    than or equal to ``threshold`` counts as a match.
    """
    # Imported lazily so the module can be loaded without fuzzywuzzy until
    # a similarity check is actually needed.
    from fuzzywuzzy import fuzz

    lowered_name = name.lower()
    for forbidden_name in forbidden_names:
        similarity_ratio = fuzz.ratio(lowered_name, forbidden_name.lower())
        if similarity_ratio >= threshold:
            logger.info("Forbidden word %s detected in: %s", forbidden_name, name)
            return True
    return False


# --- Gradio UI ---
# NOTE(review): the nesting of rows/columns below was reconstructed; it only
# affects the visual layout - confirm against the intended design.
with gr.Blocks() as app:
    with gr.Row():
        start_date = Calendar(type="datetime", label="1. Select Start Date")
        end_date = Calendar(type="datetime", label="2. Select End Date")
        date_language_input = create_language_dropdown("3. Date Word Language",
                                                       default_value=DEFAULT_LANGUAGE)
        search_word = gr.Textbox(label="4. Search Word")

    with gr.Row():
        gematria_text = gr.Textbox(label="5. Name and/or Topic", value="Hans Albert Einstein")
        gematria_btn = gr.Button("6. Calculate Journal Sum")
        gematria_result = gr.Number(label="Journal Sum")

    with gr.Row():
        start = gr.Number(label="Start Book", value=1)
        end = gr.Number(label="End Book", value=39)
        step = gr.Number(label="Jump Width (Steps) for ELS")
        rounds = gr.Number(label="Rounds through Books", value=1)
        # Hidden un-rounded step so repeated halving/doubling does not
        # accumulate rounding errors in the visible integer step.
        float_step = gr.Number(visible=False, value=1)
        half_step_btn = gr.Button("Steps / 2")
        double_step_btn = gr.Button("Steps * 2")

        with gr.Column():
            round_x = gr.Number(label="Round (x)", value=1)
            round_y = gr.Number(label="Round (y)", value=-1)

        average_combine_chk = gr.Checkbox(label="Average-Combine Combined Rounds", value=False)
        mirror_book_numbers = gr.Checkbox(
            label="Mirror book numbers for negative rounds (axis=book 20)", value=False)
        rounds_combination = gr.Textbox(label="Combined Rounds", value="1,-1")

    with gr.Row():
        length = gr.Number(label="Result Length (0=inf)", value=0)
        tlang = create_language_dropdown("Target Language for Translation",
                                         default_value=DEFAULT_LANGUAGE)
        strip_spaces = gr.Checkbox(label="Strip Spaces from Books", value=True)
        strip_in_braces = gr.Checkbox(label="Strip Text in Braces from Books", value=True)
        strip_diacritics_chk = gr.Checkbox(label="Strip Diacritics from Books", value=True)
        acknowledgment_chk = gr.Checkbox(
            label="The User hereby accepts that the User will not harm or stalk anyone with this information, or bet on any of this information, in any regards.",
            value=True
        )

    translate_btn = gr.Button("7. Search with ELS")

    results_output = gr.Dataframe(headers=['Date', 'Book Result', 'Result'], label="Results")
    json_output = gr.Textbox(label="JSON Configuration Output")
    json_download_btn = gr.Button("Prepare .json for Download")
    json_file = gr.File(label="Download Config JSON", file_count="single")

    # --- Load Forbidden Names ---
    forbidden_names = load_forbidden_names()

    # --- Event Handlers ---
    def _date_words_for(date: datetime, language: str) -> str:
        """Returns ``date`` spelled out in words in the requested language."""
        if (language or DEFAULT_LANGUAGE).lower() != DEFAULT_LANGUAGE:
            # Only non-English languages need the translating helper.
            return translate_date_to_words(date, language)
        return date_to_words(date.strftime("%Y-%m-%d"))

    def update_rounds_combination(round_x: int, round_y: int) -> str:
        """Updates the rounds_combination textbox based on round_x and round_y."""
        return f"{int(round_x)},{int(round_y)}"

    def calculate_journal_sum(text: str, date_words: str) -> tuple:
        """Calculates the journal sum and updates the step value.

        Returns the same value three times (journal sum, step, float step).
        All three are 0 when the text or the date words resemble a forbidden
        name.
        """
        if check_name_similarity(text, forbidden_names):
            return 0, 0, 0
        if check_name_similarity(date_words, forbidden_names):
            return 0, 0, 0
        sum_value = calculate_gematria_sum(text, date_words)
        return sum_value, sum_value, sum_value

    def calculate_journal_sum_for_date(text: str, selected_date: datetime,
                                       date_language: str) -> tuple:
        """Button handler: journal sum of ``text`` plus the start date in words.

        The date words are derived from the selected start date rather than
        from the language name. Without a selected date only the text is
        summed.
        """
        date_words = _date_words_for(selected_date, date_language) if selected_date else ""
        return calculate_journal_sum(text, date_words)

    def update_step_half(float_step: float) -> tuple:
        """Updates the step value to half."""
        halved = float_step / 2
        return math.ceil(halved), halved

    def update_step_double(float_step: float) -> tuple:
        """Updates the step value to double."""
        doubled = float_step * 2
        return math.ceil(doubled), doubled

    # Update rounds_combination when round_x or round_y changes
    round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
    round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)

    def handle_json_download(config_json: str, step: int, rounds: int,
                             strip_spaces: bool, strip_in_braces: bool,
                             strip_diacritics_chk: bool) -> str:
        """Handles the download of the JSON config file."""
        return download_json_file(config_json, step, rounds, strip_spaces,
                                  strip_in_braces, strip_diacritics_chk)

    def perform_search_and_create_json(start_date: datetime, end_date: datetime,
                                       date_language_input: str, search_word: str,
                                       start: int, end: int, step: int, rounds: str,
                                       length: int, tlang: str, strip_spaces: bool,
                                       strip_in_braces: bool, strip_diacritics_chk: bool,
                                       gematria_text: str, average_combine: bool) -> tuple:
        """Performs the ELS search for each date in the range, creates the JSON
        config, and displays the results.

        For every day from ``start_date`` to ``end_date`` (inclusive) the
        journal sum of ``gematria_text`` plus that day's date words is used as
        the ELS step; the incoming ``step`` value is therefore not used. Only
        the first search result per date is kept.

        Returns:
            tuple: ``(config_json, results_dataframe)``, or a message and
            ``None`` when nothing can be shown.
        """
        if start_date is None or end_date is None:
            # Comparing/formatting None dates would raise; tell the user instead.
            return "Please select both a start date and an end date.", None

        all_results = []
        one_day = timedelta(days=1)
        total_steps = 0

        # Translate the search word to Yiddish ONLY ONCE (outside the loop).
        # Blank input is skipped: there is nothing to translate, and the
        # translator may reject empty text.
        if search_word and search_word.strip():
            search_word_yiddish = GoogleTranslator(source='auto', target='yi').translate(search_word)
        else:
            search_word_yiddish = ""

        seen_dates = set()  # Date words that have already been processed
        current_date = start_date
        while current_date <= end_date:
            date_words_output = _date_words_for(current_date, date_language_input)
            current_date += one_day

            # Skip if date has already been processed
            if date_words_output in seen_dates:
                continue
            seen_dates.add(date_words_output)

            journal_sum, _, _ = calculate_journal_sum(gematria_text, date_words_output)
            total_steps += journal_sum

            filtered_results = perform_els_search(start, end, journal_sum, rounds, length, tlang,
                                                  strip_spaces, strip_in_braces,
                                                  strip_diacritics_chk, average_combine,
                                                  search_word_yiddish, date_words_output)

            # Only add the first result for each date
            if filtered_results:
                all_results.append(filtered_results[0])

        if not all_results:
            return "No results found.", None

        # Drop repeated (date, book result) pairs while keeping first-seen order.
        seen_keys = set()
        deduplicated_results = []
        for result in all_results:
            key = (result['Date'], result['Book Result'])
            if key not in seen_keys:
                seen_keys.add(key)
                deduplicated_results.append(result)

        df = pd.DataFrame(deduplicated_results)

        # Translate the 'Book Result' column to the target language
        translator = GoogleTranslator(source='yi', target=tlang)
        df['Result'] = df['Book Result'].apply(translator.translate)

        # The "Step" written to the config is the sum of all per-date steps.
        config_json = generate_json_dump(start, end, total_steps, rounds, length, tlang,
                                         strip_spaces, strip_in_braces, strip_diacritics_chk,
                                         gematria_text, df, search_word, start_date, end_date)
        return config_json, df

    gematria_btn.click(
        calculate_journal_sum_for_date,
        inputs=[gematria_text, start_date, date_language_input],
        outputs=[gematria_result, step, float_step]
    )

    half_step_btn.click(
        update_step_half,
        inputs=[float_step],
        outputs=[step, float_step]
    )

    double_step_btn.click(
        update_step_double,
        inputs=[float_step],
        outputs=[step, float_step]
    )

    translate_btn.click(
        perform_search_and_create_json,
        inputs=[start_date, end_date, date_language_input, search_word, start, end, step,
                rounds_combination, length, tlang, strip_spaces, strip_in_braces,
                strip_diacritics_chk, gematria_text, average_combine_chk],
        outputs=[json_output, results_output]
    )

    json_download_btn.click(
        handle_json_download,
        inputs=[json_output, step, rounds, strip_spaces, strip_in_braces, strip_diacritics_chk],
        outputs=[json_file]
    )

if __name__ == "__main__":
    app.launch(share=False)