book-of-souls-2-word-search

Runtime error

App Files Files Community

neuralworm committed on Aug 4

Commit

2a65456

•

1 Parent(s): 40288c3

speed up search

Browse files

Files changed (4) hide show

.gitignore +2 -0
app.py +106 -204
temuraeh.py +0 -59
utils.py +23 -10

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ __pycache__
2	+ .idea

app.py CHANGED Viewed

@@ -1,34 +1,31 @@
-import logging
 import json
 import math
-import re
 from datetime import datetime, timedelta
 import pandas as pd
 from deep_translator import GoogleTranslator
 from gradio_calendar import Calendar
-import gradio as gr
-import torah
 from gematria import calculate_gematria, strip_diacritics
 from utils import (
-    number_to_ordinal_word,
-    custom_normalize,
     date_to_words,
     translate_date_to_words,
     process_json_files
 )
 # --- Constants ---
 FORBIDDEN_NAMES_FILE = "c.txt"
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.DEBUG)
 # --- Helper Functions ---
-def create_language_dropdown(label: str, default_value: str = 'en', show_label: bool = True) -> gr.Dropdown:
     """Creates a Gradio dropdown menu for language selection.
     Args:
@@ -39,7 +36,7 @@ def create_language_dropdown(label: str, default_value: str = 'en', show_label:
     Returns:
         gr.Dropdown: The Gradio dropdown component.
     """
-    languages = GoogleTranslator(source='en', target='en').get_supported_languages(as_dict=True)
     return gr.Dropdown(
         choices=list(languages.keys()),
         label=label,
@@ -47,60 +44,31 @@ def create_language_dropdown(label: str, default_value: str = 'en', show_label:
         show_label=show_label
     )
-def calculate_gematria_sum(text: str, date_words: str) -> int:
-    """Calculates the Gematria sum for a text and date words.
-    Args:
-        text (str): The text for Gematria calculation.
-        date_words (str): The date in words for Gematria calculation.
-    Returns:
-        int: The Gematria sum.
-    """
     combined_input = f"{text} {date_words}"
     logger.info(f"Combined input for Gematria: {combined_input}")
     sum_value = calculate_gematria(strip_diacritics(combined_input))
     logger.info(f"Gematria sum: {sum_value}")
     return sum_value
-def perform_els_search(start: int, end: int, step: int, rounds: int, length: int, tlang: str,
-                      strip_spaces: bool, strip_in_braces: bool, strip_diacritics: bool, average_combine: bool,
-                      search_word: str, date_words: str) -> list:
-    """Performs the ELS search and filters by the Yiddish search word.
-    Args:
-        start (int): The starting book number.
-        end (int): The ending book number.
-        step (int): The step/jump width for ELS.
-        rounds (int): The number of rounds through the books.
-        length (int): The desired length of the results (0 for infinite).
-        tlang (str): The target language for translation.
-        strip_spaces (bool): Whether to strip spaces from book content.
-        strip_in_braces (bool): Whether to strip text within braces from book content.
-        strip_diacritics (bool): Whether to strip diacritics from book content.
-        average_combine (bool): Whether to average-combine the results of combined rounds.
-        search_word (str): The word to search for.
-        date_words (str): The date in words.
-    Returns:
-        list: A list of filtered results, each containing the date, book result, and translated result.
-    """
     logger.info("Starting ELS search...")
-    logger.debug(f"Search word (original): {search_word}")
-    # Translate the search word to Yiddish
-    translator_yi = GoogleTranslator(source='auto', target='yi')
-    search_word_yiddish = translator_yi.translate(search_word)
-    logger.debug(f"Search word (Yiddish): {search_word_yiddish}")
     if step == 0 or rounds == 0:
         logger.info("Cannot search with step 0 or rounds 0")
-        return []  # Return an empty list instead of None
     results = process_json_files(start, end, step, rounds, length, tlang, strip_spaces,
-                                  strip_in_braces, strip_diacritics, average_combine,
-                                  translate_results=False)  # Don't translate here
     # Filter results by search word in els_result_text (Yiddish)
     filtered_results = []
@@ -109,38 +77,18 @@ def perform_els_search(start: int, end: int, step: int, rounds: int, length: int
         if 'els_result_text' in result and search_word_yiddish in result['els_result_text']:
             filtered_results.append({
                 'Date': date_words,
-                'Book Result': result['els_result_text'],  # Use the original Yiddish text
-                'Result': result.get('translated_text', '')  # Get translated text if available
             })
     return filtered_results
 def generate_json_dump(start: int, end: int, step: int, rounds: int, length: int, tlang: str,
-                      strip_spaces: bool, strip_in_braces: bool, strip_diacritics_chk: bool,
-                      search_phrase: str, results_df: pd.DataFrame, search_word: str,
-                      start_date: datetime, end_date: datetime) -> str:
-    """Generates the JSON dump with configuration, date range, and results.
-    Args:
-        start (int): The starting book number.
-        end (int): The ending book number.
-        step (int): The step/jump width for ELS.
-        rounds (int): The number of rounds through the books.
-        length (int): The desired length of the results (0 for infinite).
-        tlang (str): The target language for translation.
-        strip_spaces (bool): Whether to strip spaces from book content.
-        strip_in_braces (bool): Whether to strip text within braces from book content.
-        strip_diacritics_chk (bool): Whether to strip diacritics from book content.
-        search_phrase (str): The search phrase used.
-        results_df (pd.DataFrame): The DataFrame containing the results.
-        search_word (str): The word to search for.
-        start_date (datetime): The start date of the search.
-        end_date (datetime): The end date of the search.
-    Returns:
-        str: The JSON dump as a string.
-    """
     config = {
         "Start Book": start,
         "End Book": end,
@@ -167,20 +115,8 @@ def generate_json_dump(start: int, end: int, step: int, rounds: int, length: int
 def download_json_file(config_json: str, step: int, rounds: int,
-                      strip_spaces: bool, strip_in_braces: bool, strip_diacritics_chk: bool) -> str:
-    """Downloads the JSON config file with a descriptive name.
-    Args:
-        config_json (str): The JSON configuration data.
-        step (int): The step/jump width for ELS.
-        rounds (int): The number of rounds through the books.
-        strip_spaces (bool): Whether spaces were stripped.
-        strip_in_braces (bool): Whether text in braces was stripped.
-        strip_diacritics_chk (bool): Whether diacritics were stripped.
-    Returns:
-        str: The path to the downloaded file.
-    """
     filename_suffix = ""
     if strip_spaces:
         filename_suffix += "-stSp"
@@ -188,24 +124,17 @@ def download_json_file(config_json: str, step: int, rounds: int,
         filename_suffix += "-stBr"
     if strip_diacritics_chk:
         filename_suffix += "-stDc"
-    file_path = f"step-{step}-rounds-{rounds}{filename_suffix}.json"  # Include rounds in filename
     with open(file_path, "w", encoding='utf-8') as file:
         file.write(config_json)
     logger.info(f"Downloaded JSON file to: {file_path}")
     return file_path
 # --- Forbidden Names Functions ---
 def load_forbidden_names(filename: str = FORBIDDEN_NAMES_FILE) -> list:
-    """Loads forbidden names from the specified file.
-    Args:
-        filename (str, optional): The path to the file containing forbidden names.
-                                    Defaults to FORBIDDEN_NAMES_FILE.
-    Returns:
-        list: A list of forbidden names.
-    """
     try:
         with open(filename, "r", encoding='utf-8') as f:
             forbidden_names = [line.strip() for line in f]
@@ -216,16 +145,7 @@ def load_forbidden_names(filename: str = FORBIDDEN_NAMES_FILE) -> list:
 def check_name_similarity(name: str, forbidden_names: list, threshold: int = 80) -> bool:
-    """Checks if a name is similar to any forbidden name.
-    Args:
-        name (str): The name to check.
-        forbidden_names (list): A list of forbidden names.
-        threshold (int, optional): The similarity threshold (0-100). Defaults to 80.
-    Returns:
-        bool: True if the name is similar to a forbidden name, False otherwise.
-    """
     from fuzzywuzzy import fuzz
     for forbidden_name in forbidden_names:
         similarity_ratio = fuzz.ratio(name.lower(), forbidden_name.lower())
@@ -234,13 +154,14 @@ def check_name_similarity(name: str, forbidden_names: list, threshold: int = 80)
             return True
     return False
 # --- Gradio UI ---
 with gr.Blocks() as app:
     with gr.Row():
         start_date = Calendar(type="datetime", label="1. Select Start Date")
         end_date = Calendar(type="datetime", label="2. Select End Date")
-        date_language_input = create_language_dropdown("3. Date Word Language (supported: all based on: latin, greek, arabic, hebrew)", default_value='english')
         search_word = gr.Textbox(label="4. Search Word")
     with gr.Row():
@@ -254,7 +175,7 @@ with gr.Blocks() as app:
         end = gr.Number(label="End Book", value=39)
         step = gr.Number(label="Jump Width (Steps) for ELS")
         rounds = gr.Number(label="Rounds through Books", value=1)
-        float_step = gr.Number(visible=False, value=1)  # For half/double step calculations
         half_step_btn = gr.Button("Steps / 2")
         double_step_btn = gr.Button("Steps * 2")
@@ -262,26 +183,25 @@ with gr.Blocks() as app:
             round_x = gr.Number(label="Round (x)", value=1)
             round_y = gr.Number(label="Round (y)", value=-1)
-        average_combine_chk = gr.Checkbox(label="Average-Combine Combined Rounds (hacky)", value=False)
         mirror_book_numbers = gr.Checkbox(label="Mirror book numbers for negative rounds (axis=book 20)", value=False)
         rounds_combination = gr.Textbox(label="Combined Rounds", value="1,-1")
     with gr.Row():
         length = gr.Number(label="Result Length (0=inf)", value=0)
-        tlang = create_language_dropdown("Target Language for Translation", default_value='english')
         strip_spaces = gr.Checkbox(label="Strip Spaces from Books", value=True)
         strip_in_braces = gr.Checkbox(label="Strip Text in Braces from Books", value=True)
         strip_diacritics_chk = gr.Checkbox(label="Strip Diacritics from Books", value=True)
         acknowledgment_chk = gr.Checkbox(
-            label="The User hereby accepts that the User will not harm or stalk anyone with this information, or bet on any of this information, in any regards.",  # Add your full disclaimer here
             value=True
         )
     translate_btn = gr.Button("7. Search with ELS")
-    results_output = gr.Dataframe(headers=['Date', 'Book Result', 'Result'], label="Results") # Changed to Dataframe
     json_output = gr.Textbox(label="JSON Configuration Output")
     json_download_btn = gr.Button("Prepare .json for Download")
     json_file = gr.File(label="Download Config JSON", file_count="single")
@@ -290,140 +210,121 @@ with gr.Blocks() as app:
     forbidden_names = load_forbidden_names()
     # --- Event Handlers ---
     def update_rounds_combination(round_x: int, round_y: int) -> str:
-        """Updates the rounds_combination textbox based on round_x and round_y.
-        Args:
-            round_x (int): The value of round x.
-            round_y (int): The value of round y.
-        Returns:
-            str: The combined rounds string.
-        """
         return f"{int(round_x)},{int(round_y)}"
     def calculate_journal_sum(text: str, date_words: str) -> tuple:
-        """Calculates the journal sum and updates the step value.
-        Args:
-            text (str): The input text for calculation.
-            date_words (str): The date in words.
-        Returns:
-            tuple: A tuple containing the journal sum, step, and float_step.
-        """
         if check_name_similarity(text, forbidden_names):
-            return 0, 0, 0  # Return 0 if the name is forbidden
         if check_name_similarity(date_words, forbidden_names):
-            return 0, 0, 0  # Return 0 if the name is forbidden
         sum_value = calculate_gematria_sum(text, date_words)
-        return sum_value, sum_value, sum_value  # Returning the same value three times
-    def update_step_half(float_step: float) -> tuple:
-        """Updates the step value to half.
-        Args:
-            float_step (float): The current float step value.
-        Returns:
-            tuple: A tuple containing the new step value and the new float step value.
-        """
         new_step = math.ceil(float_step / 2)
         return new_step, float_step / 2
-    def update_step_double(float_step: float) -> tuple:
-        """Updates the step value to double.
-        Args:
-            float_step (float): The current float step value.
-        Returns:
-            tuple: A tuple containing the new step value and the new float step value.
-        """
         new_step = math.ceil(float_step * 2)
         return new_step, float_step * 2
     # Update rounds_combination when round_x or round_y changes
     round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
     round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
     def handle_json_download(config_json: str, step: int, rounds: int, strip_spaces: bool,
-                              strip_in_braces: bool, strip_diacritics_chk: bool) -> str:
-        """Handles the download of the JSON config file.
-        Args:
-            config_json (str): The JSON configuration data.
-            step (int): The step/jump width for ELS.
-            rounds (int): The number of rounds through the books.
-            strip_spaces (bool): Whether spaces were stripped.
-            strip_in_braces (bool): Whether text in braces was stripped.
-            strip_diacritics_chk (bool): Whether diacritics were stripped.
-        Returns:
-            str: The path to the downloaded file.
-        """
         return download_json_file(config_json, step, rounds, strip_spaces, strip_in_braces, strip_diacritics_chk)
     def perform_search_and_create_json(start_date: datetime, end_date: datetime, date_language_input: str,
-                                        search_word: str, start: int, end: int, step: int, rounds: int, length: int,
-                                        tlang: str, strip_spaces: bool, strip_in_braces: bool, strip_diacritics_chk: bool,
-                                        gematria_text: str, average_combine: bool) -> tuple:
-        """Performs the ELS search for each date in the range, creates the JSON config, and displays the results.
-        Args:
-            start_date (datetime): The start date for the search.
-            end_date (datetime): The end date for the search.
-            date_language_input (str): The language for the date words.
-            search_word (str): The word to search for.
-            start (int): The starting book number.
-            end (int): The ending book number.
-            step (int): The step/jump width for ELS.
-            rounds (int): The number of rounds through the books.
-            length (int): The desired length of the results (0 for infinite).
-            tlang (str): The target language for translation.
-            strip_spaces (bool): Whether to strip spaces from book content.
-            strip_in_braces (bool): Whether to strip text within braces from book content.
-            strip_diacritics_chk (bool): Whether to strip diacritics from book content.
-            gematria_text (str): The text for Gematria calculation.
-            average_combine (bool): Whether to average-combine the results of combined rounds.
-        Returns:
-            tuple: A tuple containing the JSON configuration and the results DataFrame.
-        """
         all_results = []
         delta = timedelta(days=1)
         while start_date <= end_date:
-            date_words_output = translate_date_to_words(start_date, date_language_input)
-            journal_sum, _, _ = calculate_journal_sum(gematria_text, date_words_output) # Calculate the journal sum
-            step = journal_sum # Set the step to the journal sum
             filtered_results = perform_els_search(start, end, step, rounds, length, tlang, strip_spaces,
-                                                  strip_in_braces, strip_diacritics_chk, average_combine, search_word,
                                                   date_words_output)
-            all_results.extend(filtered_results)
             start_date += delta
         if all_results:
             df = pd.DataFrame(all_results)
             # Translate the 'Book Result' column to the target language
-            translator = GoogleTranslator(source='yi', target=tlang)  # Translate from Yiddish
             df['Result'] = df['Book Result'].apply(translator.translate)
-            config_json = generate_json_dump(start, end, step, rounds, length, tlang, strip_spaces,
-                                            strip_in_braces, strip_diacritics_chk, gematria_text, df, search_word,
-                                            start_date, end_date)
             return config_json, df
         else:
-            return "No results found.", None  # Return None for the DataFrame when no results are found
     gematria_btn.click(
         calculate_journal_sum,
-        inputs=[gematria_text, date_language_input], # Using date_language_input as a placeholder
         outputs=[gematria_result, step, float_step]
     )
@@ -441,7 +342,8 @@ with gr.Blocks() as app:
     translate_btn.click(
         perform_search_and_create_json,
-        inputs=[start_date, end_date, date_language_input, search_word, start, end, step, rounds_combination, length, tlang, strip_spaces,
                 strip_in_braces, strip_diacritics_chk, gematria_text, average_combine_chk],
         outputs=[json_output, results_output]
     )
@@ -453,4 +355,4 @@ with gr.Blocks() as app:
     )
 if __name__ == "__main__":
-    app.launch(share=False)

 import json
+import logging
 import math
 from datetime import datetime, timedelta
+import gradio as gr
 import pandas as pd
 from deep_translator import GoogleTranslator
 from gradio_calendar import Calendar
 from gematria import calculate_gematria, strip_diacritics
 from utils import (
     date_to_words,
     translate_date_to_words,
     process_json_files
 )
 # --- Constants ---
 FORBIDDEN_NAMES_FILE = "c.txt"
+DEFAULT_LANGUAGE = 'english'
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.DEBUG)
 # --- Helper Functions ---
+def create_language_dropdown(label: str, default_value: str = DEFAULT_LANGUAGE, show_label: bool = True) -> gr.Dropdown:
     """Creates a Gradio dropdown menu for language selection.
     Args:
     Returns:
         gr.Dropdown: The Gradio dropdown component.
     """
+    languages = GoogleTranslator().get_supported_languages(as_dict=True)
     return gr.Dropdown(
         choices=list(languages.keys()),
         label=label,
         show_label=show_label
     )
+def calculate_gematria_sum(text: str, date_words: str) -> int:
+    """Calculates the Gematria sum for a text and date words."""
     combined_input = f"{text} {date_words}"
     logger.info(f"Combined input for Gematria: {combined_input}")
     sum_value = calculate_gematria(strip_diacritics(combined_input))
     logger.info(f"Gematria sum: {sum_value}")
     return sum_value
+def perform_els_search(start: int, end: int, step: int, rounds: int, length: int, tlang: str,
+                       strip_spaces: bool, strip_in_braces: bool, strip_diacritics: bool, average_combine: bool,
+                       search_word_yiddish: str, date_words: str) -> list:  # Accept Yiddish word
+    """Performs the ELS search and filters by the Yiddish search word."""
     logger.info("Starting ELS search...")
+    logger.debug(f"Search word (Yiddish): {search_word_yiddish}")  # No translation here
     if step == 0 or rounds == 0:
         logger.info("Cannot search with step 0 or rounds 0")
+        return []
     results = process_json_files(start, end, step, rounds, length, tlang, strip_spaces,
+                                 strip_in_braces, strip_diacritics, average_combine,
+                                 translate_results=False)
     # Filter results by search word in els_result_text (Yiddish)
     filtered_results = []
         if 'els_result_text' in result and search_word_yiddish in result['els_result_text']:
             filtered_results.append({
                 'Date': date_words,
+                'Book Result': result['els_result_text'],
+                'Result': result.get('translated_text', '')
             })
     return filtered_results
 def generate_json_dump(start: int, end: int, step: int, rounds: int, length: int, tlang: str,
+                       strip_spaces: bool, strip_in_braces: bool, strip_diacritics_chk: bool,
+                       search_phrase: str, results_df: pd.DataFrame, search_word: str,
+                       start_date: datetime, end_date: datetime) -> str:
+    """Generates the JSON dump with configuration, date range, and results."""
     config = {
         "Start Book": start,
         "End Book": end,
 def download_json_file(config_json: str, step: int, rounds: int,
+                       strip_spaces: bool, strip_in_braces: bool, strip_diacritics_chk: bool) -> str:
+    """Downloads the JSON config file with a descriptive name."""
     filename_suffix = ""
     if strip_spaces:
         filename_suffix += "-stSp"
         filename_suffix += "-stBr"
     if strip_diacritics_chk:
         filename_suffix += "-stDc"
+    file_path = f"step-{step}-rounds-{rounds}{filename_suffix}.json"
     with open(file_path, "w", encoding='utf-8') as file:
         file.write(config_json)
     logger.info(f"Downloaded JSON file to: {file_path}")
     return file_path
 # --- Forbidden Names Functions ---
 def load_forbidden_names(filename: str = FORBIDDEN_NAMES_FILE) -> list:
+    """Loads forbidden names from the specified file."""
     try:
         with open(filename, "r", encoding='utf-8') as f:
             forbidden_names = [line.strip() for line in f]
 def check_name_similarity(name: str, forbidden_names: list, threshold: int = 80) -> bool:
+    """Checks if a name is similar to any forbidden name."""
     from fuzzywuzzy import fuzz
     for forbidden_name in forbidden_names:
         similarity_ratio = fuzz.ratio(name.lower(), forbidden_name.lower())
             return True
     return False
 # --- Gradio UI ---
 with gr.Blocks() as app:
     with gr.Row():
         start_date = Calendar(type="datetime", label="1. Select Start Date")
         end_date = Calendar(type="datetime", label="2. Select End Date")
+        date_language_input = create_language_dropdown("3. Date Word Language", default_value=DEFAULT_LANGUAGE)
         search_word = gr.Textbox(label="4. Search Word")
     with gr.Row():
         end = gr.Number(label="End Book", value=39)
         step = gr.Number(label="Jump Width (Steps) for ELS")
         rounds = gr.Number(label="Rounds through Books", value=1)
+        float_step = gr.Number(visible=False, value=1)
         half_step_btn = gr.Button("Steps / 2")
         double_step_btn = gr.Button("Steps * 2")
             round_x = gr.Number(label="Round (x)", value=1)
             round_y = gr.Number(label="Round (y)", value=-1)
+        average_combine_chk = gr.Checkbox(label="Average-Combine Combined Rounds", value=False)
         mirror_book_numbers = gr.Checkbox(label="Mirror book numbers for negative rounds (axis=book 20)", value=False)
         rounds_combination = gr.Textbox(label="Combined Rounds", value="1,-1")
     with gr.Row():
         length = gr.Number(label="Result Length (0=inf)", value=0)
+        tlang = create_language_dropdown("Target Language for Translation", default_value=DEFAULT_LANGUAGE)
         strip_spaces = gr.Checkbox(label="Strip Spaces from Books", value=True)
         strip_in_braces = gr.Checkbox(label="Strip Text in Braces from Books", value=True)
         strip_diacritics_chk = gr.Checkbox(label="Strip Diacritics from Books", value=True)
         acknowledgment_chk = gr.Checkbox(
+            label="The User hereby accepts that the User will not harm or stalk anyone with this information, or bet on any of this information, in any regards.",
             value=True
         )
     translate_btn = gr.Button("7. Search with ELS")
+    results_output = gr.Dataframe(headers=['Date', 'Book Result', 'Result'], label="Results")
     json_output = gr.Textbox(label="JSON Configuration Output")
     json_download_btn = gr.Button("Prepare .json for Download")
     json_file = gr.File(label="Download Config JSON", file_count="single")
     forbidden_names = load_forbidden_names()
     # --- Event Handlers ---
     def update_rounds_combination(round_x: int, round_y: int) -> str:
+        """Updates the rounds_combination textbox based on round_x and round_y."""
         return f"{int(round_x)},{int(round_y)}"
     def calculate_journal_sum(text: str, date_words: str) -> tuple:
+        """Calculates the journal sum and updates the step value."""
         if check_name_similarity(text, forbidden_names):
+            return 0, 0, 0
         if check_name_similarity(date_words, forbidden_names):
+            return 0, 0, 0
         sum_value = calculate_gematria_sum(text, date_words)
+        return sum_value, sum_value, sum_value
+    def update_step_half(float_step: float) -> tuple:
+        """Updates the step value to half."""
         new_step = math.ceil(float_step / 2)
         return new_step, float_step / 2
+    def update_step_double(float_step: float) -> tuple:
+        """Updates the step value to double."""
         new_step = math.ceil(float_step * 2)
         return new_step, float_step * 2
     # Update rounds_combination when round_x or round_y changes
     round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
     round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
     def handle_json_download(config_json: str, step: int, rounds: int, strip_spaces: bool,
+                             strip_in_braces: bool, strip_diacritics_chk: bool) -> str:
+        """Handles the download of the JSON config file."""
         return download_json_file(config_json, step, rounds, strip_spaces, strip_in_braces, strip_diacritics_chk)
     def perform_search_and_create_json(start_date: datetime, end_date: datetime, date_language_input: str,
+                                       search_word: str, start: int, end: int, step: int, rounds: int, length: int,
+                                       tlang: str, strip_spaces: bool, strip_in_braces: bool,
+                                       strip_diacritics_chk: bool,
+                                       gematria_text: str, average_combine: bool) -> tuple:
+        """Performs the ELS search for each date in the range, creates the JSON config, and displays the results."""
         all_results = []
         delta = timedelta(days=1)
+        original_start_date = start_date
+        total_steps = 0
+        # Translate the search word to Yiddish ONLY ONCE (outside the loop)
+        translator_yi = GoogleTranslator(source='auto', target='yi')
+        search_word_yiddish = translator_yi.translate(search_word)
+        seen_dates = set()  # Keep track of processed dates
         while start_date <= end_date:
+            date_words_output = date_to_words(start_date.strftime("%Y-%m-%d"))
+            # Only translate if the date language is not English
+            if date_language_input.lower() != DEFAULT_LANGUAGE:
+                date_words_output = translate_date_to_words(start_date, date_language_input)
+            # Skip if date has already been processed
+            if date_words_output in seen_dates:
+                start_date += delta
+                continue
+            seen_dates.add(date_words_output)
+            journal_sum, _, _ = calculate_journal_sum(gematria_text, date_words_output)
+            step = journal_sum
+            total_steps += step
             filtered_results = perform_els_search(start, end, step, rounds, length, tlang, strip_spaces,
+                                                  strip_in_braces, strip_diacritics_chk, average_combine,
+                                                  search_word_yiddish,  # Pass the translated Yiddish word
                                                   date_words_output)
+            # Only add the first result for each date
+            if filtered_results:
+                all_results.append(filtered_results[0])
             start_date += delta
+        # Process results after the loop completes
         if all_results:
             df = pd.DataFrame(all_results)
+            # Deduplicate steps
+            seen_steps = set()
+            deduplicated_results = []
+            for result in all_results:
+                step_key = (result['Date'], result['Book Result'])
+                if step_key not in seen_steps:
+                    deduplicated_results.append(result)
+                    seen_steps.add(step_key)
+            df = pd.DataFrame(deduplicated_results)
             # Translate the 'Book Result' column to the target language
+            translator = GoogleTranslator(source='yi', target=tlang)
             df['Result'] = df['Book Result'].apply(translator.translate)
+            config_json = generate_json_dump(start, end, total_steps, rounds, length, tlang, strip_spaces,
+                                             strip_in_braces, strip_diacritics_chk, gematria_text, df, search_word,
+                                             original_start_date, end_date)
             return config_json, df
         else:
+            return "No results found.", None
     gematria_btn.click(
         calculate_journal_sum,
+        inputs=[gematria_text, date_language_input],
         outputs=[gematria_result, step, float_step]
     )
     translate_btn.click(
         perform_search_and_create_json,
+        inputs=[start_date, end_date, date_language_input, search_word, start, end, step, rounds_combination, length,
+                tlang, strip_spaces,
                 strip_in_braces, strip_diacritics_chk, gematria_text, average_combine_chk],
         outputs=[json_output, results_output]
     )
     )
 if __name__ == "__main__":
+    app.launch(share=False)

temuraeh.py DELETED Viewed

@@ -1,59 +0,0 @@
-import json
-# Let's implement the temurah function with the full alphabet and try converting "Baphomet" to "Sofia"
-# in Hebrew using temurah.
-# Note: The exact representation of "Baphomet" and "Sofia" in Hebrew may vary depending on interpretation,
-# but here we use approximate transliterations to illustrate how it could be done.
-def temurah(text, hebrew_alphabet='אבגדהוזחטיכלמנסעפצקרשת', reverse=False):
-    """
-    Applies temurah to a Hebrew text using the whole Hebrew alphabet.
-    The example scheme simply reverses the order of the alphabet, so each
-    letter is replaced by the letter at the mirrored position.
-
-    Args:
-        text: The text to transform. Characters that are not in
-            ``hebrew_alphabet`` are copied through unchanged.
-        hebrew_alphabet: The ordered letters used to build the substitution table.
-        reverse: If true, the alphabet is reversed before the table is built.
-            NOTE(review): because the table pairs the alphabet with its own
-            reversal, an alphabet of distinct letters yields the same mapping
-            either way.
-
-    Returns:
-        The transformed text as a string.
-    """
-    # Reverse the alphabet if requested
-    if reverse:
-        hebrew_alphabet = hebrew_alphabet[::-1]
-    # Build the reversed alphabet
-    inverted_alphabet = hebrew_alphabet[::-1]
-    # Create the mapping dictionary for temurah
-    temurah_mapping = {orig: inv for orig, inv in zip(hebrew_alphabet, inverted_alphabet)}
-    # Apply temurah to the text
-    temurah_text = ''.join(temurah_mapping.get(char, char) for char in text)
-    return temurah_text
-# Define the Hebrew alphabet
-hebrew_alphabet = 'אבגדהוזחטיכלמנסעפצקרשת'
-# Example text: "Baphomet" and "Sofia" in Hebrew
-# It is important to note that transliterating proper names or specific terms directly between languages
-# may not be straightforward, or may require adjustments based on phonetics or historical usage.
-# For simplicity, let us assume hypothetical transliterations for "Baphomet" and "Sofia":
-# These transliterations are examples and may not reflect accurate transliterations.
-baphomet_hebrew = 'בפומת'  # This is a hypothetical transliteration of "Baphomet"
-sofia_hebrew = 'סופיא'  # This is a hypothetical transliteration of "Sofia"
-jesus ="ישוע"
-christ = ""
-print(temurah(jesus,hebrew_alphabet))
-# Apply temurah to the hypothetical "Baphomet" text
-temurah_baphomet = temurah(baphomet_hebrew, hebrew_alphabet)
-# Show the results
-print(temurah_baphomet+"\n"+sofia_hebrew)
-genesis = json.loads(open("genesis.json","r").read())["text"][0]
-##example_text = "בראשית ברא אלהים את השמים ואת הארץ"  # "In the beginning God created the heavens and the earth."
-#for txt in genesis:
-#    print(temurah(txt,hebrew_alphabet))

utils.py CHANGED Viewed

@@ -8,8 +8,20 @@ import inflect
 from datetime import datetime
 from deep_translator import GoogleTranslator
-def process_json_files(start, end, step, rounds="1", length=0, tlang="en", strip_spaces=True,
-                      strip_in_braces=True, strip_diacritics=True, average_compile=False, translate_results=False):
     """Processes JSON files to extract and process text.
     Args:
@@ -23,7 +35,7 @@ def process_json_files(start, end, step, rounds="1", length=0, tlang="en", strip
         strip_in_braces (bool, optional): Whether to strip text in braces. Defaults to True.
         strip_diacritics (bool, optional): Whether to strip diacritics. Defaults to True.
         average_compile (bool, optional): Whether to average-combine results. Defaults to False.
-        translate_results (bool, optional): Whether to translate the results. Defaults to False.
     Returns:
         list: A list of processed results.
@@ -57,19 +69,19 @@ def process_json_files(start, end, step, rounds="1", length=0, tlang="en", strip
                     clean_text = clean_text.replace("  ", " ")
                 text_length = len(clean_text)
                 selected_characters_per_round = {}
                 for round_num in map(int, rounds.split(',')):
                     # Handle cases where no characters should be selected
                     if not (round_num == 1 and step > text_length) and not (round_num == -1 and step > text_length):
                         # Corrected logic for negative rounds and step = 1
                         if round_num > 0:
-                            current_position = step - 1
                         else:
                             current_position = text_length - 1 if step == 1 else text_length - step
                         completed_rounds = 0
-                        selected_characters = ""
                         while completed_rounds < abs(round_num):
                             selected_characters += clean_text[current_position % text_length]
@@ -78,16 +90,17 @@ def process_json_files(start, end, step, rounds="1", length=0, tlang="en", strip
                             current_position += step if round_num > 0 else -step
                             if (round_num > 0 and current_position >= text_length * (completed_rounds + 1)) or \
-                               (round_num < 0 and current_position < 0):
                                 completed_rounds += 1
                         selected_characters_per_round[round_num] = selected_characters
                 if average_compile and len(selected_characters_per_round) > 1:
                     result_text = ""
                     keys = sorted(selected_characters_per_round.keys())
                     for i in range(len(keys) - 1):
-                        result_text = average_gematria(selected_characters_per_round[keys[i]], selected_characters_per_round[keys[i+1]])
                 else:
                     result_text = ''.join(selected_characters_per_round.values())
@@ -103,7 +116,7 @@ def process_json_files(start, end, step, rounds="1", length=0, tlang="en", strip
                         "title": data["title"],
                         "els_result_text": result_text,
                         "els_result_gematria": calculate_gematria(result_text),
-                        "translated_text": translated_text
                     })
         except FileNotFoundError:

 from datetime import datetime
 from deep_translator import GoogleTranslator
+import logging
+logger = logging.getLogger(__name__)
+import json
+import re
+from gematria import calculate_gematria
+import inflect
+from datetime import datetime
+from deep_translator import GoogleTranslator
+def process_json_files(start, end, step, rounds="1", length=0, tlang="en", strip_spaces=True,
+                       strip_in_braces=True, strip_diacritics=True, average_compile=False, translate_results=False):
     """Processes JSON files to extract and process text.
     Args:
         strip_in_braces (bool, optional): Whether to strip text in braces. Defaults to True.
         strip_diacritics (bool, optional): Whether to strip diacritics. Defaults to True.
         average_compile (bool, optional): Whether to average-combine results. Defaults to False.
+        translate_results (bool, optional): Whether to translate the results. Defaults to False.
     Returns:
         list: A list of processed results.
                     clean_text = clean_text.replace("  ", " ")
                 text_length = len(clean_text)
                 selected_characters_per_round = {}
                 for round_num in map(int, rounds.split(',')):
                     # Handle cases where no characters should be selected
                     if not (round_num == 1 and step > text_length) and not (round_num == -1 and step > text_length):
                         # Corrected logic for negative rounds and step = 1
                         if round_num > 0:
+                            current_position = step - 1
                         else:
                             current_position = text_length - 1 if step == 1 else text_length - step
                         completed_rounds = 0
+                        selected_characters = ""
                         while completed_rounds < abs(round_num):
                             selected_characters += clean_text[current_position % text_length]
                             current_position += step if round_num > 0 else -step
                             if (round_num > 0 and current_position >= text_length * (completed_rounds + 1)) or \
+                                    (round_num < 0 and current_position < 0):
                                 completed_rounds += 1
                         selected_characters_per_round[round_num] = selected_characters
                 if average_compile and len(selected_characters_per_round) > 1:
                     result_text = ""
                     keys = sorted(selected_characters_per_round.keys())
                     for i in range(len(keys) - 1):
+                        result_text = average_gematria(selected_characters_per_round[keys[i]],
+                                                       selected_characters_per_round[keys[i + 1]])
                 else:
                     result_text = ''.join(selected_characters_per_round.values())
                         "title": data["title"],
                         "els_result_text": result_text,
                         "els_result_gematria": calculate_gematria(result_text),
+                        "translated_text": translated_text
                     })
         except FileNotFoundError: