Spaces:
Runtime error
Runtime error
neuralworm
commited on
Commit
•
2a65456
1
Parent(s):
40288c3
speed up search
Browse files- .gitignore +2 -0
- app.py +106 -204
- temuraeh.py +0 -59
- utils.py +23 -10
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
__pycache__
|
2 |
+
.idea
|
app.py
CHANGED
@@ -1,34 +1,31 @@
|
|
1 |
-
import logging
|
2 |
import json
|
|
|
3 |
import math
|
4 |
-
import re
|
5 |
from datetime import datetime, timedelta
|
6 |
|
|
|
7 |
import pandas as pd
|
8 |
from deep_translator import GoogleTranslator
|
9 |
from gradio_calendar import Calendar
|
10 |
-
import gradio as gr
|
11 |
-
import torah
|
12 |
|
13 |
from gematria import calculate_gematria, strip_diacritics
|
14 |
from utils import (
|
15 |
-
number_to_ordinal_word,
|
16 |
-
custom_normalize,
|
17 |
date_to_words,
|
18 |
translate_date_to_words,
|
19 |
process_json_files
|
20 |
)
|
21 |
|
22 |
-
|
23 |
# --- Constants ---
|
24 |
FORBIDDEN_NAMES_FILE = "c.txt"
|
|
|
25 |
|
26 |
logger = logging.getLogger(__name__)
|
27 |
logging.basicConfig(level=logging.DEBUG)
|
28 |
|
|
|
29 |
# --- Helper Functions ---
|
30 |
|
31 |
-
def create_language_dropdown(label: str, default_value: str =
|
32 |
"""Creates a Gradio dropdown menu for language selection.
|
33 |
|
34 |
Args:
|
@@ -39,7 +36,7 @@ def create_language_dropdown(label: str, default_value: str = 'en', show_label:
|
|
39 |
Returns:
|
40 |
gr.Dropdown: The Gradio dropdown component.
|
41 |
"""
|
42 |
-
languages = GoogleTranslator(
|
43 |
return gr.Dropdown(
|
44 |
choices=list(languages.keys()),
|
45 |
label=label,
|
@@ -47,60 +44,31 @@ def create_language_dropdown(label: str, default_value: str = 'en', show_label:
|
|
47 |
show_label=show_label
|
48 |
)
|
49 |
|
50 |
-
def calculate_gematria_sum(text: str, date_words: str) -> int:
|
51 |
-
"""Calculates the Gematria sum for a text and date words.
|
52 |
-
|
53 |
-
Args:
|
54 |
-
text (str): The text for Gematria calculation.
|
55 |
-
date_words (str): The date in words for Gematria calculation.
|
56 |
|
57 |
-
|
58 |
-
|
59 |
-
"""
|
60 |
combined_input = f"{text} {date_words}"
|
61 |
logger.info(f"Combined input for Gematria: {combined_input}")
|
62 |
sum_value = calculate_gematria(strip_diacritics(combined_input))
|
63 |
logger.info(f"Gematria sum: {sum_value}")
|
64 |
return sum_value
|
65 |
|
66 |
-
def perform_els_search(start: int, end: int, step: int, rounds: int, length: int, tlang: str,
|
67 |
-
strip_spaces: bool, strip_in_braces: bool, strip_diacritics: bool, average_combine: bool,
|
68 |
-
search_word: str, date_words: str) -> list:
|
69 |
-
"""Performs the ELS search and filters by the Yiddish search word.
|
70 |
-
|
71 |
-
Args:
|
72 |
-
start (int): The starting book number.
|
73 |
-
end (int): The ending book number.
|
74 |
-
step (int): The step/jump width for ELS.
|
75 |
-
rounds (int): The number of rounds through the books.
|
76 |
-
length (int): The desired length of the results (0 for infinite).
|
77 |
-
tlang (str): The target language for translation.
|
78 |
-
strip_spaces (bool): Whether to strip spaces from book content.
|
79 |
-
strip_in_braces (bool): Whether to strip text within braces from book content.
|
80 |
-
strip_diacritics (bool): Whether to strip diacritics from book content.
|
81 |
-
average_combine (bool): Whether to average-combine the results of combined rounds.
|
82 |
-
search_word (str): The word to search for.
|
83 |
-
date_words (str): The date in words.
|
84 |
|
85 |
-
|
86 |
-
|
87 |
-
|
|
|
88 |
|
89 |
logger.info("Starting ELS search...")
|
90 |
-
logger.debug(f"Search word (
|
91 |
-
|
92 |
-
# Translate the search word to Yiddish
|
93 |
-
translator_yi = GoogleTranslator(source='auto', target='yi')
|
94 |
-
search_word_yiddish = translator_yi.translate(search_word)
|
95 |
-
logger.debug(f"Search word (Yiddish): {search_word_yiddish}")
|
96 |
|
97 |
if step == 0 or rounds == 0:
|
98 |
logger.info("Cannot search with step 0 or rounds 0")
|
99 |
-
return []
|
100 |
|
101 |
results = process_json_files(start, end, step, rounds, length, tlang, strip_spaces,
|
102 |
-
|
103 |
-
|
104 |
|
105 |
# Filter results by search word in els_result_text (Yiddish)
|
106 |
filtered_results = []
|
@@ -109,38 +77,18 @@ def perform_els_search(start: int, end: int, step: int, rounds: int, length: int
|
|
109 |
if 'els_result_text' in result and search_word_yiddish in result['els_result_text']:
|
110 |
filtered_results.append({
|
111 |
'Date': date_words,
|
112 |
-
'Book Result': result['els_result_text'],
|
113 |
-
'Result': result.get('translated_text', '')
|
114 |
})
|
115 |
|
116 |
return filtered_results
|
117 |
|
118 |
|
119 |
def generate_json_dump(start: int, end: int, step: int, rounds: int, length: int, tlang: str,
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
"""Generates the JSON dump with configuration, date range, and results.
|
124 |
-
|
125 |
-
Args:
|
126 |
-
start (int): The starting book number.
|
127 |
-
end (int): The ending book number.
|
128 |
-
step (int): The step/jump width for ELS.
|
129 |
-
rounds (int): The number of rounds through the books.
|
130 |
-
length (int): The desired length of the results (0 for infinite).
|
131 |
-
tlang (str): The target language for translation.
|
132 |
-
strip_spaces (bool): Whether to strip spaces from book content.
|
133 |
-
strip_in_braces (bool): Whether to strip text within braces from book content.
|
134 |
-
strip_diacritics_chk (bool): Whether to strip diacritics from book content.
|
135 |
-
search_phrase (str): The search phrase used.
|
136 |
-
results_df (pd.DataFrame): The DataFrame containing the results.
|
137 |
-
search_word (str): The word to search for.
|
138 |
-
start_date (datetime): The start date of the search.
|
139 |
-
end_date (datetime): The end date of the search.
|
140 |
-
|
141 |
-
Returns:
|
142 |
-
str: The JSON dump as a string.
|
143 |
-
"""
|
144 |
config = {
|
145 |
"Start Book": start,
|
146 |
"End Book": end,
|
@@ -167,20 +115,8 @@ def generate_json_dump(start: int, end: int, step: int, rounds: int, length: int
|
|
167 |
|
168 |
|
169 |
def download_json_file(config_json: str, step: int, rounds: int,
|
170 |
-
|
171 |
-
"""Downloads the JSON config file with a descriptive name.
|
172 |
-
|
173 |
-
Args:
|
174 |
-
config_json (str): The JSON configuration data.
|
175 |
-
step (int): The step/jump width for ELS.
|
176 |
-
rounds (int): The number of rounds through the books.
|
177 |
-
strip_spaces (bool): Whether spaces were stripped.
|
178 |
-
strip_in_braces (bool): Whether text in braces was stripped.
|
179 |
-
strip_diacritics_chk (bool): Whether diacritics were stripped.
|
180 |
-
|
181 |
-
Returns:
|
182 |
-
str: The path to the downloaded file.
|
183 |
-
"""
|
184 |
filename_suffix = ""
|
185 |
if strip_spaces:
|
186 |
filename_suffix += "-stSp"
|
@@ -188,24 +124,17 @@ def download_json_file(config_json: str, step: int, rounds: int,
|
|
188 |
filename_suffix += "-stBr"
|
189 |
if strip_diacritics_chk:
|
190 |
filename_suffix += "-stDc"
|
191 |
-
file_path = f"step-{step}-rounds-{rounds}{filename_suffix}.json"
|
192 |
with open(file_path, "w", encoding='utf-8') as file:
|
193 |
file.write(config_json)
|
194 |
logger.info(f"Downloaded JSON file to: {file_path}")
|
195 |
return file_path
|
196 |
|
|
|
197 |
# --- Forbidden Names Functions ---
|
198 |
|
199 |
def load_forbidden_names(filename: str = FORBIDDEN_NAMES_FILE) -> list:
|
200 |
-
"""Loads forbidden names from the specified file.
|
201 |
-
|
202 |
-
Args:
|
203 |
-
filename (str, optional): The path to the file containing forbidden names.
|
204 |
-
Defaults to FORBIDDEN_NAMES_FILE.
|
205 |
-
|
206 |
-
Returns:
|
207 |
-
list: A list of forbidden names.
|
208 |
-
"""
|
209 |
try:
|
210 |
with open(filename, "r", encoding='utf-8') as f:
|
211 |
forbidden_names = [line.strip() for line in f]
|
@@ -216,16 +145,7 @@ def load_forbidden_names(filename: str = FORBIDDEN_NAMES_FILE) -> list:
|
|
216 |
|
217 |
|
218 |
def check_name_similarity(name: str, forbidden_names: list, threshold: int = 80) -> bool:
|
219 |
-
"""Checks if a name is similar to any forbidden name.
|
220 |
-
|
221 |
-
Args:
|
222 |
-
name (str): The name to check.
|
223 |
-
forbidden_names (list): A list of forbidden names.
|
224 |
-
threshold (int, optional): The similarity threshold (0-100). Defaults to 80.
|
225 |
-
|
226 |
-
Returns:
|
227 |
-
bool: True if the name is similar to a forbidden name, False otherwise.
|
228 |
-
"""
|
229 |
from fuzzywuzzy import fuzz
|
230 |
for forbidden_name in forbidden_names:
|
231 |
similarity_ratio = fuzz.ratio(name.lower(), forbidden_name.lower())
|
@@ -234,13 +154,14 @@ def check_name_similarity(name: str, forbidden_names: list, threshold: int = 80)
|
|
234 |
return True
|
235 |
return False
|
236 |
|
|
|
237 |
# --- Gradio UI ---
|
238 |
|
239 |
with gr.Blocks() as app:
|
240 |
with gr.Row():
|
241 |
start_date = Calendar(type="datetime", label="1. Select Start Date")
|
242 |
end_date = Calendar(type="datetime", label="2. Select End Date")
|
243 |
-
date_language_input = create_language_dropdown("3. Date Word Language
|
244 |
search_word = gr.Textbox(label="4. Search Word")
|
245 |
|
246 |
with gr.Row():
|
@@ -254,7 +175,7 @@ with gr.Blocks() as app:
|
|
254 |
end = gr.Number(label="End Book", value=39)
|
255 |
step = gr.Number(label="Jump Width (Steps) for ELS")
|
256 |
rounds = gr.Number(label="Rounds through Books", value=1)
|
257 |
-
float_step = gr.Number(visible=False, value=1)
|
258 |
half_step_btn = gr.Button("Steps / 2")
|
259 |
double_step_btn = gr.Button("Steps * 2")
|
260 |
|
@@ -262,26 +183,25 @@ with gr.Blocks() as app:
|
|
262 |
round_x = gr.Number(label="Round (x)", value=1)
|
263 |
round_y = gr.Number(label="Round (y)", value=-1)
|
264 |
|
265 |
-
average_combine_chk = gr.Checkbox(label="Average-Combine Combined Rounds
|
266 |
mirror_book_numbers = gr.Checkbox(label="Mirror book numbers for negative rounds (axis=book 20)", value=False)
|
267 |
|
268 |
rounds_combination = gr.Textbox(label="Combined Rounds", value="1,-1")
|
269 |
|
270 |
-
|
271 |
with gr.Row():
|
272 |
length = gr.Number(label="Result Length (0=inf)", value=0)
|
273 |
-
tlang = create_language_dropdown("Target Language for Translation", default_value=
|
274 |
strip_spaces = gr.Checkbox(label="Strip Spaces from Books", value=True)
|
275 |
strip_in_braces = gr.Checkbox(label="Strip Text in Braces from Books", value=True)
|
276 |
strip_diacritics_chk = gr.Checkbox(label="Strip Diacritics from Books", value=True)
|
277 |
acknowledgment_chk = gr.Checkbox(
|
278 |
-
label="The User hereby accepts that the User will not harm or stalk anyone with this information, or bet on any of this information, in any regards.",
|
279 |
value=True
|
280 |
)
|
281 |
|
282 |
translate_btn = gr.Button("7. Search with ELS")
|
283 |
|
284 |
-
results_output = gr.Dataframe(headers=['Date', 'Book Result', 'Result'], label="Results")
|
285 |
json_output = gr.Textbox(label="JSON Configuration Output")
|
286 |
json_download_btn = gr.Button("Prepare .json for Download")
|
287 |
json_file = gr.File(label="Download Config JSON", file_count="single")
|
@@ -290,140 +210,121 @@ with gr.Blocks() as app:
|
|
290 |
|
291 |
forbidden_names = load_forbidden_names()
|
292 |
|
|
|
293 |
# --- Event Handlers ---
|
294 |
|
295 |
def update_rounds_combination(round_x: int, round_y: int) -> str:
|
296 |
-
"""Updates the rounds_combination textbox based on round_x and round_y.
|
297 |
-
|
298 |
-
Args:
|
299 |
-
round_x (int): The value of round x.
|
300 |
-
round_y (int): The value of round y.
|
301 |
-
|
302 |
-
Returns:
|
303 |
-
str: The combined rounds string.
|
304 |
-
"""
|
305 |
return f"{int(round_x)},{int(round_y)}"
|
306 |
|
307 |
|
308 |
def calculate_journal_sum(text: str, date_words: str) -> tuple:
|
309 |
-
"""Calculates the journal sum and updates the step value.
|
310 |
-
|
311 |
-
Args:
|
312 |
-
text (str): The input text for calculation.
|
313 |
-
date_words (str): The date in words.
|
314 |
-
|
315 |
-
Returns:
|
316 |
-
tuple: A tuple containing the journal sum, step, and float_step.
|
317 |
-
"""
|
318 |
if check_name_similarity(text, forbidden_names):
|
319 |
-
return 0, 0, 0
|
320 |
if check_name_similarity(date_words, forbidden_names):
|
321 |
-
return 0, 0, 0
|
322 |
sum_value = calculate_gematria_sum(text, date_words)
|
323 |
-
return sum_value, sum_value, sum_value
|
324 |
-
|
325 |
-
def update_step_half(float_step: float) -> tuple:
|
326 |
-
"""Updates the step value to half.
|
327 |
|
328 |
-
Args:
|
329 |
-
float_step (float): The current float step value.
|
330 |
|
331 |
-
|
332 |
-
|
333 |
-
"""
|
334 |
new_step = math.ceil(float_step / 2)
|
335 |
return new_step, float_step / 2
|
336 |
|
337 |
-
def update_step_double(float_step: float) -> tuple:
|
338 |
-
"""Updates the step value to double.
|
339 |
-
|
340 |
-
Args:
|
341 |
-
float_step (float): The current float step value.
|
342 |
|
343 |
-
|
344 |
-
|
345 |
-
"""
|
346 |
new_step = math.ceil(float_step * 2)
|
347 |
return new_step, float_step * 2
|
348 |
|
|
|
349 |
# Update rounds_combination when round_x or round_y changes
|
350 |
round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
|
351 |
round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
|
352 |
|
353 |
|
354 |
def handle_json_download(config_json: str, step: int, rounds: int, strip_spaces: bool,
|
355 |
-
|
356 |
-
"""Handles the download of the JSON config file.
|
357 |
-
|
358 |
-
Args:
|
359 |
-
config_json (str): The JSON configuration data.
|
360 |
-
step (int): The step/jump width for ELS.
|
361 |
-
rounds (int): The number of rounds through the books.
|
362 |
-
strip_spaces (bool): Whether spaces were stripped.
|
363 |
-
strip_in_braces (bool): Whether text in braces was stripped.
|
364 |
-
strip_diacritics_chk (bool): Whether diacritics were stripped.
|
365 |
-
|
366 |
-
Returns:
|
367 |
-
str: The path to the downloaded file.
|
368 |
-
"""
|
369 |
return download_json_file(config_json, step, rounds, strip_spaces, strip_in_braces, strip_diacritics_chk)
|
370 |
|
|
|
371 |
def perform_search_and_create_json(start_date: datetime, end_date: datetime, date_language_input: str,
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
Args:
|
378 |
-
start_date (datetime): The start date for the search.
|
379 |
-
end_date (datetime): The end date for the search.
|
380 |
-
date_language_input (str): The language for the date words.
|
381 |
-
search_word (str): The word to search for.
|
382 |
-
start (int): The starting book number.
|
383 |
-
end (int): The ending book number.
|
384 |
-
step (int): The step/jump width for ELS.
|
385 |
-
rounds (int): The number of rounds through the books.
|
386 |
-
length (int): The desired length of the results (0 for infinite).
|
387 |
-
tlang (str): The target language for translation.
|
388 |
-
strip_spaces (bool): Whether to strip spaces from book content.
|
389 |
-
strip_in_braces (bool): Whether to strip text within braces from book content.
|
390 |
-
strip_diacritics_chk (bool): Whether to strip diacritics from book content.
|
391 |
-
gematria_text (str): The text for Gematria calculation.
|
392 |
-
average_combine (bool): Whether to average-combine the results of combined rounds.
|
393 |
-
|
394 |
-
Returns:
|
395 |
-
tuple: A tuple containing the JSON configuration and the results DataFrame.
|
396 |
-
"""
|
397 |
all_results = []
|
398 |
delta = timedelta(days=1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
399 |
while start_date <= end_date:
|
400 |
-
date_words_output =
|
401 |
-
|
402 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
403 |
|
404 |
filtered_results = perform_els_search(start, end, step, rounds, length, tlang, strip_spaces,
|
405 |
-
strip_in_braces, strip_diacritics_chk, average_combine,
|
|
|
406 |
date_words_output)
|
407 |
-
|
|
|
|
|
|
|
|
|
408 |
start_date += delta
|
409 |
|
|
|
410 |
if all_results:
|
411 |
df = pd.DataFrame(all_results)
|
412 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
413 |
# Translate the 'Book Result' column to the target language
|
414 |
-
translator = GoogleTranslator(source='yi', target=tlang)
|
415 |
df['Result'] = df['Book Result'].apply(translator.translate)
|
416 |
|
417 |
-
config_json = generate_json_dump(start, end,
|
418 |
-
|
419 |
-
|
420 |
return config_json, df
|
421 |
else:
|
422 |
-
return "No results found.", None
|
|
|
423 |
|
424 |
gematria_btn.click(
|
425 |
calculate_journal_sum,
|
426 |
-
inputs=[gematria_text, date_language_input],
|
427 |
outputs=[gematria_result, step, float_step]
|
428 |
)
|
429 |
|
@@ -441,7 +342,8 @@ with gr.Blocks() as app:
|
|
441 |
|
442 |
translate_btn.click(
|
443 |
perform_search_and_create_json,
|
444 |
-
inputs=[start_date, end_date, date_language_input, search_word, start, end, step, rounds_combination, length,
|
|
|
445 |
strip_in_braces, strip_diacritics_chk, gematria_text, average_combine_chk],
|
446 |
outputs=[json_output, results_output]
|
447 |
)
|
@@ -453,4 +355,4 @@ with gr.Blocks() as app:
|
|
453 |
)
|
454 |
|
455 |
if __name__ == "__main__":
|
456 |
-
app.launch(share=False)
|
|
|
|
|
1 |
import json
|
2 |
+
import logging
|
3 |
import math
|
|
|
4 |
from datetime import datetime, timedelta
|
5 |
|
6 |
+
import gradio as gr
|
7 |
import pandas as pd
|
8 |
from deep_translator import GoogleTranslator
|
9 |
from gradio_calendar import Calendar
|
|
|
|
|
10 |
|
11 |
from gematria import calculate_gematria, strip_diacritics
|
12 |
from utils import (
|
|
|
|
|
13 |
date_to_words,
|
14 |
translate_date_to_words,
|
15 |
process_json_files
|
16 |
)
|
17 |
|
|
|
18 |
# --- Constants ---
|
19 |
FORBIDDEN_NAMES_FILE = "c.txt"
|
20 |
+
DEFAULT_LANGUAGE = 'english'
|
21 |
|
22 |
logger = logging.getLogger(__name__)
|
23 |
logging.basicConfig(level=logging.DEBUG)
|
24 |
|
25 |
+
|
26 |
# --- Helper Functions ---
|
27 |
|
28 |
+
def create_language_dropdown(label: str, default_value: str = DEFAULT_LANGUAGE, show_label: bool = True) -> gr.Dropdown:
|
29 |
"""Creates a Gradio dropdown menu for language selection.
|
30 |
|
31 |
Args:
|
|
|
36 |
Returns:
|
37 |
gr.Dropdown: The Gradio dropdown component.
|
38 |
"""
|
39 |
+
languages = GoogleTranslator().get_supported_languages(as_dict=True)
|
40 |
return gr.Dropdown(
|
41 |
choices=list(languages.keys()),
|
42 |
label=label,
|
|
|
44 |
show_label=show_label
|
45 |
)
|
46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
+
def calculate_gematria_sum(text: str, date_words: str) -> int:
|
49 |
+
"""Calculates the Gematria sum for a text and date words."""
|
|
|
50 |
combined_input = f"{text} {date_words}"
|
51 |
logger.info(f"Combined input for Gematria: {combined_input}")
|
52 |
sum_value = calculate_gematria(strip_diacritics(combined_input))
|
53 |
logger.info(f"Gematria sum: {sum_value}")
|
54 |
return sum_value
|
55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
+
def perform_els_search(start: int, end: int, step: int, rounds: int, length: int, tlang: str,
|
58 |
+
strip_spaces: bool, strip_in_braces: bool, strip_diacritics: bool, average_combine: bool,
|
59 |
+
search_word_yiddish: str, date_words: str) -> list: # Accept Yiddish word
|
60 |
+
"""Performs the ELS search and filters by the Yiddish search word."""
|
61 |
|
62 |
logger.info("Starting ELS search...")
|
63 |
+
logger.debug(f"Search word (Yiddish): {search_word_yiddish}") # No translation here
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
if step == 0 or rounds == 0:
|
66 |
logger.info("Cannot search with step 0 or rounds 0")
|
67 |
+
return []
|
68 |
|
69 |
results = process_json_files(start, end, step, rounds, length, tlang, strip_spaces,
|
70 |
+
strip_in_braces, strip_diacritics, average_combine,
|
71 |
+
translate_results=False)
|
72 |
|
73 |
# Filter results by search word in els_result_text (Yiddish)
|
74 |
filtered_results = []
|
|
|
77 |
if 'els_result_text' in result and search_word_yiddish in result['els_result_text']:
|
78 |
filtered_results.append({
|
79 |
'Date': date_words,
|
80 |
+
'Book Result': result['els_result_text'],
|
81 |
+
'Result': result.get('translated_text', '')
|
82 |
})
|
83 |
|
84 |
return filtered_results
|
85 |
|
86 |
|
87 |
def generate_json_dump(start: int, end: int, step: int, rounds: int, length: int, tlang: str,
|
88 |
+
strip_spaces: bool, strip_in_braces: bool, strip_diacritics_chk: bool,
|
89 |
+
search_phrase: str, results_df: pd.DataFrame, search_word: str,
|
90 |
+
start_date: datetime, end_date: datetime) -> str:
|
91 |
+
"""Generates the JSON dump with configuration, date range, and results."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
config = {
|
93 |
"Start Book": start,
|
94 |
"End Book": end,
|
|
|
115 |
|
116 |
|
117 |
def download_json_file(config_json: str, step: int, rounds: int,
|
118 |
+
strip_spaces: bool, strip_in_braces: bool, strip_diacritics_chk: bool) -> str:
|
119 |
+
"""Downloads the JSON config file with a descriptive name."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
filename_suffix = ""
|
121 |
if strip_spaces:
|
122 |
filename_suffix += "-stSp"
|
|
|
124 |
filename_suffix += "-stBr"
|
125 |
if strip_diacritics_chk:
|
126 |
filename_suffix += "-stDc"
|
127 |
+
file_path = f"step-{step}-rounds-{rounds}{filename_suffix}.json"
|
128 |
with open(file_path, "w", encoding='utf-8') as file:
|
129 |
file.write(config_json)
|
130 |
logger.info(f"Downloaded JSON file to: {file_path}")
|
131 |
return file_path
|
132 |
|
133 |
+
|
134 |
# --- Forbidden Names Functions ---
|
135 |
|
136 |
def load_forbidden_names(filename: str = FORBIDDEN_NAMES_FILE) -> list:
|
137 |
+
"""Loads forbidden names from the specified file."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
try:
|
139 |
with open(filename, "r", encoding='utf-8') as f:
|
140 |
forbidden_names = [line.strip() for line in f]
|
|
|
145 |
|
146 |
|
147 |
def check_name_similarity(name: str, forbidden_names: list, threshold: int = 80) -> bool:
|
148 |
+
"""Checks if a name is similar to any forbidden name."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
from fuzzywuzzy import fuzz
|
150 |
for forbidden_name in forbidden_names:
|
151 |
similarity_ratio = fuzz.ratio(name.lower(), forbidden_name.lower())
|
|
|
154 |
return True
|
155 |
return False
|
156 |
|
157 |
+
|
158 |
# --- Gradio UI ---
|
159 |
|
160 |
with gr.Blocks() as app:
|
161 |
with gr.Row():
|
162 |
start_date = Calendar(type="datetime", label="1. Select Start Date")
|
163 |
end_date = Calendar(type="datetime", label="2. Select End Date")
|
164 |
+
date_language_input = create_language_dropdown("3. Date Word Language", default_value=DEFAULT_LANGUAGE)
|
165 |
search_word = gr.Textbox(label="4. Search Word")
|
166 |
|
167 |
with gr.Row():
|
|
|
175 |
end = gr.Number(label="End Book", value=39)
|
176 |
step = gr.Number(label="Jump Width (Steps) for ELS")
|
177 |
rounds = gr.Number(label="Rounds through Books", value=1)
|
178 |
+
float_step = gr.Number(visible=False, value=1)
|
179 |
half_step_btn = gr.Button("Steps / 2")
|
180 |
double_step_btn = gr.Button("Steps * 2")
|
181 |
|
|
|
183 |
round_x = gr.Number(label="Round (x)", value=1)
|
184 |
round_y = gr.Number(label="Round (y)", value=-1)
|
185 |
|
186 |
+
average_combine_chk = gr.Checkbox(label="Average-Combine Combined Rounds", value=False)
|
187 |
mirror_book_numbers = gr.Checkbox(label="Mirror book numbers for negative rounds (axis=book 20)", value=False)
|
188 |
|
189 |
rounds_combination = gr.Textbox(label="Combined Rounds", value="1,-1")
|
190 |
|
|
|
191 |
with gr.Row():
|
192 |
length = gr.Number(label="Result Length (0=inf)", value=0)
|
193 |
+
tlang = create_language_dropdown("Target Language for Translation", default_value=DEFAULT_LANGUAGE)
|
194 |
strip_spaces = gr.Checkbox(label="Strip Spaces from Books", value=True)
|
195 |
strip_in_braces = gr.Checkbox(label="Strip Text in Braces from Books", value=True)
|
196 |
strip_diacritics_chk = gr.Checkbox(label="Strip Diacritics from Books", value=True)
|
197 |
acknowledgment_chk = gr.Checkbox(
|
198 |
+
label="The User hereby accepts that the User will not harm or stalk anyone with this information, or bet on any of this information, in any regards.",
|
199 |
value=True
|
200 |
)
|
201 |
|
202 |
translate_btn = gr.Button("7. Search with ELS")
|
203 |
|
204 |
+
results_output = gr.Dataframe(headers=['Date', 'Book Result', 'Result'], label="Results")
|
205 |
json_output = gr.Textbox(label="JSON Configuration Output")
|
206 |
json_download_btn = gr.Button("Prepare .json for Download")
|
207 |
json_file = gr.File(label="Download Config JSON", file_count="single")
|
|
|
210 |
|
211 |
forbidden_names = load_forbidden_names()
|
212 |
|
213 |
+
|
214 |
# --- Event Handlers ---
|
215 |
|
216 |
def update_rounds_combination(round_x: int, round_y: int) -> str:
|
217 |
+
"""Updates the rounds_combination textbox based on round_x and round_y."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
218 |
return f"{int(round_x)},{int(round_y)}"
|
219 |
|
220 |
|
221 |
def calculate_journal_sum(text: str, date_words: str) -> tuple:
|
222 |
+
"""Calculates the journal sum and updates the step value."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
223 |
if check_name_similarity(text, forbidden_names):
|
224 |
+
return 0, 0, 0
|
225 |
if check_name_similarity(date_words, forbidden_names):
|
226 |
+
return 0, 0, 0
|
227 |
sum_value = calculate_gematria_sum(text, date_words)
|
228 |
+
return sum_value, sum_value, sum_value
|
|
|
|
|
|
|
229 |
|
|
|
|
|
230 |
|
231 |
+
def update_step_half(float_step: float) -> tuple:
|
232 |
+
"""Updates the step value to half."""
|
|
|
233 |
new_step = math.ceil(float_step / 2)
|
234 |
return new_step, float_step / 2
|
235 |
|
|
|
|
|
|
|
|
|
|
|
236 |
|
237 |
+
def update_step_double(float_step: float) -> tuple:
|
238 |
+
"""Updates the step value to double."""
|
|
|
239 |
new_step = math.ceil(float_step * 2)
|
240 |
return new_step, float_step * 2
|
241 |
|
242 |
+
|
243 |
# Update rounds_combination when round_x or round_y changes
|
244 |
round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
|
245 |
round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
|
246 |
|
247 |
|
248 |
def handle_json_download(config_json: str, step: int, rounds: int, strip_spaces: bool,
|
249 |
+
strip_in_braces: bool, strip_diacritics_chk: bool) -> str:
|
250 |
+
"""Handles the download of the JSON config file."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
return download_json_file(config_json, step, rounds, strip_spaces, strip_in_braces, strip_diacritics_chk)
|
252 |
|
253 |
+
|
254 |
def perform_search_and_create_json(start_date: datetime, end_date: datetime, date_language_input: str,
|
255 |
+
search_word: str, start: int, end: int, step: int, rounds: int, length: int,
|
256 |
+
tlang: str, strip_spaces: bool, strip_in_braces: bool,
|
257 |
+
strip_diacritics_chk: bool,
|
258 |
+
gematria_text: str, average_combine: bool) -> tuple:
|
259 |
+
"""Performs the ELS search for each date in the range, creates the JSON config, and displays the results."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
260 |
all_results = []
|
261 |
delta = timedelta(days=1)
|
262 |
+
original_start_date = start_date
|
263 |
+
total_steps = 0
|
264 |
+
|
265 |
+
# Translate the search word to Yiddish ONLY ONCE (outside the loop)
|
266 |
+
translator_yi = GoogleTranslator(source='auto', target='yi')
|
267 |
+
search_word_yiddish = translator_yi.translate(search_word)
|
268 |
+
|
269 |
+
seen_dates = set() # Keep track of processed dates
|
270 |
+
|
271 |
while start_date <= end_date:
|
272 |
+
date_words_output = date_to_words(start_date.strftime("%Y-%m-%d"))
|
273 |
+
|
274 |
+
# Only translate if the date language is not English
|
275 |
+
if date_language_input.lower() != DEFAULT_LANGUAGE:
|
276 |
+
date_words_output = translate_date_to_words(start_date, date_language_input)
|
277 |
+
|
278 |
+
# Skip if date has already been processed
|
279 |
+
if date_words_output in seen_dates:
|
280 |
+
start_date += delta
|
281 |
+
continue
|
282 |
+
seen_dates.add(date_words_output)
|
283 |
+
|
284 |
+
journal_sum, _, _ = calculate_journal_sum(gematria_text, date_words_output)
|
285 |
+
step = journal_sum
|
286 |
+
total_steps += step
|
287 |
|
288 |
filtered_results = perform_els_search(start, end, step, rounds, length, tlang, strip_spaces,
|
289 |
+
strip_in_braces, strip_diacritics_chk, average_combine,
|
290 |
+
search_word_yiddish, # Pass the translated Yiddish word
|
291 |
date_words_output)
|
292 |
+
|
293 |
+
# Only add the first result for each date
|
294 |
+
if filtered_results:
|
295 |
+
all_results.append(filtered_results[0])
|
296 |
+
|
297 |
start_date += delta
|
298 |
|
299 |
+
# Process results after the loop completes
|
300 |
if all_results:
|
301 |
df = pd.DataFrame(all_results)
|
302 |
|
303 |
+
# Deduplicate steps
|
304 |
+
seen_steps = set()
|
305 |
+
deduplicated_results = []
|
306 |
+
for result in all_results:
|
307 |
+
step_key = (result['Date'], result['Book Result'])
|
308 |
+
if step_key not in seen_steps:
|
309 |
+
deduplicated_results.append(result)
|
310 |
+
seen_steps.add(step_key)
|
311 |
+
df = pd.DataFrame(deduplicated_results)
|
312 |
+
|
313 |
# Translate the 'Book Result' column to the target language
|
314 |
+
translator = GoogleTranslator(source='yi', target=tlang)
|
315 |
df['Result'] = df['Book Result'].apply(translator.translate)
|
316 |
|
317 |
+
config_json = generate_json_dump(start, end, total_steps, rounds, length, tlang, strip_spaces,
|
318 |
+
strip_in_braces, strip_diacritics_chk, gematria_text, df, search_word,
|
319 |
+
original_start_date, end_date)
|
320 |
return config_json, df
|
321 |
else:
|
322 |
+
return "No results found.", None
|
323 |
+
|
324 |
|
325 |
gematria_btn.click(
|
326 |
calculate_journal_sum,
|
327 |
+
inputs=[gematria_text, date_language_input],
|
328 |
outputs=[gematria_result, step, float_step]
|
329 |
)
|
330 |
|
|
|
342 |
|
343 |
translate_btn.click(
|
344 |
perform_search_and_create_json,
|
345 |
+
inputs=[start_date, end_date, date_language_input, search_word, start, end, step, rounds_combination, length,
|
346 |
+
tlang, strip_spaces,
|
347 |
strip_in_braces, strip_diacritics_chk, gematria_text, average_combine_chk],
|
348 |
outputs=[json_output, results_output]
|
349 |
)
|
|
|
355 |
)
|
356 |
|
357 |
if __name__ == "__main__":
|
358 |
+
app.launch(share=False)
|
temuraeh.py
DELETED
@@ -1,59 +0,0 @@
|
|
1 |
-
import json
|
2 |
-
# Implementemos la función de temurah con el alfabeto completo y probemos la conversión de "Baphomet" a "Sofia"
|
3 |
-
# en hebreo usando temurah.
|
4 |
-
# Nota: La representación exacta de "Baphomet" y "Sofia" en hebreo puede variar debido a interpretaciones,
|
5 |
-
# pero aquí usaremos transliteraciones aproximadas para ilustrar cómo podría hacerse.
|
6 |
-
|
7 |
-
def temurah(text, hebrew_alphabet='אבגדהוזחטיכלמנסעפצקרשת', reverse=False):
|
8 |
-
"""
|
9 |
-
Aplica la temurah a un texto hebreo utilizando todo el alfabeto hebreo.
|
10 |
-
El esquema de ejemplo simplemente invierte el orden del alfabeto.
|
11 |
-
"""
|
12 |
-
# Invertir el alfabeto si se solicita
|
13 |
-
if reverse:
|
14 |
-
hebrew_alphabet = hebrew_alphabet[::-1]
|
15 |
-
|
16 |
-
# Generar el alfabeto invertido
|
17 |
-
inverted_alphabet = hebrew_alphabet[::-1]
|
18 |
-
|
19 |
-
# Crear el diccionario de mapeo para temurah
|
20 |
-
temurah_mapping = {orig: inv for orig, inv in zip(hebrew_alphabet, inverted_alphabet)}
|
21 |
-
|
22 |
-
# Aplicar temurah al texto
|
23 |
-
temurah_text = ''.join(temurah_mapping.get(char, char) for char in text)
|
24 |
-
|
25 |
-
return temurah_text
|
26 |
-
|
27 |
-
# Definir el alfabeto hebreo
|
28 |
-
hebrew_alphabet = 'אבגדהוזחטיכלמנסעפצקרשת'
|
29 |
-
|
30 |
-
# Texto de ejemplo: "Baphomet" y "Sofia" en hebreo
|
31 |
-
# Es importante notar que la transliteración directa de nombres propios o términos específicos entre idiomas
|
32 |
-
# puede no ser directa o puede requerir ajustes basados en la fonética o el uso histórico.
|
33 |
-
|
34 |
-
# Por simplificación, supongamos transliteraciones hipotéticas para "Baphomet" a "Sofia":
|
35 |
-
# Estas transliteraciones son ejemplos y pueden no reflejar transliteraciones precisas.
|
36 |
-
baphomet_hebrew = 'בפומת' # Esta es una transliteración hipotética para "Baphomet"
|
37 |
-
sofia_hebrew = 'סופיא' # Esta es una transliteración hipotética para "Sofia"
|
38 |
-
|
39 |
-
|
40 |
-
jesus ="ישוע"
|
41 |
-
christ = ""
|
42 |
-
|
43 |
-
print(temurah(jesus,hebrew_alphabet))
|
44 |
-
# Aplicar temurah al texto hipotético de "Baphomet"
|
45 |
-
temurah_baphomet = temurah(baphomet_hebrew, hebrew_alphabet)
|
46 |
-
|
47 |
-
# Mostrar resultados
|
48 |
-
|
49 |
-
print(temurah_baphomet+"\n"+sofia_hebrew)
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
genesis = json.loads(open("genesis.json","r").read())["text"][0]
|
55 |
-
|
56 |
-
##example_text = "בראשית ברא אלהים את השמים ואת הארץ" # "En el principio Dios creó los cielos y la tierra."
|
57 |
-
#for txt in genesis:
|
58 |
-
# print(temurah(txt,hebrew_alphabet))
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
utils.py
CHANGED
@@ -8,8 +8,20 @@ import inflect
|
|
8 |
from datetime import datetime
|
9 |
from deep_translator import GoogleTranslator
|
10 |
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
"""Processes JSON files to extract and process text.
|
14 |
|
15 |
Args:
|
@@ -23,7 +35,7 @@ def process_json_files(start, end, step, rounds="1", length=0, tlang="en", strip
|
|
23 |
strip_in_braces (bool, optional): Whether to strip text in braces. Defaults to True.
|
24 |
strip_diacritics (bool, optional): Whether to strip diacritics. Defaults to True.
|
25 |
average_compile (bool, optional): Whether to average-combine results. Defaults to False.
|
26 |
-
translate_results (bool, optional): Whether to translate the results. Defaults to False.
|
27 |
|
28 |
Returns:
|
29 |
list: A list of processed results.
|
@@ -57,19 +69,19 @@ def process_json_files(start, end, step, rounds="1", length=0, tlang="en", strip
|
|
57 |
clean_text = clean_text.replace(" ", " ")
|
58 |
|
59 |
text_length = len(clean_text)
|
60 |
-
|
61 |
selected_characters_per_round = {}
|
62 |
for round_num in map(int, rounds.split(',')):
|
63 |
# Handle cases where no characters should be selected
|
64 |
if not (round_num == 1 and step > text_length) and not (round_num == -1 and step > text_length):
|
65 |
# Corrected logic for negative rounds and step = 1
|
66 |
if round_num > 0:
|
67 |
-
current_position = step - 1
|
68 |
else:
|
69 |
current_position = text_length - 1 if step == 1 else text_length - step
|
70 |
|
71 |
completed_rounds = 0
|
72 |
-
selected_characters = ""
|
73 |
|
74 |
while completed_rounds < abs(round_num):
|
75 |
selected_characters += clean_text[current_position % text_length]
|
@@ -78,16 +90,17 @@ def process_json_files(start, end, step, rounds="1", length=0, tlang="en", strip
|
|
78 |
current_position += step if round_num > 0 else -step
|
79 |
|
80 |
if (round_num > 0 and current_position >= text_length * (completed_rounds + 1)) or \
|
81 |
-
|
82 |
completed_rounds += 1
|
83 |
|
84 |
selected_characters_per_round[round_num] = selected_characters
|
85 |
-
|
86 |
if average_compile and len(selected_characters_per_round) > 1:
|
87 |
result_text = ""
|
88 |
keys = sorted(selected_characters_per_round.keys())
|
89 |
for i in range(len(keys) - 1):
|
90 |
-
result_text = average_gematria(selected_characters_per_round[keys[i]],
|
|
|
91 |
else:
|
92 |
result_text = ''.join(selected_characters_per_round.values())
|
93 |
|
@@ -103,7 +116,7 @@ def process_json_files(start, end, step, rounds="1", length=0, tlang="en", strip
|
|
103 |
"title": data["title"],
|
104 |
"els_result_text": result_text,
|
105 |
"els_result_gematria": calculate_gematria(result_text),
|
106 |
-
"translated_text": translated_text
|
107 |
})
|
108 |
|
109 |
except FileNotFoundError:
|
|
|
8 |
from datetime import datetime
|
9 |
from deep_translator import GoogleTranslator
|
10 |
|
11 |
+
import logging
|
12 |
+
|
13 |
+
logger = logging.getLogger(__name__)
|
14 |
+
|
15 |
+
import json
|
16 |
+
import re
|
17 |
+
from gematria import calculate_gematria
|
18 |
+
import inflect
|
19 |
+
from datetime import datetime
|
20 |
+
from deep_translator import GoogleTranslator
|
21 |
+
|
22 |
+
|
23 |
+
def process_json_files(start, end, step, rounds="1", length=0, tlang="en", strip_spaces=True,
|
24 |
+
strip_in_braces=True, strip_diacritics=True, average_compile=False, translate_results=False):
|
25 |
"""Processes JSON files to extract and process text.
|
26 |
|
27 |
Args:
|
|
|
35 |
strip_in_braces (bool, optional): Whether to strip text in braces. Defaults to True.
|
36 |
strip_diacritics (bool, optional): Whether to strip diacritics. Defaults to True.
|
37 |
average_compile (bool, optional): Whether to average-combine results. Defaults to False.
|
38 |
+
translate_results (bool, optional): Whether to translate the results. Defaults to False.
|
39 |
|
40 |
Returns:
|
41 |
list: A list of processed results.
|
|
|
69 |
clean_text = clean_text.replace(" ", " ")
|
70 |
|
71 |
text_length = len(clean_text)
|
72 |
+
|
73 |
selected_characters_per_round = {}
|
74 |
for round_num in map(int, rounds.split(',')):
|
75 |
# Handle cases where no characters should be selected
|
76 |
if not (round_num == 1 and step > text_length) and not (round_num == -1 and step > text_length):
|
77 |
# Corrected logic for negative rounds and step = 1
|
78 |
if round_num > 0:
|
79 |
+
current_position = step - 1
|
80 |
else:
|
81 |
current_position = text_length - 1 if step == 1 else text_length - step
|
82 |
|
83 |
completed_rounds = 0
|
84 |
+
selected_characters = ""
|
85 |
|
86 |
while completed_rounds < abs(round_num):
|
87 |
selected_characters += clean_text[current_position % text_length]
|
|
|
90 |
current_position += step if round_num > 0 else -step
|
91 |
|
92 |
if (round_num > 0 and current_position >= text_length * (completed_rounds + 1)) or \
|
93 |
+
(round_num < 0 and current_position < 0):
|
94 |
completed_rounds += 1
|
95 |
|
96 |
selected_characters_per_round[round_num] = selected_characters
|
97 |
+
|
98 |
if average_compile and len(selected_characters_per_round) > 1:
|
99 |
result_text = ""
|
100 |
keys = sorted(selected_characters_per_round.keys())
|
101 |
for i in range(len(keys) - 1):
|
102 |
+
result_text = average_gematria(selected_characters_per_round[keys[i]],
|
103 |
+
selected_characters_per_round[keys[i + 1]])
|
104 |
else:
|
105 |
result_text = ''.join(selected_characters_per_round.values())
|
106 |
|
|
|
116 |
"title": data["title"],
|
117 |
"els_result_text": result_text,
|
118 |
"els_result_gematria": calculate_gematria(result_text),
|
119 |
+
"translated_text": translated_text
|
120 |
})
|
121 |
|
122 |
except FileNotFoundError:
|