import json
import logging
import re


def process_json_files(start: int, end: int) -> dict:
    """Load a range of numbered Tanach JSON files into a dictionary.

    For every book ID ``i`` in ``start..end`` the file ``texts/<i>.json`` is
    read (the ID is zero-padded to two digits; the path is relative to the
    current working directory). Loading is best-effort: a file that is
    missing, is not valid UTF-8/JSON, or does not hold a JSON object is
    reported with ``logging.warning`` and skipped, so one bad file never
    aborts the whole run.

    Args:
        start: The starting book ID (inclusive).
        end: The ending book ID (inclusive).

    Returns:
        A dictionary where keys are book IDs and values are dictionaries
        containing 'title' (default ``"No title"``) and 'text' (default
        ``[]``) fields. Files whose content is empty (``{}``, ``null``,
        ``[]``, ...) produce no entry. If ``start > end`` the result is
        an empty dictionary.
    """
    base_path = "texts"
    results = {}

    for i in range(start, end + 1):
        file_name = f"{base_path}/{i:02}.json"

        # Keep the try body limited to the operations that can actually
        # raise the exceptions handled below.
        try:
            with open(file_name, 'r', encoding='utf-8') as file:
                data = json.load(file)
        except FileNotFoundError:
            logging.warning("File %s not found.", file_name)
            continue
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            # UnicodeDecodeError is raised for invalid UTF-8 bytes and is
            # not a JSONDecodeError, so it has to be listed explicitly.
            logging.warning(
                "File %s could not be read as JSON: %s", file_name, e
            )
            continue

        # Empty content is skipped silently.
        if not data:
            continue

        # A non-empty list/string/number has no .get(); without this guard
        # the lookup below would raise AttributeError and abort the loop.
        if not isinstance(data, dict):
            logging.warning(
                "File %s does not contain a JSON object (got %s); skipping.",
                file_name,
                type(data).__name__,
            )
            continue

        # .get() never raises KeyError, so missing keys simply fall back to
        # their defaults.
        results[i] = {
            "title": data.get("title", "No title"),
            "text": data.get("text", []),
        }

    return results