# NEEDS TO BE CLEANED UP
import json
import os

import pandas as pd


def split_json_file(input_filepath, lines_per_file=50):
    """
    Splits a JSON file into multiple files, each containing up to
    'lines_per_file' lines.

    :param input_filepath: The path to the input JSON file.
    :param lines_per_file: The maximum number of lines per output file.
    """
    # Counter for file naming
    file_counter = 1

    # Read the lines from the input file
    with open(input_filepath, 'r') as input_file:
        lines = input_file.readlines()

    # Make sure the output directory exists before writing the chunks
    os.makedirs('translate_data', exist_ok=True)

    # Iterate through the lines in chunks of 'lines_per_file'
    for i in range(0, len(lines), lines_per_file):
        # Determine the output file name
        output_filename = f'translate_data/english_{file_counter}.json'

        # Write the current chunk of lines to the output file
        with open(output_filename, 'w') as output_file:
            chunk = lines[i:i + lines_per_file]
            for line in chunk:
                output_file.write(line)

        print(f'Created {output_filename}')

        # Increment the file counter
        file_counter += 1


def merge_and_save(list1, list2, dict1, dict2, filename='output.csv'):
    """
    Merges two lists and two dictionaries into a pandas DataFrame with the
    columns ['list1', 'list2', 'keys dict1', 'vals dict1', 'keys dict2',
    'vals dict2'] and saves it as a CSV file.

    Parameters:
    - list1 (list): First list to merge, contributing to column 'list1'.
    - list2 (list): Second list to merge, contributing to column 'list2'.
    - dict1 (dict): First dictionary; its keys and values become separate columns.
    - dict2 (dict): Second dictionary; its keys and values become separate columns.
    - filename (str): Filename for the saved CSV file.

    All four inputs are matched by position and must have the same length.
    """
    # Combine all elements into a structured list of dictionaries
    # for DataFrame construction
    data = []
    dict1_items = list(dict1.items())
    dict2_items = list(dict2.items())
    for i in range(len(list1)):
        row = {
            'list1': list1[i],
            'list2': list2[i],
            'keys dict1': dict1_items[i][0],
            'vals dict1': dict1_items[i][1],
            'keys dict2': dict2_items[i][0],
            'vals dict2': dict2_items[i][1],
        }
        data.append(row)

    # Create the DataFrame and save it as a CSV file
    df = pd.DataFrame(data)
    df.to_csv(filename, index=False)
    print(f"DataFrame saved as '{filename}' in the current directory.")


# Writes the dictionary as a JSON object with a new line for every entry
def safe_my_dict_as_json(file_name, my_dict):
    print(my_dict)

    # If a list was passed, use its first element as the dictionary to write
    if isinstance(my_dict, list):
        my_dict = my_dict[0]

    # Total number of items, used to control comma insertion
    total_items = len(my_dict)

    # Open a file for writing
    with open(file_name, 'w') as f:
        # Write the opening brace of the JSON object
        f.write('{\n')

        # Iterate over items, keeping track of the current item index
        for i, (key, value) in enumerate(my_dict.items()):
            # Serialize the key and value with JSON to handle special
            # characters and ensure proper quoting
            json_key = json.dumps(key)
            json_value = json.dumps(value)

            # A comma is needed for all but the last item
            comma = ',' if i < total_items - 1 else ''

            # Write one entry per line
            f.write(f" {json_key}: {json_value}{comma}\n")

        # Write the closing brace of the JSON object
        f.write('}\n')


if __name__ == "__main__":
    print("Here are all functions that write to the datasets.")
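
    # --- Illustrative usage sketch ---
    # The sample data and file names below are assumptions added for
    # demonstration only; they are not part of the original workflow.
    os.makedirs("translate_data", exist_ok=True)
    with open("translate_data/english.json", "w") as demo_file:
        demo_file.write('{"text": "hello"}\n{"text": "world"}\n')

    # Split the small demo file into one-line chunks
    split_json_file("translate_data/english.json", lines_per_file=1)

    # Merge two lists and two dictionaries of equal length into a CSV
    merge_and_save(
        ["a", "b"],
        ["c", "d"],
        {"k1": 1, "k2": 2},
        {"k3": 3, "k4": 4},
        filename="demo_output.csv",
    )

    # Write a small dictionary as line-per-entry JSON
    safe_my_dict_as_json("demo_output.json", {"hello": ["world", "welt"]})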