File size: 3,906 Bytes
00316bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# TODO: this module needs to be cleaned up

import json
import os

import pandas as pd


def split_json_file(input_filepath, lines_per_file=50, output_dir='translate_data'):
    """
    Splits a line-oriented JSON file into numbered chunk files.

    Consecutive runs of up to 'lines_per_file' lines are copied unchanged to
    '<output_dir>/english_1.json', '<output_dir>/english_2.json', ... and the
    name of every created file is printed. An empty input file produces no
    output files.

    param input_filepath: The path to the input JSON file.
    param lines_per_file: The maximum number of lines per output file (must be >= 1).
    param output_dir: Directory that receives the chunk files; it is created if missing.
    raises ValueError: If 'lines_per_file' is smaller than 1.
    """
    if lines_per_file < 1:
        raise ValueError(f'lines_per_file must be at least 1, got {lines_per_file}')

    # JSON text is UTF-8 by specification, so do not rely on the platform default encoding
    with open(input_filepath, 'r', encoding='utf-8') as input_file:
        lines = input_file.readlines()

    if not lines:
        return

    # Create the target directory up front so the first write cannot fail on a fresh checkout
    os.makedirs(output_dir, exist_ok=True)

    # Every chunk start offset yields one output file; numbering starts at 1
    for file_counter, start in enumerate(range(0, len(lines), lines_per_file), start=1):
        output_filename = f'{output_dir}/english_{file_counter}.json'
        with open(output_filename, 'w', encoding='utf-8') as output_file:
            output_file.writelines(lines[start:start + lines_per_file])
        print(f'Created {output_filename}')


def merge_and_save(list1, list2, dict1, dict2, filename='output.csv'):
    """
    Merges two lists and two dictionaries into a pandas DataFrame according to the specified structure:
    headers: ['list1', 'list2', 'keys dict1', 'vals dict1', 'keys dict2', 'vals dict2']
    and saves it as a CSV file.

    Row i holds list1[i], list2[i] and the i-th (key, value) pair of each
    dictionary in insertion order. The number of rows equals len(list1);
    surplus entries in the other inputs are ignored. The header row is written
    even when there are no rows.

    Parameters:
    - list1 (list): First list to merge, contributing to column 'list1'.
    - list2 (list): Second list to merge, contributing to column 'list2'.
    - dict1 (dict): First dictionary to merge, keys and values added as separate columns.
    - dict2 (dict): Second dictionary to merge, keys and values added as separate columns.
    - filename (str): Filename for the saved CSV file.

    Raises:
    - IndexError: If list2, dict1 or dict2 has fewer entries than list1.
    """
    columns = ['list1', 'list2', 'keys dict1', 'vals dict1', 'keys dict2', 'vals dict2']
    dict1_items = list(dict1.items())
    dict2_items = list(dict2.items())

    # Fail before building anything, with a message that names the mismatch
    row_count = len(list1)
    shortest = min(len(list2), len(dict1_items), len(dict2_items))
    if shortest < row_count:
        raise IndexError(
            f"list2, dict1 and dict2 need at least {row_count} entries each "
            f"(shortest has {shortest})"
        )

    # zip stops at list1's length because every other input is at least as long
    data = [
        {
            'list1': item1,
            'list2': item2,
            'keys dict1': key1,
            'vals dict1': val1,
            'keys dict2': key2,
            'vals dict2': val2,
        }
        for item1, item2, (key1, val1), (key2, val2)
        in zip(list1, list2, dict1_items, dict2_items)
    ]

    # Explicit columns keep the header (and its order) even when 'data' is empty
    df = pd.DataFrame(data, columns=columns)

    # Saving the DataFrame to a CSV file
    df.to_csv(filename, index=False)
    print(f"DataFrame saved as '{filename}' in the current directory.")


# new line for every entry
def safe_my_dict_as_json(file_name, my_dict):
    """
    Saves a dictionary as a JSON object with one top-level entry per line.

    Each entry is written as '    <key>: <value>' on its own line, with the
    value serialized compactly by json.dumps. An empty dictionary produces
    an opening and a closing brace on separate lines.

    Parameters:
    - file_name (str): Path of the file to write; an existing file is overwritten.
    - my_dict (dict | list): The dictionary to save. If a list is passed, its
      first element is used as the dictionary (an empty list raises IndexError).
    """
    print(my_dict)

    # Unwrap the list first so that everything below sees the actual dictionary
    if isinstance(my_dict, list):
        my_dict = my_dict[0]

    # Serialize every entry before opening the file, so a failure cannot leave
    # a truncated file behind
    entries = []
    for key, value in my_dict.items():
        json_key = json.dumps(key)
        if not isinstance(key, str):
            # JSON object keys must be strings: quote the key's JSON text,
            # e.g. 1 -> "1", True -> "true", None -> "null"
            json_key = json.dumps(json_key)
        entries.append(f"    {json_key}: {json.dumps(value)}")

    with open(file_name, 'w', encoding='utf-8') as f:
        f.write('{\n')
        if entries:
            # Joining puts a comma after every entry except the last one
            f.write(',\n'.join(entries) + '\n')
        f.write('}\n')


if __name__ == "__main__":
    # Running the module directly performs no work; it only announces what the
    # module contains.
    banner = "here are all functions that write to the Datasets"
    print(banner)