File size: 1,230 Bytes
c4d0a5f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import os
import pyarrow.parquet as pq

def extract_parquet_files(
    directory: str,
    output_directory: str = "extracted_csv_files",
) -> None:
    """Convert every ``.parquet`` file found directly in *directory* to CSV.

    Each matching file is loaded with ``pq.read_table``, converted to a
    pandas DataFrame, and written to *output_directory* under the same base
    name with a ``.csv`` extension (no index column). One progress line is
    printed per converted file.

    Args:
        directory: Folder to scan. Only its immediate entries are examined;
            sub-folders are not walked.
        output_directory: Folder that receives the CSV files. It is created
            if missing. Defaults to ``"extracted_csv_files"`` (resolved
            against the current working directory), which is the location
            this function has always used.

    Raises:
        OSError: If *directory* cannot be listed (for example, it does not
            exist) or *output_directory* cannot be created.
    """
    # List the input first so an unreadable source folder fails before any
    # output folder is created. Sorting makes the processing (and log)
    # order reproducible, since os.listdir returns entries in arbitrary order.
    parquet_names = sorted(
        name for name in os.listdir(directory) if name.endswith(".parquet")
    )

    os.makedirs(output_directory, exist_ok=True)

    for filename in parquet_names:
        file_path = os.path.join(directory, filename)

        # Load the parquet data and hand it to pandas for CSV serialisation.
        data = pq.read_table(file_path).to_pandas()

        # Same base name as the source file, with the extension swapped.
        csv_filename = os.path.splitext(filename)[0] + ".csv"
        csv_file_path = os.path.join(output_directory, csv_filename)

        # index=False keeps the DataFrame's row index out of the CSV.
        data.to_csv(csv_file_path, index=False)

        print(f"Extracted data from {filename} saved as {csv_filename}")

# Folder that holds the .parquet inputs for this run
parquet_directory = "hindi"

# Convert every parquet file in that folder to CSV
extract_parquet_files(directory=parquet_directory)