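# NOTE(review): the text "Spaces: Running Running" preceded this script; it appears to be hosting-page status residue, not code.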
import os
import random
import shutil

# Randomly sample 1% of the JSON files found under ``base_dir`` and copy them
# into ``base_dir/selected_files``, preserving each file's relative directory
# layout.

# Directory that holds the original JSON data. This is a relative path, so it
# is resolved against the current working directory when the script runs.
base_dir = 'real_review/original_data/'

# Destination for the sampled files: a 'selected_files' subdirectory inside
# the base directory.
selected_base_dir = os.path.join(base_dir, 'selected_files')

# Absolute form of the destination, used to recognise it during the walk.
_selected_abs = os.path.abspath(selected_base_dir)

# Collect the full path of every '.json' file under the base directory.
json_files = []
for root, dirs, files in os.walk(base_dir):
    # The destination lives inside base_dir. Prune it in place (os.walk honours
    # in-place edits of ``dirs`` in top-down mode) so copies left by an earlier
    # run are never treated as source data and re-copied into
    # selected_files/selected_files/...
    dirs[:] = [
        d for d in dirs
        if os.path.abspath(os.path.join(root, d)) != _selected_abs
    ]
    for file in files:
        if file.endswith('.json'):
            json_files.append(os.path.join(root, file))

# Sample 1% of the JSON files, with at least one file whenever any exist.
# Capping at len(json_files) keeps random.sample from raising ValueError when
# no JSON files were found (the sample size would otherwise be 1 for an empty
# population).
num_files_to_select = min(len(json_files), max(1, int(len(json_files) * 0.01)))
selected_files = random.sample(json_files, num_files_to_select)

# Report how many files were selected.
print(f"Selected {num_files_to_select} file(s):")

# Copy each selected file under the destination, mirroring its path relative
# to the base directory.
for file in selected_files:
    relative_path = os.path.relpath(file, base_dir)
    dest_file_path = os.path.join(selected_base_dir, relative_path)
    # Create any missing parent directories before copying.
    dest_dir = os.path.dirname(dest_file_path)
    os.makedirs(dest_dir, exist_ok=True)
    shutil.copy(file, dest_file_path)

# Confirm that the copy loop has finished.
print("File copying completed.")