import os

import gradio as gr
from datasets import load_dataset, DatasetDict, Dataset, Features, Value

# Hugging Face API token, read from the "hf_token_GCM" environment variable
HUGGINGFACE_TOKEN = os.getenv("hf_token_GCM")

# Name of the dataset (replace with your actual dataset name)
DATASET_NAME = "beetle_papers"

# Fully qualified Hub repository id. The same id is used for loading and for
# pushing so both operations always target the same repository, regardless of
# which account the token belongs to.
DATASET_REPO_ID = f"ChristopherMarais/{DATASET_NAME}"


def _load_or_create_dataset():
    """Return the dataset stored on the Hub, creating an empty one if needed.

    Returns:
        A ``DatasetDict`` with a ``"train"`` split holding ``filename``
        (string) and ``content`` (binary) columns.
    """
    try:
        # Pass the token on load as well: if an unauthenticated load fails,
        # the fallback below pushes an empty dataset, which could replace
        # data that already exists in the repository.
        return load_dataset(DATASET_REPO_ID, token=HUGGINGFACE_TOKEN)
    except (ValueError, FileNotFoundError):
        # Define the structure of the dataset with binary content
        features = Features({
            "filename": Value("string"),
            "content": Value("binary"),
        })
        empty_split = Dataset.from_dict(
            {"filename": [], "content": []},
            features=features,
        )
        dataset = DatasetDict({"train": empty_split})
        # Push an initial empty version of the dataset to the Hub
        dataset.push_to_hub(DATASET_REPO_ID, token=HUGGINGFACE_TOKEN)
        return dataset


def upload_files(file_paths):
    """Append the given files to the Hub dataset and push the result.

    Args:
        file_paths: Paths of the files to store, as delivered by the Gradio
            file input (presumably a list of path strings; each entry is
            passed to ``os.path.basename`` and ``open``). May be ``None`` or
            empty.

    Returns:
        A status message to display in the UI.
    """
    # Nothing selected: tell the user instead of touching the Hub.
    if not file_paths:
        return "Please select a file to upload."

    dataset = _load_or_create_dataset()

    for file_path in file_paths:
        # Store only the base name, not the temporary upload directory.
        file_name = os.path.basename(file_path)
        with open(file_path, "rb") as f:
            content = f.read()
        # add_item returns a new Dataset, so the split must be reassigned.
        dataset["train"] = dataset["train"].add_item(
            {"filename": file_name, "content": content}
        )

    # Save the updated dataset back to the Hugging Face Hub (one push for the
    # whole batch of files).
    dataset.push_to_hub(DATASET_REPO_ID, token=HUGGINGFACE_TOKEN)

    return "File(s) uploaded successfully!"
def clear_files():
    """Reset the upload list.

    Returns:
        ``None``, which clears the file input component it is wired to.
    """
    return None


# Page layout, top to bottom: heading and hint, file picker, action buttons,
# then the status box (upload section above the output section).
with gr.Blocks() as iface:
    gr.Markdown("# Upload Bark and Ambrosia Beetle Files Here!")
    gr.Markdown("Upload text files, Word docs, or PDFs")

    upload_file = gr.File(
        label="Upload your files",
        interactive=True,
        file_count="multiple",
    )

    with gr.Row():
        submit_button = gr.Button("Upload", variant="primary", scale=2)
        clear_button = gr.Button("Cancel", variant="secondary", scale=1)

    # Status box is shown from the start rather than appearing after upload.
    output_text = gr.Textbox(label="", visible=True)

    # "Cancel" empties the file picker; "Upload" sends the selected files to
    # upload_files and shows its status message.
    clear_button.click(fn=clear_files, inputs=None, outputs=upload_file)
    submit_button.click(
        fn=upload_files,
        inputs=upload_file,
        outputs=[output_text],
    )

# Launch the Gradio app without share=True since it is on Hugging Face Spaces
iface.launch()