"""FastAPI service that stores CSV data in a Dify dataset as an Excel file.

The ``/save_file_dify`` endpoint receives CSV text, normalizes ragged rows,
converts the result to an in-memory ``.xlsx`` workbook, uploads it to a Dify
dataset and returns the download link reported by the Dify API.
"""

import base64  # noqa: F401  (kept: was imported by the original module)
import csv
import json
import logging
import os
from io import BytesIO, StringIO

import pandas as pd
import requests
from fastapi import FastAPI, HTTPException

logger = logging.getLogger(__name__)

app = FastAPI()

# Dify API configuration. Each value can be overridden through the
# environment; the defaults are the values that used to be hard-coded.
# NOTE(review): the default API key is a credential committed to source
# control -- rotate it and supply DIFY_API_KEY via the environment instead.
BASE_URL = os.environ.get(
    "DIFY_BASE_URL", "http://redmindgpt.redmindtechnologies.com:81/v1"
)
DATASET_ID = os.environ.get(
    "DIFY_DATASET_ID", "084ae979-d101-414b-8854-9bbf5d3a442e"
)
API_KEY = os.environ.get("DIFY_API_KEY", "dataset-feqz5KrqHkFRdWbh2DInt58L")

# Seconds to wait for the Dify server before giving up; without a timeout a
# stalled upstream would block the worker indefinitely.
REQUEST_TIMEOUT = 60

XLSX_MIME_TYPE = (
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
)

# Indexing options sent (JSON-encoded) in the "data" form field of the upload.
_PROCESS_RULE = {
    "indexing_technique": "high_quality",
    "process_rule": {
        "rules": {
            "pre_processing_rules": [
                {"id": "remove_extra_spaces", "enabled": True},
                {"id": "remove_urls_emails", "enabled": True},
            ],
            "segmentation": {"separator": "###", "max_tokens": 500},
        },
        "mode": "custom",
    },
}


class DifyUploadError(RuntimeError):
    """Raised when the Dify API returns a response we cannot use."""


def _request_json(method, url, **kwargs):
    """Send an authenticated request to Dify and return the decoded JSON body.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-2xx HTTP status.
        DifyUploadError: if the response body is not valid JSON.
    """
    response = requests.request(
        method,
        url,
        headers={"Authorization": f"Bearer {API_KEY}"},
        timeout=REQUEST_TIMEOUT,
        **kwargs,
    )
    logger.info("%s %s -> %s", method, url, response.status_code)
    response.raise_for_status()
    try:
        return response.json()
    except ValueError as err:
        raise DifyUploadError(f"Non-JSON response from {url}") from err


def get_download_link_dify(df):
    """Upload *df* to the Dify dataset as ``output.xlsx`` and return its link.

    The DataFrame is posted to the dataset's ``document/create-by-file``
    endpoint; the id of the created document is then used to query the
    ``upload-file`` endpoint, whose ``download_url`` field is returned with
    the ``"download/"`` path segment removed.

    Args:
        df: pandas DataFrame to store.

    Returns:
        The download URL string.

    Raises:
        requests.RequestException: if either HTTP call fails.
        DifyUploadError: if a response lacks the expected fields.
    """
    upload_url = f"{BASE_URL}/datasets/{DATASET_ID}/document/create-by-file"
    files = {"file": ("output.xlsx", dataframe_to_binary(df), XLSX_MIME_TYPE)}
    created = _request_json(
        "POST",
        upload_url,
        data={"data": json.dumps(_PROCESS_RULE)},
        files=files,
    )
    try:
        document_id = created["document"]["id"]
    except (KeyError, TypeError) as err:
        raise DifyUploadError(
            "Dify upload response did not contain a document id"
        ) from err

    file_info_url = (
        f"{BASE_URL}/datasets/{DATASET_ID}/documents/{document_id}/upload-file"
    )
    file_info = _request_json("GET", file_info_url)
    download_url = (
        file_info.get("download_url") if isinstance(file_info, dict) else None
    )
    if not download_url:
        raise DifyUploadError(
            "Dify file-info response did not contain a download_url"
        )
    return download_url.replace("download/", "")


def dataframe_to_binary(df):
    """Serialize *df* to an in-memory Excel workbook.

    Returns:
        A ``BytesIO`` holding the ``.xlsx`` content (written with openpyxl,
        without the index column), positioned at the start of the stream.
    """
    output = BytesIO()
    df.to_excel(output, index=False, engine="openpyxl")
    # Rewind so the consumer reads the workbook from its first byte.
    output.seek(0)
    return output


def _normalize_csv(csv_data):
    """Return *csv_data* with every row padded to the same number of fields.

    Parsing goes through the ``csv`` module so quoted fields that contain
    commas or newlines are counted as a single field. Blank lines are dropped
    rather than padded into all-empty rows.

    Raises:
        ValueError: if *csv_data* contains no non-blank rows.
        csv.Error: if the text cannot be parsed as CSV.
    """
    # newline="" lets the csv module do its own line-ending handling.
    parsed = csv.reader(StringIO(csv_data, newline=""))
    rows = [
        row
        for row in parsed
        if row and not (len(row) == 1 and not row[0].strip())
    ]
    if not rows:
        raise ValueError("csv_data contains no rows")

    width = max(len(row) for row in rows)
    buffer = StringIO()
    writer = csv.writer(buffer, lineterminator="\n")
    writer.writerows(row + [""] * (width - len(row)) for row in rows)
    return buffer.getvalue()


# FastAPI Endpoints
@app.get("/")
def greet_json():
    """Return a static status payload; no processing is performed here."""
    return {"Document store": "created!"}


@app.get("/save_file_dify")
def save_file_dify(csv_data: str):
    """Store CSV text in the Dify dataset and return the download link.

    Args:
        csv_data: CSV content; rows may have differing numbers of fields and
            are padded to the widest row before being loaded.

    Returns:
        The download link produced by ``get_download_link_dify``.

    Raises:
        HTTPException: 400 if the CSV is empty or unparsable, 502 if the
            Dify API call fails.
    """
    try:
        df = pd.read_csv(StringIO(_normalize_csv(csv_data)))
    except (ValueError, csv.Error) as err:
        # pandas' EmptyDataError / ParserError are ValueError subclasses.
        raise HTTPException(
            status_code=400, detail=f"Invalid CSV data: {err}"
        ) from err

    try:
        return get_download_link_dify(df)
    except (requests.RequestException, DifyUploadError) as err:
        logger.exception("Saving file to Dify failed")
        raise HTTPException(
            status_code=502, detail=f"Dify upload failed: {err}"
        ) from err