# Spaces: Sleeping  (page-capture banner text; not part of the program)
import base64
import csv
import os
from io import StringIO

import pandas as pd
from fastapi import FastAPI
# Application object for this module; the functions under the
# "FastAPI Endpoints" comment below are its intended handlers.
app = FastAPI()
def get_download_link_dify(df, *, timeout=60):
    """Upload *df* to the Dify dataset as ``output.xlsx`` and return its download URL.

    The frame is serialized in memory by ``dataframe_to_binary`` and posted as
    a multipart form to the dataset's ``document/create-by-file`` endpoint.
    The id of the created document is then used to query the ``upload-file``
    endpoint, whose ``download_url`` field is returned after every
    ``"download/"`` substring has been removed.

    Args:
        df: The frame to upload; it is passed unchanged to
            ``dataframe_to_binary``.
        timeout: Seconds passed as the ``requests`` timeout for each of the two
            HTTP calls, so a stalled server cannot block the caller forever.

    Returns:
        The download URL string with ``"download/"`` stripped out.

    Raises:
        requests.HTTPError: If either call returns an error status.
        requests.RequestException: On connection failures or timeouts.
        RuntimeError: If the second response carries no ``download_url``.
    """
    import requests

    # API configuration.
    # NOTE(review): the API key is committed in source. Consider loading it
    # from configuration and rotating this key.
    BASE_URL = "http://redmindgpt.redmindtechnologies.com:81/v1"
    DATASET_ID = "084ae979-d101-414b-8854-9bbf5d3a442e"
    API_KEY = "dataset-feqz5KrqHkFRdWbh2DInt58L"

    headers = {
        "Authorization": f"Bearer {API_KEY}"
    }

    # Step 1: create the document from the uploaded file.
    url = f"{BASE_URL}/datasets/{DATASET_ID}/document/create-by-file"
    print(url)

    # The processing options travel as a JSON document inside the "data" form
    # field; whitespace around and inside the JSON is not significant.
    data_payload = {
        "data": """
        {
            "indexing_technique": "high_quality",
            "process_rule": {
                "rules": {
                    "pre_processing_rules": [
                        {"id": "remove_extra_spaces", "enabled": true},
                        {"id": "remove_urls_emails", "enabled": true}
                    ],
                    "segmentation": {
                        "separator": "###",
                        "max_tokens": 500
                    }
                },
                "mode": "custom"
            }
        }
        """
    }

    # Convert the DataFrame to an in-memory Excel file for the multipart body.
    file_buffer = dataframe_to_binary(df)
    files = {
        "file": (
            "output.xlsx",
            file_buffer,
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )
    }

    response = requests.post(
        url, headers=headers, data=data_payload, files=files, timeout=timeout
    )
    print(response)
    # Fail here with the HTTP status instead of a KeyError on the error body.
    response.raise_for_status()
    document_id = response.json()["document"]["id"]

    # Step 2: look up the stored file's download URL.
    url = f"{BASE_URL}/datasets/{DATASET_ID}/documents/{document_id}/upload-file"
    response = requests.get(url, headers=headers, timeout=timeout)
    print(response)
    response.raise_for_status()

    download_url = response.json().get("download_url")
    if not download_url:
        raise RuntimeError(
            f"Dify returned no download_url for document {document_id}"
        )

    # NOTE(review): the "download/" segment is stripped as in the original
    # code, presumably to obtain a directly usable link; confirm against the API.
    return download_url.replace("download/", "")
def dataframe_to_binary(df):
    """Serialize *df* to an in-memory Excel workbook.

    Args:
        df: Object exposing ``to_excel`` (a pandas DataFrame in this module).

    Returns:
        An ``io.BytesIO`` holding the workbook bytes, positioned at offset 0
        so the caller can read it from the start.
    """
    import io

    output = io.BytesIO()
    # Write the frame as .xlsx into the buffer, leaving out the row index.
    df.to_excel(output, index=False, engine="openpyxl")
    # Writing leaves the cursor at the end; rewind for the reader.
    output.seek(0)
    return output
# FastAPI Endpoints | |
def greet_json():
    """Return a fixed status payload, ``{"Document store": "created!"}``.

    NOTE(review): no route decorator is visible on this function in this view
    of the file; confirm it is registered on ``app``.
    """
    # Data processing is currently disabled:
    # process_and_store(pdf_path=pdf_file, pptx_path=pptx_file)
    return {"Document store": "created!"}
def _pad_csv_rows(csv_data: str) -> str:
    """Return *csv_data* re-serialized with every row padded to the widest row.

    Rows are parsed with the ``csv`` module, so commas inside quoted fields are
    not mistaken for separators. Blank lines (including the one produced by a
    trailing newline) are dropped instead of being padded into empty records.

    Raises:
        ValueError: If *csv_data* contains no non-blank rows.
    """
    # newline="" hands line endings to the csv parser untouched, as the csv
    # module documentation recommends.
    reader = csv.reader(StringIO(csv_data, newline=""))
    # Keep every row except those that are empty or a single whitespace-only cell.
    rows = [row for row in reader if len(row) > 1 or (row and row[0].strip())]
    if not rows:
        raise ValueError("csv_data contains no rows")

    width = max(len(row) for row in rows)
    buffer = StringIO()
    writer = csv.writer(buffer, lineterminator="\n")
    writer.writerows(row + [""] * (width - len(row)) for row in rows)
    return buffer.getvalue()


def save_file_dify(csv_data: str) -> str:
    """Parse *csv_data*, upload it to the Dify dataset, and return the download link.

    Rows of uneven length are first padded to a common column count so that
    pandas can read the text as one table.

    NOTE(review): no route decorator is visible on this function in this view
    of the file; confirm it is registered on ``app``.

    Args:
        csv_data: CSV text; the first non-blank row is read as the header.

    Returns:
        The download link produced by ``get_download_link_dify``.

    Raises:
        ValueError: If *csv_data* contains no non-blank rows.
    """
    df = pd.read_csv(StringIO(_pad_csv_rows(csv_data)))
    # Save in the Dify dataset and return the download link.
    return get_download_link_dify(df)