Redmind's picture
Update app.py
8fae5ad verified
from fastapi import FastAPI
import os
import pandas as pd
from io import StringIO
import os
import base64
# Application instance; the @app.get route decorators in this file register their handlers on it.
app = FastAPI()
def get_download_link_dify(df):
    """Upload *df* to the Dify dataset as an Excel file and return its download URL.

    The DataFrame is serialised in memory by ``dataframe_to_binary`` and
    posted as ``output.xlsx`` to the dataset's ``create-by-file`` endpoint.
    The id of the created document is then used to query the ``upload-file``
    endpoint, whose ``download_url`` field is returned with the substring
    ``"download/"`` removed.

    Args:
        df: Object accepted by ``dataframe_to_binary`` (a pandas DataFrame).

    Returns:
        The download URL string reported by the server, minus ``"download/"``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status from either call.
        KeyError: If the upload response has no ``document.id``.
        RuntimeError: If the second response carries no ``download_url``.
    """
    # code to save file in dify framework
    import requests

    # API Configuration
    BASE_URL = "http://redmindgpt.redmindtechnologies.com:81/v1"
    DATASET_ID = "084ae979-d101-414b-8854-9bbf5d3a442e"
    # NOTE(review): this key is committed to source control and should be
    # rotated. DIFY_API_KEY in the environment takes precedence; the literal
    # remains only as a backward-compatible fallback.
    API_KEY = os.environ.get("DIFY_API_KEY", "dataset-feqz5KrqHkFRdWbh2DInt58L")
    # requests waits forever by default; bound both calls so a stalled server
    # cannot hang the worker handling this request.
    REQUEST_TIMEOUT = 60

    # Endpoint URL
    url = f"{BASE_URL}/datasets/{DATASET_ID}/document/create-by-file"
    print(url)

    # Headers
    headers = {
        "Authorization": f"Bearer {API_KEY}"
    }

    # Data payload (form data as a plain text string). The JSON text is kept
    # verbatim, which is why its lines sit at column 0 inside the literal.
    data_payload = {
        "data": """
{
"indexing_technique": "high_quality",
"process_rule": {
"rules": {
"pre_processing_rules": [
{"id": "remove_extra_spaces", "enabled": true},
{"id": "remove_urls_emails", "enabled": true}
],
"segmentation": {
"separator": "###",
"max_tokens": 500
}
},
"mode": "custom"
}
}
"""
    }

    # Convert DataFrame to binary (in-memory)
    file_buffer = dataframe_to_binary(df)
    files = {
        "file": (
            "output.xlsx",
            file_buffer,
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )
    }

    # Send the POST request
    response = requests.post(
        url,
        headers=headers,
        data=data_payload,
        files=files,
        timeout=REQUEST_TIMEOUT,
    )
    print(response)
    # Fail here with the HTTP status instead of a KeyError / JSON error below.
    response.raise_for_status()
    data = response.json()
    document_id = data['document']['id']

    # code to get download_url
    url = f"{BASE_URL}/datasets/{DATASET_ID}/documents/{document_id}/upload-file"
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    print(response)
    response.raise_for_status()
    download_url = response.json().get("download_url")
    if download_url is None:
        raise RuntimeError(
            f"Dify returned no download_url for document {document_id}"
        )
    # NOTE(review): the "download/" segment is stripped as in the original
    # code; the reason is not visible in this file -- confirm against the
    # server's URL scheme.
    return download_url.replace("download/", "")
def dataframe_to_binary(df):
    """Serialise *df* to an in-memory Excel workbook.

    Args:
        df: Object exposing ``to_excel`` (a pandas DataFrame).

    Returns:
        An ``io.BytesIO`` holding the workbook written with the ``openpyxl``
        engine and without the index column, positioned at offset 0 so it can
        be read or uploaded straight away.
    """
    import io

    workbook_stream = io.BytesIO()
    df.to_excel(workbook_stream, index=False, engine="openpyxl")
    # Writing leaves the cursor at the end; rewind for the consumer.
    workbook_stream.seek(0, io.SEEK_SET)
    return workbook_stream
# FastAPI Endpoints
@app.get("/")
def greet_json():
    """Root endpoint: reply with a fixed status payload.

    No processing is triggered here; the response is a constant dictionary.
    """
    status_payload = {"Document store": "created!"}
    return status_payload
def _count_csv_fields(line: str) -> int:
    """Return how many CSV fields *line* holds, treating quoted commas as data."""
    import csv

    try:
        return len(next(csv.reader([line])))
    except (csv.Error, StopIteration):
        # Unparseable quoting: fall back to the plain separator count.
        return line.count(",") + 1


def _normalize_csv_rows(csv_data: str) -> str:
    """Drop blank lines and right-pad every row with commas to the widest row."""
    # Strip a trailing "\r" so CRLF input does not leave a carriage return
    # sitting in front of the padding commas.
    lines = (line.rstrip("\r") for line in csv_data.split("\n"))
    # Blank lines (e.g. the one produced by a trailing newline) are skipped;
    # padding them would turn them into rows of empty cells.
    rows = [(line, _count_csv_fields(line)) for line in lines if line.strip()]
    if not rows:
        raise ValueError("csv_data contains no rows")
    max_cols = max(width for _, width in rows)
    return "\n".join(line + "," * (max_cols - width) for line, width in rows)


@app.get("/save_file_dify")
def save_file_dify(csv_data: str):
    """Parse ragged CSV text, store it in the Dify dataset, and return the link.

    Rows may have differing numbers of columns; each row is padded with
    trailing commas to the width of the widest row before being parsed by
    pandas, so short rows become rows with empty trailing cells.

    Args:
        csv_data: CSV text whose first non-blank line is the header.

    Returns:
        The download link produced by ``get_download_link_dify``.

    Raises:
        ValueError: If ``csv_data`` contains no non-blank lines.
    """
    df = pd.read_csv(StringIO(_normalize_csv_rows(csv_data)))
    # save in dify dataset and return download link
    return get_download_link_dify(df)