restapitrial_vectordb

Sleeping

App Files Files Community

restapitrial_vectordb / app.py

Redmind

Update app.py

8fae5ad verified 17 days ago

raw

history blame contribute delete

3.31 kB

	from fastapi import FastAPI
	import os


	import pandas as pd
	from io import StringIO
	import os
	import base64

	# Application instance; the @app.get(...) decorators in this file register their routes on it.
	app = FastAPI()

	def get_download_link_dify(df):
		"""Upload *df* to the Dify dataset as an Excel file and return its download URL.

		The frame is serialised in memory by ``dataframe_to_binary``, posted to the
		dataset's ``document/create-by-file`` endpoint, and the created document's
		``upload-file`` endpoint is then queried for its ``download_url``.

		Args:
			df: Object accepted by ``dataframe_to_binary`` (a pandas DataFrame at
				the call site in this file).

		Returns:
			The ``download_url`` reported by the API, with every ``"download/"``
			substring removed.

		Raises:
			requests.RequestException: On connection failure, timeout, or a
				4xx/5xx response from either request.
			RuntimeError: If a response does not contain the expected document
				id or download URL.
		"""
		import json
		import logging

		import requests

		log = logging.getLogger(__name__)

		# API configuration.
		# NOTE(security): this dataset key is committed in source. It should be
		# rotated and supplied only through the environment; DIFY_API_KEY takes
		# precedence, and the literal is kept solely so existing deployments
		# keep working.
		base_url = "http://redmindgpt.redmindtechnologies.com:81/v1"
		dataset_id = "084ae979-d101-414b-8854-9bbf5d3a442e"
		api_key = os.environ.get("DIFY_API_KEY", "dataset-feqz5KrqHkFRdWbh2DInt58L")
		# Without a timeout, requests waits indefinitely on a stalled server.
		timeout_seconds = 60

		auth_headers = {"Authorization": f"Bearer {api_key}"}

		# The API takes the processing options as a JSON string in the "data"
		# form field; building it with json.dumps guarantees valid JSON.
		processing_options = {
			"indexing_technique": "high_quality",
			"process_rule": {
				"rules": {
					"pre_processing_rules": [
						{"id": "remove_extra_spaces", "enabled": True},
						{"id": "remove_urls_emails", "enabled": True},
					],
					"segmentation": {"separator": "###", "max_tokens": 500},
				},
				"mode": "custom",
			},
		}
		form_fields = {"data": json.dumps(processing_options)}

		# Serialise the DataFrame to an in-memory .xlsx for the multipart upload.
		files = {
			"file": (
				"output.xlsx",
				dataframe_to_binary(df),
				"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
			)
		}

		# Step 1: create the document from the uploaded file.
		create_url = f"{base_url}/datasets/{dataset_id}/document/create-by-file"
		log.info("POST %s", create_url)
		response = requests.post(
			create_url,
			headers=auth_headers,
			data=form_fields,
			files=files,
			timeout=timeout_seconds,
		)
		log.info("create-by-file responded with %s", response.status_code)
		response.raise_for_status()
		try:
			document_id = response.json()["document"]["id"]
		except (KeyError, TypeError) as err:
			raise RuntimeError(
				"Dify create-by-file response did not include a document id"
			) from err

		# Step 2: look up the stored file to obtain its download URL.
		file_info_url = (
			f"{base_url}/datasets/{dataset_id}/documents/{document_id}/upload-file"
		)
		response = requests.get(
			file_info_url, headers=auth_headers, timeout=timeout_seconds
		)
		log.info("upload-file responded with %s", response.status_code)
		response.raise_for_status()

		download_url = response.json().get("download_url")
		if not download_url:
			# Previously this fell through to None.replace(...) and crashed
			# with an uninformative AttributeError.
			raise RuntimeError(
				"Dify upload-file response did not include a download_url"
			)
		return download_url.replace("download/", "")

	def dataframe_to_binary(df):
		"""Serialise *df* to an in-memory Excel workbook.

		Calls ``df.to_excel`` with ``index=False`` and the ``openpyxl`` engine,
		writing into a fresh ``BytesIO``.

		Returns:
			The ``BytesIO`` holding the workbook bytes, rewound to offset 0 so a
			consumer can read it from the start.
		"""
		from io import BytesIO

		workbook_stream = BytesIO()
		df.to_excel(workbook_stream, index=False, engine="openpyxl")
		# Writing leaves the position at the end; rewind before handing it out.
		workbook_stream.seek(0)
		return workbook_stream



	# FastAPI Endpoints
	@app.get("/")
	def greet_json():
		"""Handle GET / by returning a fixed status payload."""
		# No processing runs here; the process_and_store step remains disabled.
		status_payload = {"Document store": "created!"}
		return status_payload

	def _pad_csv_rows(csv_data):
		"""Return *csv_data* re-serialised so every row has the same field count.

		Rows are parsed with the ``csv`` module, so quoted fields containing
		commas count as one field and any of ``\\n``, ``\\r\\n`` or ``\\r`` ends a
		line. Blank and whitespace-only lines are dropped. Short rows are padded
		on the right with empty fields. Returns ``""`` when no rows remain.
		"""
		import csv

		rows = [
			row
			# newline="" leaves line endings untouched for the csv parser.
			for row in csv.reader(StringIO(csv_data, newline=""))
			# Skip blank lines ([] or a lone whitespace-only field); rows of
			# explicitly empty fields such as ",," are kept.
			if len(row) > 1 or "".join(row).strip()
		]
		if not rows:
			return ""

		width = max(len(row) for row in rows)
		padded = StringIO()
		writer = csv.writer(padded, lineterminator="\n")
		writer.writerows(row + [""] * (width - len(row)) for row in rows)
		return padded.getvalue()

	@app.get("/save_file_dify")
	def save_file_dify(csv_data: str):
		"""Convert CSV text to a DataFrame, store it in Dify, and return the link.

		Args:
			csv_data: CSV text whose first row is the header. Rows may have
				differing field counts; short rows are padded with empty fields
				before parsing.

		Returns:
			The download URL produced by ``get_download_link_dify``.

		Raises:
			HTTPException: 400 when ``csv_data`` contains no non-blank rows
				(previously an unhandled ``ValueError`` from ``max()``).
		"""
		from fastapi import HTTPException

		fixed_csv_data = _pad_csv_rows(csv_data)
		if not fixed_csv_data:
			raise HTTPException(status_code=400, detail="csv_data contains no rows")

		# Let pandas handle the header row and dtype inference.
		df = pd.read_csv(StringIO(fixed_csv_data))

		# Save in the Dify dataset and return the download link.
		return get_download_link_dify(df)