import requests
import sys
import spacy
from io import BytesIO
import pandas as pd
import json

# Load the custom NER model once at import time (shared by all requests).
nlp_ner = spacy.load("ner_model_v1")


def process_xlsx(params):
    """Run the custom NER model over each input text and collect entities.

    Args:
        params: A JSON string of the form
            {"urls": ["<text>", ...], "normalfileID": [<id>, ...]}.
            "urls" holds the texts to tag; "normalfileID" is an optional
            parallel list of ids (missing or short lists are padded with
            None so no text is silently dropped).

    Returns:
        On success, a JSON string {"solutions": [...]} where each solution
        is {"text", "answer", "qcUser", "normalfileID"} and "answer" maps
        each entity label to a list of {"word", "index"} hits.
        On malformed input, a dict {"error": "..."} (kept as a dict for
        backward compatibility with existing callers).
    """
    try:
        params = json.loads(params)
    except json.JSONDecodeError as e:
        return {"error": f"Invalid JSON input: {e.msg} at line {e.lineno} column {e.colno}"}

    print(f"JSON : \n{params}")
    texts = params.get("urls", [])
    file_ids = params.get("normalfileID") or []
    # Pad ids with None so zip() cannot silently truncate the text list
    # when fewer ids than texts were supplied.
    if len(file_ids) < len(texts):
        file_ids = file_ids + [None] * (len(texts) - len(file_ids))

    solutions = []
    for text_id, (text, file_id) in enumerate(zip(texts, file_ids), start=1):
        doc = nlp_ner(text)
        # Group entities by label; index is the entity's position in doc.ents.
        entities_dict = {}
        for idx, ent in enumerate(doc.ents):
            entities_dict.setdefault(ent.label_, []).append({'word': ent.text, 'index': idx})
        solutions.append(
            {"text": text, "answer": entities_dict, "qcUser": None, "normalfileID": file_id}
        )
        print(f"{text_id}/{len(texts)}\n")  # simple progress indicator

    return json.dumps({"solutions": solutions})


import gradio as gr

# NOTE: the example shown matches the schema process_xlsx actually reads
# ("urls" / "normalfileID"); the old label showed a nonexistent "texts" key.
inputt = gr.Textbox(
    label='Parameters (JSON format) Eg. {"urls": ["Some text to tag"], "normalfileID": [1]}'
)
outputs = gr.JSON()
application = gr.Interface(
    fn=process_xlsx,
    inputs=inputt,
    outputs=outputs,
    title="Named Entity Recognition with API Integration",
)
application.launch()