Arafath10 committed (verified)
Commit 3a02c2a · 1 Parent(s): 782b200

Update main.py

Files changed (1)
  1. main.py +4 -121
main.py CHANGED
@@ -1,11 +1,7 @@
 from fastapi import FastAPI, HTTPException
 from fastapi.responses import JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
-import requests
-from AWSClaude import AWSClaude
-import json
-import concurrent.futures
-import time
+
 
 app = FastAPI()
 app.add_middleware(
@@ -20,120 +16,7 @@ app.add_middleware(
 
 @app.post("/get_n_depth_results")
 async def get_n_depth_results(url,input_query):
-    all_content = {}
-
-    def add_pdf_content(selected_pdf):
-        for pdf_url in selected_pdf:
-            print(pdf_url)
-            response = requests.get(pdf_url)
-
-            # Save the content of the response as a PDF file
-            pdf_path = "temp.pdf"
-            with open(pdf_path, "wb") as file:
-                file.write(response.content)
-
-            print(f"PDF file saved as {pdf_path}")
-
-            url = "http://localhost:5000/ask"
-            # url = "https://us-central1-neuralgap-1.cloudfunctions.net/scraperPDFDocxTables_v2"
-
-            data = {"processTables": "True"}
-
-            headers = {"Origin": "http://localhost:8080"}
-
-            with open(pdf_path, "rb") as file:
-                file_contents = file.read()
-
-            files = {
-                "pdf": (
-                    pdf_path,
-                    file_contents,
-                    "application/pdf",
-                )
-            }
-
-            response = requests.post(url, files=files, data=data, headers=headers)
-            all_content[pdf_url] = response.json()
-
-    def scrapper(input_url):
-        params = {'url': input_url}
-        headers = {'accept': 'application/json'}
-        url = 'https://chromium-qpxamiokfa-uc.a.run.app/get_scraped_data'
-        try:
-            response = requests.get(url, headers=headers, params=params)
-            all_url = response.json()["URL"]
-            all_content[input_url] = response.json()["Content"]
-            return all_url
-        except:
-            print(f"found a error url : {input_url}=========================================")
-            return "none"
-
-    pdf_urls = []
-
-    def separate_pdf_and_nonPDF_links(urls):
-        # Separate URLs into two lists
-        pdf_links = [url for url in urls if url and url.endswith('.pdf')]
-        if pdf_links:
-            pdf_urls.append(pdf_links)
-        return [url for url in urls if not (url and url.endswith('.pdf'))]  # other links for rescraping
-
-    def call_llm_service(scraped_data, input_url, input_query, pdf):
-        query = f"""
-        Here are my scraped links:
-
-        {scraped_data}
-
-        correct hostname: {input_url} use this host name for all other tasks
-
-        I need the always full (www.hotname.com/../) {pdf} URLs for the most relevant links related to "{input_query}". use the correct hostname from this provided content, give raw hyperlink with json format only don't give extra text details. only give json output
-        example json format is only links don't include keys (i need the always full (www.hotname.com/../))
-        """
-        llm = "ClaudeHaiku"
-        env = ""
-        user_id = "KAusXF7jp0Q40urdZWtDLXEhrmA"
-        thread_id = "hKxvoVgi7vRJCHhvMzH5"
-        stream_id = "stream1"
-        app_type = "sentinel"
-        other_request_params = {"messages": [
-            {"role": "user", "content": query},
-        ]}
-        return AWSClaude(llm, env, user_id, thread_id, stream_id, app_type, other_request_params).invoke()
-
-    input_url = f'["{url}"]'
-    input_query = input_query
-
-    for step in range(1, 3):
-        print(f"=================={step} step of scraping to get selected URLs from LLM=================================")
-        next_urls = []
-        with concurrent.futures.ThreadPoolExecutor() as executor:
-            futures = [executor.submit(scrapper, input_url) for input_url in (json.loads(input_url)[:2])]
-            for future in concurrent.futures.as_completed(futures):
-                next_urls.append(separate_pdf_and_nonPDF_links(future.result()))
-        selected_links_from_llm = call_llm_service(next_urls, input_url, input_query, "")
-        input_url = selected_links_from_llm
-        print(json.loads(input_url)[:2])
-
-    if not pdf_urls:
-        print(pdf_urls)
-        #return all_content.keys()
-        return all_content
-    else:
-        selected_pdf = json.loads(call_llm_service(pdf_urls, input_url, input_query, "only end with .pdf extension"))
-        print(pdf_urls)
-        print("selected pdf")
-        print(selected_pdf)
-        #return all_content.keys()
-        return all_content
-
-# # Start time
-# start_time = time.time()
-
-# print(main("https://www.keells.com/", "Please analyse reports"))
-
-# # End time
-# end_time = time.time()
-
-# # Calculate the time taken
-# time_taken = end_time - start_time
+    return "done"
 
-# print(f"Time taken: {time_taken} seconds")
+
+