File size: 10,738 Bytes
b1c8f17
eb92a4f
b5b2e6a
b1c8f17
005f2d7
b1c8f17
 
2e76cf7
 
 
22917d7
7183db2
2c7b10d
b1c8f17
7bd26ee
b1c8f17
 
2e76cf7
 
 
 
b1c8f17
576d9e2
 
 
 
 
 
 
b5b2e6a
270e05e
b1c8f17
 
270e05e
dca52fb
7174a4c
1ea43e5
7174a4c
 
10a855b
c8a7661
5192fdb
7174a4c
74042ff
c8a7661
5192fdb
7174a4c
859648b
 
7174a4c
 
c8a7661
7174a4c
74042ff
7174a4c
4209060
 
 
 
 
 
1faa796
4209060
a4dd9db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7bd26ee
1fc729a
0b115cd
 
 
bdd570c
 
 
7174a4c
 
1fc729a
 
3a0393b
0b115cd
bdd570c
f87e22f
1fc729a
 
 
 
 
7174a4c
 
1fc729a
00eb6f3
1fc729a
3963c7b
3a0393b
0b115cd
3a0393b
1fc729a
7183db2
7a5d35d
3963c7b
c25e4d4
dff558a
6a1816b
7a5d35d
545c7b4
1fc729a
7a5d35d
e7a043e
1fc729a
6a1816b
7174a4c
6fe2220
0692f72
005f2d7
e7a043e
005f2d7
3a0393b
005f2d7
 
 
 
 
 
 
 
0b115cd
 
005f2d7
 
 
 
 
1fc729a
 
 
b1c8f17
 
 
0b115cd
 
b1c8f17
0b115cd
00eb6f3
b998742
3963c7b
b1c8f17
1fc729a
 
 
 
 
c8510e0
 
 
 
 
7bd26ee
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
from fastapi import FastAPI, HTTPException, Request, Query
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List, Dict, Any
from helper_functions_api import has_tables, extract_data_from_tag, openrouter_response,md_to_html, search_brave, fetch_and_extract_content, limit_tokens, together_response, insert_data
import os
from dotenv import load_dotenv, find_dotenv
from datetime import datetime, timedelta
from fastapi_cache import FastAPICache
from fastapi_cache.backends.inmemory import InMemoryBackend
from fastapi_cache.decorator import cache
import asyncio
import re
# Load environment variables from .env file
#load_dotenv("keys.env")

app = FastAPI()

# Initialize the in-memory response cache used by the @cache decorator on
# generate_report(). (NOTE(review): @app.on_event is deprecated in newer
# FastAPI in favor of lifespan handlers — confirm target FastAPI version.)
@app.on_event("startup")
async def startup():
    FastAPICache.init(InMemoryBackend(), prefix="fastapi-cache")

# Groq model names
llm_default_small = "llama3-8b-8192"
llm_default_medium = "llama3-70b-8192"

# Together Model names (fallback)
llm_fallback_small = "meta-llama/Llama-3-8b-chat-hf"
llm_fallback_medium = "meta-llama/Llama-3-70b-chat-hf"

# Generic system prompts for structured-output tasks (JSON / Python list /
# free-form / markdown report). Only SysPromptMd relates to report generation;
# the others appear unused in this chunk — may be consumed elsewhere.
SysPromptJson = "You are now in the role of an expert AI who can extract structured information from user request. Both key and value pairs must be in double quotes. You must respond ONLY with a valid JSON file. Do not add any additional comments."
SysPromptList = "You are now in the role of an expert AI who can extract structured information from user request. All elements must be in double quotes. You must respond ONLY with a valid python List. Do not add any additional comments."
SysPromptDefault = "You are an expert AI, complete the given task. Do not add any additional comments."
SysPromptMd = "You are an expert AI who can create a structured report using information provided in the context from user request.The report should be in markdown format consists of markdown tables structured into subtopics. Do not add any additional comments."

# Prompt lookup tables, keyed first by connectivity mode ("online"/"offline"),
# then (for user prompts) by output format ("chat"/"report"/"report_table").
# "online" templates expect both {description} and {reference} placeholders;
# "offline" templates expect only {description}.
prompt_user = {}
prompt_system = {}
prompt_user["online"] = {}
prompt_user["offline"] = {}
prompt_user["online"]["chat"] = "Write a well thought out, detailed and structured answer to the query:: {description} #### , refer the provided internet search results reference:{reference}"
prompt_user["online"]["report"] = "Write a well thought out, detailed and structured Report to the query:: {description} #### , refer the provided internet search results reference:{reference}, The report should be well formatted using markdown format structured into subtopics as necessory"
prompt_user["online"]["report_table"] = "Write a well thought out Report to the query:: {description},#### , refer the provided internet search results reference:{reference}. The report should be well formatted using markdown format,  structured into subtopics, include tables or lists as needed to make it well readable"

prompt_user["offline"]["chat"] = "Write a well thought out, detailed and structured answer to the query:: {description}"
prompt_user["offline"]["report"] = "Write a well thought out, detailed and structured Report to the query:: {description}. The report should be well formatted using markdown format,  structured into subtopics"
prompt_user["offline"]["report_table"] = "Write a detailed and structured Report to the query:: {description}, The report should be well formatted using markdown format,  structured into subtopics, include tables or lists as needed to make it well readable"

prompt_system["online"] = """You are an expert AI who can create a detailed structured report using internet search results.

                                1 filter and summarize relevant information, if there are conflicting information, use the latest source.
                                2. use it to construct a clear and factual answer.
                                Your response should be structured and properly formatted using markdown headings, subheadings, tables, use as necessory. Ignore Links and references"""

prompt_system["offline"] = """You are an expert AI who can create detailed answers. Your response should be properly formatted and well readable using markdown formatting."""

# API credentials. The os.getenv() lookups tolerate a missing variable (None);
# the os.environ[...] lookups below raise KeyError at import time if unset —
# NOTE(review): presumably intentional fail-fast for required keys, confirm.
TOGETHER_API_KEY = os.getenv('TOGETHER_API_KEY')
BRAVE_API_KEY = os.getenv('BRAVE_API_KEY')
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
HELICON_API_KEY = os.getenv("HELICON_API_KEY")
SUPABASE_USER = os.environ['SUPABASE_USER']
SUPABASE_PASSWORD = os.environ['SUPABASE_PASSWORD']
OPENROUTER_API_KEY = "sk-or-v1-"+os.environ['OPENROUTER_API_KEY']

# sys_prompts = {
#     "offline": {
#         "Chat": "You are an expert AI, complete the given task. Do not add any additional comments.",
#         "Full Text Report": "You are an expert AI who can create a detailed report from user request. The report should be in markdown format. Do not add any additional comments.",
#         "Tabular Report": "You are an expert AI who can create a structured report from user request.The report should be in markdown format structured into subtopics/tables/lists. Do not add any additional comments.",
#         "Tables only": "You are an expert AI who can create a structured tabular report from user request.The report should be in markdown format consists of only markdown tables. Do not add any additional comments.",
#     },
#     "online": {
#         "Chat": "You are an expert AI, complete the given task using the provided context. Do not add any additional comments.",
#         "Full Text Report": "You are an expert AI who can create a detailed report using information scraped from the internet. You should decide which information is relevant to the given task and use it to create a report. The report should be in markdown format. Do not add any additional comments.",
#         "Tabular Report": """You are an expert AI who can provide answers using internet search results.
#                                 1 filter and summarize relevant information, if there are conflicting information, use the latest source.
#                                 2. use it to construct a clear and factual answer.
#                                 Your response should be properly formatted and well readable using markdown formatting. """,
#         "Tables only": "You are an expert AI who can create a structured tabular report using information scraped from the internet. You should decide which information is relevant to the given task. The report should be in markdown format consists of only markdown tables. Do not add any additional comments.",
#     },
# }


class QueryModel(BaseModel):
    """Request body for /generate_report.

    Only ``description`` is required; everything else has a default.
    ``fastapi.Query`` is used here purely to attach descriptions/enums to the
    generated OpenAPI schema for each field.
    """
    user_query: str = Query(default="", description="Initial user query")
    topic: str = Query(default="", description="Topic name to generate Report")
    description: str = Query(description="Description/prompt for report (REQUIRED)")
    user_id: str = Query(default="", description="unique user id")
    user_name: str = Query(default="", description="user name")
    internet: bool = Query(default=True, description="Enable Internet search")
    # Selects which prompt_user[...] template is used downstream.
    output_format: str = Query(default="report_table", description="Output format for the report",
                               enum=["chat", "report", "report_table"])
    data_format: str = Query(default="Structured data", description="Type of data to extract from the internet",
                             enum=["No presets", "Structured data", "Quantitative data"])
    generate_charts: bool = Query(default=False, description="Include generated charts")
    output_as_md: bool = Query(default=False, description="Output report in markdown (default output in HTML)")

@cache(expire=604800)
async def generate_report(query: QueryModel):
    """Generate a report (chat answer / markdown report / tabular report).

    Pipeline:
      1. If ``query.internet`` is set, search Brave with a punctuation-stripped
         version of the description, scrape the result pages, and splice the
         scraped text into the "online" prompt template. Any failure here
         falls back (best-effort) to the pure-LLM "offline" path.
      2. Generate the markdown report with the medium LLM.
      3. Optionally convert numeric tables into embedded plotly.js charts via
         a stronger model; failures fall back to the original report.
      4. Persist the run (unless user_id == "test") and build per-URL HTML
         renderings of the scraped references.

    Returns a dict with keys: ``report`` (markdown or HTML depending on
    ``query.output_as_md``), ``references`` ({url: html}), ``search_query``
    (optimized query, "" when offline), ``search_data_full`` (raw search
    response, {} when offline).

    Results are cached in-process for one week (604800 s).
    """
    query_str = query.topic
    description = query.description
    user_id = query.user_id
    internet = "online" if query.internet else "offline"
    user_prompt_final = prompt_user[internet][query.output_format]
    system_prompt_final = prompt_system[internet]
    data_format = query.data_format
    optimized_search_query = ""
    # Fix: was initialized to [("", "")], which produced a bogus empty-string
    # reference entry (and persisted placeholder text) on the offline path.
    all_text_with_urls = []
    full_search_object = {}
    generate_charts = query.generate_charts
    output_as_md = query.output_as_md

    if query.internet:
        # Strip punctuation so the free-text description works as a search query.
        search_query = re.sub(r'[^\w\s]', '', description).strip()
        try:
            urls, optimized_search_query, full_search_object = search_brave(search_query, num_results=8)
            all_text_with_urls = fetch_and_extract_content(data_format, urls, optimized_search_query)
            # Cap the scraped context so the final prompt stays within limits.
            reference = limit_tokens(str(all_text_with_urls), token_limit=5000)
            user_prompt_final = user_prompt_final.format(description=description, reference=reference)
        except Exception as e:
            # Best-effort: any search/scrape failure degrades to offline mode.
            print(e)
            query.internet = False
            print("failed to search/scrape results, falling back to LLM response")

    if not query.internet:
        user_prompt_final = prompt_user["offline"][query.output_format].format(description=description)
        system_prompt_final = prompt_system["offline"]

    md_report = together_response(user_prompt_final, model=llm_default_medium, SysPrompt=system_prompt_final)
    # HTML rendering used only to detect tables for the chart step below.
    html_report = md_to_html(md_report)

    # Render Charts: ask a stronger model to turn numeric markdown tables into
    # embedded plotly.js charts, leaving the rest of the report unchanged.
    if generate_charts and has_tables(html_report):
        print("tables found, creating charts")
        try:

            prompt = "convert the numerical data tables in the given content to embedded html plotly.js charts if appropriate, use appropriate colors, \
            output format:\
            <report>output the full content without any other changes in md format enclosed in tags like this</report> using the following:" + str(md_report)

            messages = [{"role": 'user', "content": prompt}]
            md_report = extract_data_from_tag(openrouter_response(messages, model="anthropic/claude-3.5-sonnet"),"report")
            print(md_report)

        except Exception as e:
            print(e)
            print("failed to generate charts, falling back to original report")

    # Persist the run for real users; "test" is a reserved no-persist id.
    if user_id != "test":
        insert_data(user_id, query_str, description, str(all_text_with_urls), md_report)

    # Per-URL HTML rendering of each scraped source; skip empty URLs so a
    # failed/offline run yields an empty references dict.
    references_html = {url: str(md_to_html(text)) for text, url in all_text_with_urls if url}

    final_report = md_report if output_as_md else md_to_html(md_report)

    return {
        "report": final_report,
        "references": references_html,
        "search_query": optimized_search_query,
        "search_data_full": full_search_object
    }

@app.post("/generate_report")
async def api_generate_report(request: Request, query: QueryModel):
    """POST endpoint: delegate to the (cached) report-generation worker."""
    result = await generate_report(query)
    return result
    
# Allow cross-origin requests from any origin, with any method/header.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# maximally permissive — confirm this is intended for production deployments.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],)