Upload hogwats_gemini.py
hogwats_gemini.py  +65 -0
@@ -0,0 +1,65 @@
import google.generativeai as genai
from config import gemini_api
import tiktoken


def get_answer(query, company_name, chunked_raw_content):

    genai.configure(api_key=gemini_api)

    # Create the model
    # See https://ai.google.dev/api/python/google/generativeai/GenerativeModel
    generation_config = {
        "temperature": 1,
        "top_p": 0.95,
        "top_k": 64,
        "max_output_tokens": 8192,
        "response_mime_type": "text/plain",
    }
    safety_settings = [
        {
            "category": "HARM_CATEGORY_HARASSMENT",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE",
        },
        {
            "category": "HARM_CATEGORY_HATE_SPEECH",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE",
        },
        {
            "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE",
        },
        {
            "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
            "threshold": "BLOCK_MEDIUM_AND_ABOVE",
        },
    ]

    # Strip internal file-path prefixes from each chunk before building the context.
    chunks = []
    for chunk in chunked_raw_content:
        chunk = chunk.replace("PDF_FILE_____data_dumpster_", "")
        chunk = chunk.replace("data_dumpster_", "")
        chunks.append(chunk)
    context = str(chunks)

    # Count tokens with cl100k_base (an OpenAI tokenizer, used here as an
    # approximation of Gemini's token count) and truncate the context
    # proportionally if it would exceed ~900k tokens, leaving headroom under
    # the model's 1M-token context window.
    enc = tiktoken.get_encoding("cl100k_base")
    toks = enc.encode(context)
    if len(toks) >= 900000:
        chunk_size = int(len(context) // (len(toks) / 900000))
        context = context[:chunk_size]

    model = genai.GenerativeModel(
        model_name="gemini-1.5-flash-latest",
        safety_settings=safety_settings,
        generation_config=generation_config,
        system_instruction=(
            f"You are an expert at a Private Equity fund. You are helping a colleague "
            f"with his due diligence on {company_name}. All the questions you will "
            f"receive are in the context of this due diligence. You always cite the "
            f"sources from the context (given below) that you use.\n"
            f"You answer any question based on the following context elements:\n{context}"
        ),
    )
    chat_session = model.start_chat(
        history=[],
    )

    response = chat_session.send_message(
        f"{query} - (Bain style answer + sources properly renamed if needed)"
    )
    return response.text
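
A minimal usage sketch of the uploaded function. The sample chunks, company name, and query below are hypothetical, and it assumes config exposes gemini_api as a string holding a valid Google AI API key and that hogwats_gemini.py is on the import path.

# Hypothetical usage example; inputs are illustrative only.
from hogwats_gemini import get_answer

# Hypothetical pre-chunked document text, e.g. from an upstream PDF
# extraction step that prefixes chunks with internal file paths.
sample_chunks = [
    "PDF_FILE_____data_dumpster_annual_report.pdf: Revenue grew 12% YoY...",
    "data_dumpster_customer_survey.txt: NPS improved from 31 to 44...",
]

answer = get_answer(
    query="What are the key revenue growth drivers?",
    company_name="Acme Corp",
    chunked_raw_content=sample_chunks,
)
print(answer)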