Update main.py
main.py CHANGED
@@ -1,132 +1,125 @@
-from fastapi import FastAPI
 [old lines 2-125: the import block and function bodies are truncated in the rendered diff view; only the tail below survives]
-    with open(file.filename, 'wb') as f:
-        f.write(contents)
-
-    db = upload_file(file.filename)
-    result = predict(question, db)
-    return {"answer": result}
+from fastapi import FastAPI, Request, UploadFile, File
+import os
+from pydantic import BaseModel
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts import PromptTemplate
+from langchain.memory import ConversationBufferMemory
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough
+from langchain_core.messages import AIMessage, HumanMessage
+from langchain_community.document_loaders import PyPDFLoader
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_community.embeddings.sentence_transformer import (
+    SentenceTransformerEmbeddings,
+)
+from langchain_chroma import Chroma
+from sentence_transformers import SentenceTransformer
+
+# Cache locations for Hugging Face model downloads.
+os.environ['HF_HOME'] = '/hug/cache/'
+os.environ['TRANSFORMERS_CACHE'] = '/blabla/cache/'
+
+app = FastAPI()
+
+def predict(message, db):
+    llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)
+    template = """You are a general-purpose chatbot. Be friendly and kind. Help people answer their questions. Use the context below to answer the questions.
+    {context}
+    Question: {question}
+    Helpful Answer:"""
+    QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"], template=template)
+    # Unused for now: chat history is passed to the chain explicitly below.
+    memory = ConversationBufferMemory(
+        memory_key="chat_history",
+        return_messages=True
+    )
+
+    # k must be passed through search_kwargs; as_retriever(k=3) silently ignores it.
+    retriever = db.as_retriever(search_kwargs={"k": 3})
+
+    # Rewrite a follow-up question into a standalone one using the chat history.
+    contextualize_q_system_prompt = """Given a chat history and the latest user question \
+which might reference context in the chat history, formulate a standalone question \
+which can be understood without the chat history. Do NOT answer the question, \
+just reformulate it if needed and otherwise return it as is."""
+    contextualize_q_prompt = ChatPromptTemplate.from_messages(
+        [
+            ("system", contextualize_q_system_prompt),
+            MessagesPlaceholder(variable_name="chat_history"),
+            ("human", "{question}"),
+        ]
+    )
+    contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()
+
+    def contextualized_question(input: dict):
+        if input.get("chat_history"):
+            return contextualize_q_chain
+        return input["question"]
+
+    rag_chain = (
+        RunnablePassthrough.assign(context=contextualized_question | retriever)
+        | QA_CHAIN_PROMPT
+        | llm
+    )
+
+    # History is rebuilt on every request; nothing is carried across calls yet.
+    history = []
+    ai_msg = rag_chain.invoke({"question": message, "chat_history": history})
+    print(ai_msg)
+    bot_response = ai_msg.content.strip()
+
+    # MessagesPlaceholder expects a flat list of messages, not (user, bot) tuples.
+    history.extend([HumanMessage(content=message), AIMessage(content=bot_response)])
+
+    # Append the retrieved chunks and their citations to the answer.
+    docs = db.similarity_search(message, k=3)
+    extra = "\n" + "*" * 100 + "\n"
+    for d in docs:
+        citations = d.metadata["source"] + " pg." + str(d.metadata["page"])
+        extra += citations + "\n" + d.page_content + "\n" + "*" * 100 + "\n"
+    # Return the bot's response with the supporting context appended.
+    return bot_response + extra
+
+def upload_file(file_path):
+    print(file_path)
+    loaders = [PyPDFLoader(file_path)]
+
+    documents = []
+    for loader in loaders:
+        documents.extend(loader.load())
+
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=16)
+    docs = text_splitter.split_documents(documents)
+
+    model = "thenlper/gte-large"
+    embedding_function = SentenceTransformerEmbeddings(model_name=model)
+    print(f"Model's maximum sequence length: {SentenceTransformer(model).max_seq_length}")
+
+    collection_name = "Autism"
+    persist_directory = "./chroma"
+    print(len(docs))
+    # Pass collection_name and persist_directory so they are actually used.
+    db = Chroma.from_documents(
+        docs,
+        embedding_function,
+        collection_name=collection_name,
+        persist_directory=persist_directory,
+    )
+    print("Done Processing, you can query")
+    return db
+
+class Item(BaseModel):
+    code: str
+
+@app.get("/")
+async def root():
+    return {"Code Review Automation": "Version 1.0 'First Draft'"}
+
+@app.post("/UploadFile/")
+def upload_and_predict(question: str, file: UploadFile = File(...)):
+    # Renamed from predict(): the old name shadowed the chat function above,
+    # so the call below would have recursed into the endpoint itself.
+    contents = file.file.read()
+    with open(file.filename, 'wb') as f:
+        f.write(contents)
+
+    db = upload_file(file.filename)
+    result = predict(question, db)
+    return {"answer": result}
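For a quick smoke test of the new endpoint, a minimal client sketch follows. It is an assumption, not part of the commit: the host and port (a local uvicorn main:app --port 8000 run), the sample.pdf file name, and the question text are all placeholders. FastAPI exposes question as a query parameter because it is a plain str argument rather than part of the multipart body.

# Hypothetical client for POST /UploadFile/ (assumes a local server on port 8000
# and a sample.pdf in the working directory).
import requests

with open("sample.pdf", "rb") as pdf:
    resp = requests.post(
        "http://localhost:8000/UploadFile/",
        params={"question": "What does the document say about early diagnosis?"},
        files={"file": ("sample.pdf", pdf, "application/pdf")},
    )
print(resp.json()["answer"])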
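Since upload_file re-embeds the PDF on every request, one possible follow-up (a sketch under assumptions, not part of this commit) is to reopen the persisted Chroma collection instead of rebuilding it, reusing the collection_name and persist_directory values the file already defines:

# Hypothetical: reopen the collection persisted by Chroma.from_documents above;
# assumes the same embedding model and an existing ./chroma directory.
from langchain_chroma import Chroma
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings

embedding_function = SentenceTransformerEmbeddings(model_name="thenlper/gte-large")
db = Chroma(
    collection_name="Autism",
    persist_directory="./chroma",
    embedding_function=embedding_function,
)
docs = db.similarity_search("autism screening", k=3)  # sanity-check the reopened index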