import os
from operator import itemgetter
from typing import List

import gradio as gr
import weaviate
from langchain.chat_models import AzureChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.retrievers.weaviate_hybrid_search import WeaviateHybridSearchRetriever
from langchain.schema import Document, format_document
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
from langchain.vectorstores import Weaviate

# import socks
# import socket
# import requests
# # Configure the SOCKS5 proxy and its credentials
# socks.set_default_proxy(socks.SOCKS5, "sftp-v-proxy.szh.internet.bosch.com", 1080, True, 'zfn3wx_ftp', 'Bosch@123')
#
# # Redirect default socket connections through the SOCKS5 proxy
# socket.socket = socks.socksocket
os.environ["OPENAI_API_KEY"] = '8b3bb832d6ef4a019a6fbddb4986cb9b' | |
os.environ["OPENAI_API_TYPE"] = 'azure' | |
os.environ["OPENAI_API_VERSION"] = '2023-07-01-preview' | |
os.environ["OPENAI_API_BASE"] = 'https://ostingpteu.openai.azure.com/' | |
llm = AzureChatOpenAI(deployment_name='OstinAIEU', model_name="gpt-35-turbo") | |
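# `deployment_name` must match the chat deployment created in the Azure portal;
# for Azure deployments, `model_name` is informational. A quick connectivity
# check (a sketch; assumes the credentials above are valid):
#   print(llm.predict("Say hello"))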
# Connect to the Weaviate instance that hosts the GS knowledge base
WEAVIATE_URL = 'http://40.81.20.137:8080'
client = weaviate.Client(url=WEAVIATE_URL)

embedding = OpenAIEmbeddings(deployment="ostinembedding")
vectordb = Weaviate(client=client, index_name="GS_data", text_key="text")
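# `vectordb` exposes plain vector search over the same index; the chain below
# retrieves through the hybrid retriever instead. Example (a sketch; assumes
# the index has a vectorizer module configured):
#   vectordb.similarity_search("employee onboarding", k=3)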
# Define the retriever; `attributes` lists the metadata fields returned with each hit
retriever = WeaviateHybridSearchRetriever(
    client=client,
    index_name="GS_data",
    text_key="text",
    attributes=['title', 'update_time', 'source_name', 'url'],
    create_schema_if_missing=True,
    k=5,
)
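# Hybrid search combines BM25 keyword scoring with vector similarity inside
# Weaviate itself, so this app makes no embedding call at query time.
# Smoke test (a sketch; assumes the "GS_data" index is already populated):
#   docs = retriever.get_relevant_documents("What is GS?")
#   print(docs[0].metadata['title'])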
def _format_docs(docs: List[Document]) -> str:
    """Render each document's metadata as "Key: value" lines, one block per doc."""
    buffer = ''
    for doc in docs:
        doc_string = ''
        # Emit every metadata key-value pair (title, update_time, source_name, url)
        for key, value in doc.metadata.items():
            doc_string += f"{key.capitalize()}: {value}\n"
        # Extra newline separates one document's block from the next
        buffer += doc_string + '\n'
    return buffer
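# For a document whose metadata is {'title': 'GS FAQ', 'url': 'https://...'}
# this yields "Title: GS FAQ\nUrl: https://...\n" (illustrative values only).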
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")

def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    """Join the page contents of the retrieved documents into one context string."""
    doc_strings = [format_document(doc, document_prompt) for doc in docs]
    return document_separator.join(doc_strings)
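# Note the division of labor: _combine_documents feeds page *content* into the
# prompt as context, while _format_docs renders only *metadata* for the
# "sources" footer shown to the user.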
template = """"You are an expert, tasked to answer any question about Global Business Services (GS) . Using the | |
provided context, answer the user's question to the best of your ability using the resources provided. Generate a | |
comprehensive and informative answer (but no more than 80 words) for a given question based solely on the context. | |
Use an unbiased and journalistic tone. Combine search results together into a coherent answer. Do not repeat text | |
If there is nothing in the context relevant to the question at hand, just say "Sorry, I'm not sure. Could you provide | |
more information?" Don't try to make up an answer. You should use bullet points in your answer for readability." | |
{context} | |
Question: {question} | |
""" | |
ANSWER_PROMPT = ChatPromptTemplate.from_template(template) | |
def ans_format(ans) -> str:
    answer = ans['answer']
    sources = ans['sources']
    return f"{answer} \n\n \n\nHere are the sources:\n{sources}"
# Now we retrieve the documents and carry them alongside the question
retrieved_documents = RunnablePassthrough.assign(docs=itemgetter('question') | retriever)

# Now we construct the inputs for the final prompt
final_inputs = {
    "context": lambda x: _combine_documents(x["docs"]),
    "question": itemgetter("question"),
}

# And finally, we do the part that returns the answers
answer = {
    "answer": final_inputs | ANSWER_PROMPT | llm,
    "docs": itemgetter("docs"),
}

# Merge the answer text with the formatted source metadata
organized_ans = {
    'ans': {
        'answer': lambda x: x["answer"].content,
        'sources': lambda x: _format_docs(x["docs"]),
    }
    | RunnableLambda(ans_format)
    | StrOutputParser()
}

# And now we put it all together!
final_chain = retrieved_documents | answer | organized_ans | RunnablePassthrough()
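# In LCEL, a plain dict piped into a Runnable is coerced to a RunnableParallel,
# so each stage above fans its input out to the named lambdas/runnables.
# End-to-end smoke test (a sketch; assumes the services above are reachable):
#   print(final_chain.invoke({'question': 'What does GS do?'})['ans'])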
def response(msg: str) -> str:
    inp = {'question': msg}
    return final_chain.invoke(inp)['ans']

gr.Interface(fn=response, inputs=gr.Textbox(lines=2, placeholder="Ask Here..."), outputs="text").launch()