# NOTE(review): the three lines below were page chrome captured when this file
# was scraped from a Hugging Face Spaces build page; kept as comments so the
# file parses as Python.
#   Spaces:
#   Build error
#   Build error
# Imports for the Streamlit document-retrieval app.
# (Trailing " | |" scrape artifacts removed; imports grouped stdlib /
# third-party per PEP 8 and the two langchain_community.llms lines merged.)

# Standard library
import json
import os

# Third-party
import streamlit as st
from huggingface_hub import hf_hub_download
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.retrievers import EnsembleRetriever
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.llms import CTransformers, LlamaCpp
from langchain_community.vectorstores import Qdrant
from qdrant_client import QdrantClient
from transformers import AutoTokenizer, AutoModel

# Local (disabled — ingest module not present in this build)
# from ingest import ClinicalBertEmbeddings, keyword_retriever
# # Initialize Streamlit app
# st.set_page_config(page_title="Document Retrieval App", layout='wide')
# # Download and initialize LLM model
# MODEL_PATH = './'
# # Some basic configurations for the model
# config = {
# "max_new_tokens": 2048,
# "context_length": 4096,
# "repetition_penalty": 1.1,
# "temperature": 0.5,
# "top_k": 50,
# "top_p": 0.9,
# "stream": True,
# "threads": int(os.cpu_count() / 2)
# }
# # We use Langchain's CTransformers llm class to load our quantized model
# llm = CTransformers(model=MODEL_PATH,
# config=config)
# # Tokenizer for Mistral-7B-Instruct from HuggingFace
# tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
# model_name = "aaditya/OpenBioLLM-Llama3-8B-GGUF"
# model_file = "openbiollm-llama3-8b.Q5_K_M.gguf"
# model_path = hf_hub_download(model_name, filename=model_file, local_dir='./')
# local_llm = "openbiollm-llama3-8b.Q5_K_M.gguf"
# llm = LlamaCpp(
# model_path=local_llm,
# temperature=0.3,
# n_ctx=2048,
# top_p=1
# )
# st.sidebar.title("Document Retrieval App")
# # Initialize embeddings
# embeddings = ClinicalBertEmbeddings()
# # Qdrant setup for medical_image collection
# url = "http://localhost:6333"
# client_medical = QdrantClient(url=url, prefer_grpc=False)
# db_medical = Qdrant(client=client_medical, embeddings=embeddings, collection_name="medical_image")
# # Qdrant setup for pdf collection
# client_pdf = QdrantClient(url=url, prefer_grpc=False)
# db_pdf = Qdrant(client=client_pdf, embeddings=embeddings, collection_name="pdf")
# # Define retrievers for both collections
# retriever_medical = db_medical.as_retriever(search_kwargs={"k": 1})
# retriever_pdf = db_pdf.as_retriever(search_kwargs={"k": 1})
# # Ensemble retriever combining both retrievers
# ensemble_retriever = EnsembleRetriever(retrievers=[retriever_medical, retriever_pdf], weights=[0.5, 0.5])
# # Prompt template for querying
# prompt_template = """Use the following pieces of information to answer the user's question.
# If you don't know the answer, just say that you don't know, don't try to make up an answer.
# Context: {context}
# Question: {question}
# Only return the helpful answer. Answer must be detailed and well explained.
# Helpful answer:
# """
# prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])
# # Streamlit app layout
# with st.sidebar:
# query = st.text_area("Enter your query here:")
# if st.button("Get Response"):
# st.write("Processing query...")
# chain_type_kwargs = {"prompt": prompt}
# qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=ensemble_retriever, return_source_documents=True, chain_type_kwargs=chain_type_kwargs, verbose=True)
# response = qa(query)
# # Process response to extract answer, source document, and metadata
# answer = response['result']
# source_document = response['source_documents'][0].page_content
# doc = response['source_documents'][0].metadata['source']
# # Display response
# st.subheader("Answer:")
# st.write(answer)
# st.subheader("Source Document:")
# st.write(source_document)
# st.subheader("Document Metadata:")
# st.write(doc)
# # Run the app
# if __name__ == '__main__':
# st.title("Document Retrieval App")
# st.write("Enter your query in the sidebar and click 'Get Response' to retrieve relevant documents.")
# Define model and prompt template
# ---------------------------------------------------------------------------
# Download the Mistral-7B-Instruct weights from the Hugging Face Hub.
#
# Fixes relative to the original:
#   * removed the " | |" scrape artifacts that broke the syntax;
#   * the original hard-coded os.environ['HUGGINGFACE_HUB_TOKEN'] = '' — that
#     clobbered any token the user had already exported; setdefault preserves it;
#   * the original passed use_auth_token='HUGGINGFACE_HUB_TOKEN', i.e. the
#     *literal string* rather than the token value — now the value is read
#     back from the environment (None => anonymous access).
os.environ.setdefault('HUGGINGFACE_HUB_TOKEN', '')

model_name = "mistralai/Mistral-7B-Instruct-v0.1"
# assumes this quantized filename exists in the repo — TODO confirm against the Hub
model_file = "mistral-7b-instruct.q4_0.bin"

# An empty string is not a valid token; fall back to None for anonymous access.
_hf_token = os.environ.get('HUGGINGFACE_HUB_TOKEN') or None
model_path = hf_hub_download(
    model_name,
    filename=model_file,
    local_dir='./',
    # NOTE(review): newer huggingface_hub versions prefer `token=`; kept
    # `use_auth_token` for compatibility with the version this file targets.
    use_auth_token=_hf_token,
)