Spaces:

thewise
/

Chat-w-git-Codellama

Running

File size: 5,657 Bytes

import os
import sys
import docarray
sys.path.append('../..')
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.document_loaders import TextLoader
from langchain.chains import RetrievalQA,  ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOllama
from langchain.document_loaders import TextLoader
from langchain.document_loaders import GitLoader
from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory
from langchain.vectorstores import Chroma
from langchain.embeddings import OllamaEmbeddings
from langchain.prompts import PromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, AIMessagePromptTemplate, ChatPromptTemplate
import datetime
import shutil

# Function to load the data from github using langchain with string type url, string type branch, string type file_filter
def loader(url: str, branch: str, file_filter: str):
    repo_path = "./github_repo"
    if os.path.exists(repo_path):
        shutil.rmtree(repo_path)

    loader = GitLoader(
    clone_url= url,
    repo_path="./github_repo/",
    branch=branch,
    file_filter=lambda file_path: file_path.endswith(tuple(file_filter.split(','))) # Filter out files in Data but whole repo is cloned
    )

    data = loader.load()
    return data
    

#Function to split the data into chunks using recursive character text splitter
def split_data(data):
    splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=150,
            length_function=len,  # Function to measure the length of chunks while splitting
            add_start_index=True  # Include the starting position of each chunk in metadata
    )
    chunks = splitter.split_documents(data)
    return chunks

#Function to ingest the chunks into a vectorstore of doc
def ingest_chunks(chunks):
    embedding = OllamaEmbeddings(
    base_url='https://thewise-ollama-server.hf.space',
    model="nomic-embed-text",
)
    vector_store = DocArrayInMemorySearch.from_documents(chunks, embedding)

    repo_path = "./github_repo"
    if os.path.exists(repo_path):
        shutil.rmtree(repo_path)

    return vector_store

#Retreival function to get the data from the database and reply to the user
def retreival(vector_store, k):
    #Creating LLM
    llm = ChatOllama(
    base_url='https://thewise-ollama-server.hf.space',
    model="codellama")
                     
    # Define the system message template
    #Adding CHAT HISTORY to the System template explicitly because mainly Chat history goes to Condense the Human Question with Backround (Not template), but System template goes straight the LLM Chain
    #Explicitly adding chat history to access previous chats and answer "what is my previous question?"
    #Great thing this also sends the chat history to the LLM Model along with the context and question
    system_template = """You're a code summarisation assistant. Given the following extracted parts of a long document as "CONTEXT" create a final answer.
    If you don't know the answer, just say that you don't know. Don't try to make up an answer.
    Only If asked to create a "DIAGRAM" for code use "MERMAID SYNTAX LANGUAGE" in your answer from "CONTEXT" and "CHAT HISTORY" with a short explanation of diagram.
    CONTEXT: {context}
    =======
    CHAT HISTORY: {chat_history}
    =======
    FINAL ANSWER:"""

    human_template = """{question}"""

    # ai_template = """
    # FINAL ANSWER:"""

    # Create the chat prompt templates
    messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template(human_template)
    # AIMessagePromptTemplate.from_template(ai_template)
    ]

    PROMPT = ChatPromptTemplate.from_messages(messages)

    #Creating memory
    # memory = ConversationBufferMemory(
    #         memory_key="chat_history",
    #         input_key="question",
    #         output_key="answer",
    #         return_messages=True)

    memory = ConversationBufferWindowMemory(
            memory_key="chat_history",
            input_key="question",
            output_key="answer",
            return_messages=True,
            k=5)

    #Creating the retriever, this can also be a contextual compressed retriever
    retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": k}) #search_type can be "similarity" or "mmr"

    chain = ConversationalRetrievalChain.from_llm(
        llm=llm, 
        chain_type="stuff", #chain type can be refine, stuff, map_reduce
        retriever=retriever, 
        memory=memory,
        return_source_documents=True, #When used these 2 properties, the output gets 3 properties: answer, source_document, source_document_score and then have to speocify input and output key in memory for it to work
        combine_docs_chain_kwargs=dict({"prompt": PROMPT})
        )

    return chain

#Class using all above components to create QA system
class ConversationalResponse:
    def __init__(self, url, branch, file_filter):
        self.url = url
        self.branch = branch
        self.file_filter = file_filter
        self.data = loader(self.url, self.branch, self.file_filter)
        self.chunks = split_data(self.data)
        self.vector_store = ingest_chunks(self.chunks)
        self.chain_type = "stuff"
        self.k = 10
        self.chain = retreival(self.vector_store, self.k)

    def __call__(self, question):
        agent = self.chain(question)
        return agent['answer']