import os
import re
import time

import nltk
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from langchain_openai import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import UnstructuredWordDocumentLoader as DocxLoader
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

# Set writable paths for caches and data (useful when the app runs in a
# container whose home directory is read-only).
cache_dir = '/tmp'
nltk_data_path = os.path.join(cache_dir, 'nltk_data')

# Point the Hugging Face / transformers caches at the writable location.
os.environ['TRANSFORMERS_CACHE'] = os.path.join(cache_dir, 'transformers_cache')
os.environ['HF_HOME'] = os.path.join(cache_dir, 'huggingface')
os.environ['XDG_CACHE_HOME'] = cache_dir

# Add the NLTK data path and make sure the directory exists.
nltk.data.path.append(nltk_data_path)
try:
    os.makedirs(nltk_data_path, exist_ok=True)
except OSError as e:
    print(f"Error creating directory {nltk_data_path}: {e}")
    raise

# Download the NLTK resources required by the unstructured DOCX loader.
try:
    nltk.download('punkt', download_dir=nltk_data_path)
    print("NLTK 'punkt' resource downloaded successfully.")
except Exception as e:
    print(f"Error downloading NLTK resources: {e}")
    raise


def clean_response(response: str) -> str:
    """Strip surrounding quotes, collapse blank lines, and drop escaped newlines."""
    cleaned = response.strip()
    cleaned = re.sub(r'^"|"$', '', cleaned)
    cleaned = re.sub(r'\n+', '\n', cleaned)
    cleaned = cleaned.replace('\\n', '')
    return cleaned


app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

openai_api_key = os.environ.get('OPENAI_API_KEY')

llm = ChatOpenAI(
    api_key=openai_api_key,
    model_name="gpt-4-turbo-preview",
    temperature=0.7,
)

# Per-session conversation history, kept in process memory (lost on restart).
conversation_history = {}


@app.get("/")
def read_root():
    return {"Hello": "World"}


class Query(BaseModel):
    session_id: str  # Unique identifier for the user session
    query_text: str


# Note: create_retrieval_chain fills {context} with the retrieved documents,
# so the running conversation is passed through a separate {history} slot.
prompt_template = ChatPromptTemplate.from_template(
    """
You are a helpful assistant designed specifically for the Thapar Institute of Engineering and Technology (TIET), a renowned technical college. Your task is to answer queries related to TIET, and every response you provide should be relevant to that context. If a query is not related to TIET or falls outside the context of education, respond with: "Sorry, I cannot help with that. I'm specifically designed to answer questions about the Thapar Institute of Engineering and Technology.
For more information, please call our toll-free number 18002024100 or e-mail us at admissions@thapar.edu"

Conversation so far:
{history}

{context}

Question: {input}
"""
)


def get_embeddings():
    """Return the BGE embedding model, configured to normalize embeddings."""
    return HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-base-en",
        encode_kwargs={'normalize_embeddings': True},
    )


def vector_embedding():
    """Load the DOCX knowledge base, chunk it, embed it, and save a FAISS index."""
    try:
        file_path = "./data/Data.docx"
        if not os.path.exists(file_path):
            print(f"The file {file_path} does not exist.")
            return {"response": "Error: Data file not found"}

        loader = DocxLoader(file_path)
        documents = loader.load()
        print(f"Loaded document: {file_path}")

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
        chunks = text_splitter.split_documents(documents)
        print(f"Created {len(chunks)} chunks.")

        db = FAISS.from_documents(chunks, get_embeddings())
        db.save_local("./vectors_db")
        print("Vector store created and saved successfully.")
        return {"response": "Vector Store DB Is Ready"}
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return {"response": f"Error: {str(e)}"}


@app.post("/chat")
def chat_endpoint(query: Query):
    try:
        session_id = query.session_id
        if session_id not in conversation_history:
            conversation_history[session_id] = []

        embeddings = get_embeddings()
        vectors = FAISS.load_local(
            "./vectors_db", embeddings, allow_dangerous_deserialization=True
        )
    except Exception as e:
        print(f"Error loading vector store: {str(e)}")
        return {"response": "Vector Store Not Found or Error Loading. Please run /setup first."}

    user_query = query.query_text
    if not user_query:
        return {"response": "No Query Found"}

    start = time.perf_counter()  # wall-clock timing, not CPU time

    document_chain = create_stuff_documents_chain(llm, prompt_template)
    retriever = vectors.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)

    # Prior turns go in as {history}; {context} is overwritten by the
    # retriever inside create_retrieval_chain, so it cannot carry them.
    history = "\n".join(conversation_history[session_id])

    response = retrieval_chain.invoke({'input': user_query, 'history': history})
    cleaned_response = clean_response(response['answer'])

    # Record this turn for the session.
    conversation_history[session_id].append(f"User: {user_query}")
    conversation_history[session_id].append(f"Assistant: {cleaned_response}")

    print("Response time:", time.perf_counter() - start)
    return {"response": cleaned_response}


@app.get("/setup")
def setup():
    return vector_embedding()


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
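
# Minimal usage sketch. Assumptions: the server is running locally on port
# 8000, ./data/Data.docx exists, and "demo-1" is an arbitrary session id
# (any stable per-user string works). Build the index once via /setup,
# then send chat requests:
#
#   curl http://localhost:8000/setup
#   curl -X POST http://localhost:8000/chat \
#        -H "Content-Type: application/json" \
#        -d '{"session_id": "demo-1", "query_text": "What programmes does TIET offer?"}'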