import os
import re
import time

import nltk
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from langchain_openai import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import UnstructuredWordDocumentLoader as DocxLoader
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

# Set writable paths for caches and data (useful when the app runs in a
# container whose home directory is read-only).
cache_dir = '/tmp'
nltk_data_path = os.path.join(cache_dir, 'nltk_data')

# Point the Hugging Face / transformers caches at the writable location.
os.environ['TRANSFORMERS_CACHE'] = os.path.join(cache_dir, 'transformers_cache')
os.environ['HF_HOME'] = os.path.join(cache_dir, 'huggingface')
os.environ['XDG_CACHE_HOME'] = cache_dir

# Add the NLTK data path and make sure the directory exists.
nltk.data.path.append(nltk_data_path)
try:
    os.makedirs(nltk_data_path, exist_ok=True)
except OSError as e:
    print(f"Error creating directory {nltk_data_path}: {e}")
    raise

# Download the NLTK resources required by the unstructured DOCX loader.
try:
    nltk.download('punkt', download_dir=nltk_data_path)
    print("NLTK 'punkt' resource downloaded successfully.")
except Exception as e:
    print(f"Error downloading NLTK resources: {e}")
    raise


def clean_response(response: str) -> str:
    """Strip surrounding quotes, collapse blank lines, and drop escaped newlines."""
    cleaned = response.strip()
    cleaned = re.sub(r'^"|"$', '', cleaned)
    cleaned = re.sub(r'\n+', '\n', cleaned)
    cleaned = cleaned.replace('\\n', '')
    return cleaned


app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

openai_api_key = os.environ.get('OPENAI_API_KEY')

llm = ChatOpenAI(
    api_key=openai_api_key,
    model_name="gpt-4-turbo-preview",
    temperature=0.7,
)

# Per-session conversation history, kept in process memory (lost on restart).
conversation_history = {}


@app.get("/")
def read_root():
    return {"Hello": "World"}


class Query(BaseModel):
    session_id: str  # Unique identifier for the user session
    query_text: str


# Note: create_retrieval_chain fills {context} with the retrieved documents,
# so the running conversation is passed through a separate {history} slot.
prompt_template = ChatPromptTemplate.from_template(
    """
You are a helpful assistant designed specifically for the Thapar Institute of Engineering and Technology (TIET), a renowned technical college. Your task is to answer queries related to TIET, and every response you provide should be relevant to that context. If a query is not related to TIET or falls outside the context of education, respond with: "Sorry, I cannot help with that. I'm specifically designed to answer questions about the Thapar Institute of Engineering and Technology.
For more information, please call our toll-free number 18002024100 or e-mail us at admissions@thapar.edu"

Conversation so far:
{history}

{context}

Question: {input}
"""
)


def get_embeddings():
    """Return the BGE embedding model, configured to normalize embeddings."""
    return HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-base-en",
        encode_kwargs={'normalize_embeddings': True},
    )


def vector_embedding():
    """Load the DOCX knowledge base, chunk it, embed it, and save a FAISS index."""
    try:
        file_path = "./data/Data.docx"
        if not os.path.exists(file_path):
            print(f"The file {file_path} does not exist.")
            return {"response": "Error: Data file not found"}

        loader = DocxLoader(file_path)
        documents = loader.load()
        print(f"Loaded document: {file_path}")

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
        chunks = text_splitter.split_documents(documents)
        print(f"Created {len(chunks)} chunks.")

        db = FAISS.from_documents(chunks, get_embeddings())
        db.save_local("./vectors_db")
        print("Vector store created and saved successfully.")
        return {"response": "Vector Store DB Is Ready"}
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return {"response": f"Error: {str(e)}"}


@app.post("/chat")
def chat_endpoint(query: Query):
    try:
        session_id = query.session_id
        if session_id not in conversation_history:
            conversation_history[session_id] = []

        embeddings = get_embeddings()
        vectors = FAISS.load_local(
            "./vectors_db", embeddings, allow_dangerous_deserialization=True
        )
    except Exception as e:
        print(f"Error loading vector store: {str(e)}")
        return {"response": "Vector Store Not Found or Error Loading. Please run /setup first."}

    user_query = query.query_text
    if not user_query:
        return {"response": "No Query Found"}

    start = time.perf_counter()  # wall-clock timing, not CPU time

    document_chain = create_stuff_documents_chain(llm, prompt_template)
    retriever = vectors.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)

    # Prior turns go in as {history}; {context} is overwritten by the
    # retriever inside create_retrieval_chain, so it cannot carry them.
    history = "\n".join(conversation_history[session_id])

    response = retrieval_chain.invoke({'input': user_query, 'history': history})
    cleaned_response = clean_response(response['answer'])

    # Record this turn for the session.
    conversation_history[session_id].append(f"User: {user_query}")
    conversation_history[session_id].append(f"Assistant: {cleaned_response}")

    print("Response time:", time.perf_counter() - start)
    return {"response": cleaned_response}


@app.get("/setup")
def setup():
    return vector_embedding()


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
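
# Minimal usage sketch. Assumptions: the server is running locally on port
# 8000, ./data/Data.docx exists, and "demo-1" is an arbitrary session id
# (any stable per-user string works). Build the index once via /setup,
# then send chat requests:
#
#   curl http://localhost:8000/setup
#   curl -X POST http://localhost:8000/chat \
#        -H "Content-Type: application/json" \
#        -d '{"session_id": "demo-1", "query_text": "What programmes does TIET offer?"}'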