import os

import docx
from flask import Flask, request, jsonify
from PyPDF2 import PdfReader
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

app = Flask(__name__)

# Read the OpenAI API key from the environment rather than hard-coding it
# (replace the fallback with your own key management if needed)
openai_api_key = os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")

# Define the file paths of the files in your local directory
# Replace these paths with the actual paths of your files
file_paths = ["cv.pdf"]

# Initialize conversation chain globally
conversation_chain = None


def read_files(file_paths):
    """Concatenate the text of every file, dispatching on extension."""
    text = ""
    for file_path in file_paths:
        file_extension = os.path.splitext(file_path)[1]
        if file_extension == ".pdf":
            text += get_pdf_text(file_path)
        elif file_extension == ".docx":
            text += get_docx_text(file_path)
        else:
            text += get_csv_text(file_path)
    return text


def get_pdf_text(pdf_path):
    """Extract text from every page of a PDF."""
    pdf_reader = PdfReader(pdf_path)
    text = ""
    for page in pdf_reader.pages:
        # extract_text() may return None for pages with no extractable text
        text += page.extract_text() or ""
    return text


def get_docx_text(docx_path):
    """Extract the text of every paragraph in a .docx file."""
    doc = docx.Document(docx_path)
    all_text = [doc_para.text for doc_para in doc.paragraphs]
    return ' '.join(all_text)


def get_csv_text(csv_path):
    # Minimal CSV handling: read the file as plain text so its rows can be
    # chunked and embedded like any other document
    with open(csv_path, "r", encoding="utf-8") as csv_file:
        return csv_file.read()


def get_text_chunks(text):
    """Split the raw text into overlapping chunks for embedding."""
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=900,
        chunk_overlap=100,
        length_function=len
    )
    return text_splitter.split_text(text)


def get_vectorstore(text_chunks):
    """Embed the chunks and index them in an in-memory FAISS store."""
    embeddings = HuggingFaceEmbeddings()
    knowledge_base = FAISS.from_texts(text_chunks, embeddings)
    return knowledge_base


def initialize_conversation_chain():
    """Build the retrieval chain once and cache it in the module-level global."""
    global conversation_chain
    text_chunks = get_text_chunks(read_files(file_paths))
    vector_store = get_vectorstore(text_chunks)
    llm = ChatOpenAI(openai_api_key=openai_api_key,
                     model_name='gpt-3.5-turbo', temperature=0.1)
    memory = ConversationBufferMemory(memory_key='chat_history',
                                      return_messages=True)
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_store.as_retriever(),
        memory=memory
    )


@app.route('/process_files', methods=['POST'])
def process_files():
    global conversation_chain
    if conversation_chain is None:
        initialize_conversation_chain()
    return jsonify({"message": "Files processed successfully."})


@app.route('/ask_question', methods=['POST'])
def ask_question():
    user_question = request.form.get("question")
    if not user_question:
        return jsonify({"error": "Please provide a question."}), 400
    if conversation_chain is None:
        return jsonify({"error": "Conversation chain not initialized. "
                                 "Please process files first."}), 400
    response = conversation_chain({'question': user_question})
    chat_history = response['chat_history']
    response_message = chat_history[-1].content
    return jsonify({"response": response_message})


def handle_user_input(user_question):
    """Helper that runs a question through the chain and returns the answer."""
    # The chain is a plain callable, not a context manager, so call it directly
    # (the original `with conversation_chain:` would raise an AttributeError)
    response = conversation_chain({'question': user_question})
    return response['chat_history'][-1].content


if __name__ == '__main__':
    app.run(debug=True)
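
# --- Usage sketch (assumption: the server is running locally on Flask's
# default port 5000; the question text below is purely illustrative) ---
#
# Build the conversation chain over the configured files:
#   curl -X POST http://127.0.0.1:5000/process_files
#
# Ask a question against the indexed documents:
#   curl -X POST -F "question=Summarize the CV" http://127.0.0.1:5000/ask_question
#
# The second call returns JSON of the form {"response": "..."}. Because the
# chain and its ConversationBufferMemory live in a module-level global, the
# chat history is shared across all clients of this server.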