import os

import docx
from flask import Flask, request, jsonify
from PyPDF2 import PdfReader
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

app = Flask(__name__)

# Read the OpenAI API key from the environment rather than hard-coding it
# (replace the fallback with your own key management if needed)
openai_api_key = os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")

# Define the file paths of the files in your local directory
# Replace these paths with the actual paths of your files
file_paths = ["cv.pdf"]

# Initialize conversation chain globally
conversation_chain = None


def read_files(file_paths):
    """Concatenate the text of every file, dispatching on extension."""
    text = ""
    for file_path in file_paths:
        file_extension = os.path.splitext(file_path)[1]
        if file_extension == ".pdf":
            text += get_pdf_text(file_path)
        elif file_extension == ".docx":
            text += get_docx_text(file_path)
        else:
            text += get_csv_text(file_path)
    return text


def get_pdf_text(pdf_path):
    """Extract text from every page of a PDF."""
    pdf_reader = PdfReader(pdf_path)
    text = ""
    for page in pdf_reader.pages:
        # extract_text() may return None for pages with no extractable text
        text += page.extract_text() or ""
    return text


def get_docx_text(docx_path):
    """Extract the text of every paragraph in a .docx file."""
    doc = docx.Document(docx_path)
    all_text = [doc_para.text for doc_para in doc.paragraphs]
    return ' '.join(all_text)


def get_csv_text(csv_path):
    # Minimal CSV handling: read the file as plain text so its rows can be
    # chunked and embedded like any other document
    with open(csv_path, "r", encoding="utf-8") as csv_file:
        return csv_file.read()


def get_text_chunks(text):
    """Split the raw text into overlapping chunks for embedding."""
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=900,
        chunk_overlap=100,
        length_function=len
    )
    return text_splitter.split_text(text)


def get_vectorstore(text_chunks):
    """Embed the chunks and index them in an in-memory FAISS store."""
    embeddings = HuggingFaceEmbeddings()
    knowledge_base = FAISS.from_texts(text_chunks, embeddings)
    return knowledge_base


def initialize_conversation_chain():
    """Build the retrieval chain once and cache it in the module-level global."""
    global conversation_chain
    text_chunks = get_text_chunks(read_files(file_paths))
    vector_store = get_vectorstore(text_chunks)
    llm = ChatOpenAI(openai_api_key=openai_api_key,
                     model_name='gpt-3.5-turbo', temperature=0.1)
    memory = ConversationBufferMemory(memory_key='chat_history',
                                      return_messages=True)
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_store.as_retriever(),
        memory=memory
    )


@app.route('/process_files', methods=['POST'])
def process_files():
    global conversation_chain
    if conversation_chain is None:
        initialize_conversation_chain()
    return jsonify({"message": "Files processed successfully."})


@app.route('/ask_question', methods=['POST'])
def ask_question():
    user_question = request.form.get("question")
    if not user_question:
        return jsonify({"error": "Please provide a question."}), 400
    if conversation_chain is None:
        return jsonify({"error": "Conversation chain not initialized. "
                                 "Please process files first."}), 400
    response = conversation_chain({'question': user_question})
    chat_history = response['chat_history']
    response_message = chat_history[-1].content
    return jsonify({"response": response_message})


def handle_user_input(user_question):
    """Helper that runs a question through the chain and returns the answer."""
    # The chain is a plain callable, not a context manager, so call it directly
    # (the original `with conversation_chain:` would raise an AttributeError)
    response = conversation_chain({'question': user_question})
    return response['chat_history'][-1].content


if __name__ == '__main__':
    app.run(debug=True)
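
# --- Usage sketch (assumption: the server is running locally on Flask's
# default port 5000; the question text below is purely illustrative) ---
#
# Build the conversation chain over the configured files:
#   curl -X POST http://127.0.0.1:5000/process_files
#
# Ask a question against the indexed documents:
#   curl -X POST -F "question=Summarize the CV" http://127.0.0.1:5000/ask_question
#
# The second call returns JSON of the form {"response": "..."}. Because the
# chain and its ConversationBufferMemory live in a module-level global, the
# chat history is shared across all clients of this server.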