Ahaduzzaman committed on
Commit
35d610e
·
1 Parent(s): b60194a

Upload 3 files

Browse files
Files changed (3) hide show
  1. api.py +113 -0
  2. cv.pdf +0 -0
  3. requirements.txt +0 -0
api.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify
2
+ import os
3
+ from PyPDF2 import PdfReader
4
+ import docx
5
+ from langchain.chat_models import ChatOpenAI
6
+ from langchain.text_splitter import CharacterTextSplitter
7
+ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
8
+ from langchain.vectorstores import FAISS
9
+ from langchain.chains import ConversationalRetrievalChain
10
+ from langchain.memory import ConversationBufferMemory
11
+
12
app = Flask(__name__)

# Read the OpenAI API key from the environment instead of hard-coding it.
# NOTE(review): the original source embedded a live-looking secret key
# ('sk-...') directly in the file — that key should be revoked/rotated,
# and deployments should set OPENAI_API_KEY instead.
openai_api_key = os.environ.get('OPENAI_API_KEY', '')

# Local documents to index; extend this list to ingest more files.
file_paths = ["cv.pdf"]

# Lazily-initialized ConversationalRetrievalChain shared across requests.
# Built on first call to /process_files (see initialize_conversation_chain).
conversation_chain = None
23
+
24
def read_files(file_paths):
    """Extract and concatenate the text of every file in *file_paths*.

    PDFs and .docx files get dedicated extractors; any other extension
    falls through to the CSV reader.
    """
    pieces = []
    for path in file_paths:
        extension = os.path.splitext(path)[1]
        if extension == ".pdf":
            pieces.append(get_pdf_text(path))
        elif extension == ".docx":
            pieces.append(get_docx_text(path))
        else:
            pieces.append(get_csv_text(path))
    return "".join(pieces)
35
+
36
def get_pdf_text(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    Bug fix: PyPDF2's ``page.extract_text()`` may return ``None`` for pages
    without an extractable text layer; the original ``text += ...`` would
    then raise ``TypeError``. Such pages now contribute an empty string.
    """
    reader = PdfReader(pdf_path)
    return "".join(page.extract_text() or "" for page in reader.pages)
42
+
43
def get_docx_text(docx_path):
    """Return the text of all paragraphs in the .docx at *docx_path*, space-joined."""
    document = docx.Document(docx_path)
    return ' '.join(paragraph.text for paragraph in document.paragraphs)
50
+
51
def get_csv_text(csv_path):
    """Return the contents of the CSV at *csv_path* as plain text.

    Each row is rendered as its comma-separated cells; rows are joined
    with newlines. The original implementation was a placeholder that
    ignored the file entirely and returned a fixed string.
    """
    import csv  # stdlib; imported locally to keep the file-level imports unchanged

    rows = []
    with open(csv_path, newline="", encoding="utf-8") as handle:
        for row in csv.reader(handle):
            rows.append(", ".join(row))
    return "\n".join(rows)
54
+
55
def get_text_chunks(text):
    """Split *text* into overlapping chunks sized for embedding."""
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=900,
        chunk_overlap=100,
        length_function=len,
    )
    return splitter.split_text(text)
64
+
65
def get_vectorstore(text_chunks):
    """Embed *text_chunks* with HuggingFace embeddings and index them in FAISS."""
    return FAISS.from_texts(text_chunks, HuggingFaceEmbeddings())
69
+
70
def initialize_conversation_chain():
    """Build the global retrieval chain from the configured local files.

    Reads every path in ``file_paths``, chunks and embeds the text, and
    wires a ChatOpenAI model plus buffer memory into a
    ConversationalRetrievalChain stored in the module-level
    ``conversation_chain``.
    """
    global conversation_chain

    chunks = get_text_chunks(read_files(file_paths))
    store = get_vectorstore(chunks)
    chat_model = ChatOpenAI(
        openai_api_key=openai_api_key,
        model_name='gpt-3.5-turbo',
        temperature=0.1,
    )
    history = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
    )
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=store.as_retriever(),
        memory=history,
    )
81
+
82
@app.route('/process_files', methods=['POST'])
def process_files():
    """Build the retrieval chain on first call; later calls are no-ops."""
    global conversation_chain
    if conversation_chain is None:
        initialize_conversation_chain()
    return jsonify({"message": "Files processed successfully."})
90
+
91
@app.route('/ask_question', methods=['POST'])
def ask_question():
    """Answer a form-posted question against the indexed documents.

    Expects a ``question`` form field; returns 400 when it is missing or
    when the chain has not yet been built via /process_files.
    """
    user_question = request.form.get("question")
    if not user_question:
        return jsonify({"error": "Please provide a question."}), 400
    if conversation_chain is None:
        return jsonify({"error": "Conversation chain not initialized. Please process files first."}), 400

    result = conversation_chain({'question': user_question})
    # The memory appends the model's reply last; surface that message.
    latest = result['chat_history'][-1]
    return jsonify({"response": latest.content})
106
+
107
def handle_user_input(user_question, chain=None):
    """Run *user_question* through the retrieval chain and return the answer text.

    Bug fix: the original wrapped the call in ``with conversation_chain:``,
    but a ConversationalRetrievalChain is not a context manager, so every
    call raised at the ``with`` statement. The *chain* parameter (new,
    backward-compatible) lets callers/tests inject a chain explicitly; it
    defaults to the module-level ``conversation_chain``.
    """
    active_chain = chain if chain is not None else conversation_chain
    response = active_chain({'question': user_question})
    # The memory appends the model's reply last in chat_history.
    return response['chat_history'][-1].content
111
+
112
if __name__ == '__main__':
    # NOTE(review): debug=True enables the Werkzeug interactive debugger and
    # auto-reloader — convenient locally, but it must be disabled (and a real
    # WSGI server used) in any production deployment.
    app.run(debug=True)
cv.pdf ADDED
Binary file (582 kB). View file
 
requirements.txt ADDED
Binary file (150 Bytes). View file