Spaces:

Ahaduzzaman
/

chatpdf

Running

App Files Files Community

Ahaduzzaman committed on Sep 21, 2023

Commit

35d610e

1 Parent(s): b60194a

Upload 3 files

Browse files

Files changed (3) hide show

api.py +113 -0
cv.pdf +0 -0
requirements.txt +0 -0

api.py ADDED Viewed

	@@ -0,0 +1,113 @@

+from flask import Flask, request, jsonify
+import os
+from PyPDF2 import PdfReader
+import docx
+from langchain.chat_models import ChatOpenAI
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.chains import ConversationalRetrievalChain
+from langchain.memory import ConversationBufferMemory
+app = Flask(__name__)
+# Define a plapyceholder for your OpenAI API key (replace with your actual key)
+openai_api_key = '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'
+# Define the file paths of the files in your local directory
+# Replace these paths with the actual paths of your files
+file_paths = ["cv.pdf"]
+# Initialize conversation chain globally
+conversation_chain = None
+def read_files(file_paths):
+    text = ""
+    for file_path in file_paths:
+        file_extension = os.path.splitext(file_path)[1]
+        if file_extension == ".pdf":
+            text += get_pdf_text(file_path)
+        elif file_extension == ".docx":
+            text += get_docx_text(file_path)
+        else:
+            text += get_csv_text(file_path)
+    return text
+def get_pdf_text(pdf_path):
+    pdf_reader = PdfReader(pdf_path)
+    text = ""
+    for page in pdf_reader.pages:
+        text += page.extract_text()
+    return text
+def get_docx_text(docx_path):
+    doc = docx.Document(docx_path)
+    all_text = []
+    for doc_para in doc.paragraphs:
+        all_text.append(doc_para.text)
+    text = ' '.join(all_text)
+    return text
+def get_csv_text(csv_path):
+    # Placeholder for CSV processing logic
+    return "CSV content goes here"
+def get_text_chunks(text):
+    text_splitter = CharacterTextSplitter(
+        separator="\n",
+        chunk_size=900,
+        chunk_overlap=100,
+        length_function=len
+    )
+    chunks = text_splitter.split_text(text)
+    return chunks
+def get_vectorstore(text_chunks):
+    embeddings = HuggingFaceEmbeddings()
+    knowledge_base = FAISS.from_texts(text_chunks, embeddings)
+    return knowledge_base
+def initialize_conversation_chain():
+    global conversation_chain
+    text_chunks = get_text_chunks(read_files(file_paths))
+    vector_store = get_vectorstore(text_chunks)
+    llm = ChatOpenAI(openai_api_key=openai_api_key, model_name='gpt-3.5-turbo', temperature=0.1)
+    memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
+    conversation_chain = ConversationalRetrievalChain.from_llm(
+        llm=llm,
+        retriever=vector_store.as_retriever(),
+        memory=memory
+    )
+@app.route('/process_files', methods=['POST'])
+def process_files():
+    global conversation_chain
+    if conversation_chain is None:
+        initialize_conversation_chain()
+    return jsonify({"message": "Files processed successfully."})
+@app.route('/ask_question', methods=['POST'])
+def ask_question():
+    user_question = request.form.get("question")
+    if not user_question:
+        return jsonify({"error": "Please provide a question."}), 400
+    if conversation_chain is None:
+        return jsonify({"error": "Conversation chain not initialized. Please process files first."}), 400
+    response = conversation_chain({'question': user_question})
+    chat_history = response['chat_history']
+    response_message = chat_history[-1].content
+    return jsonify({"response": response_message})
+def handle_user_input(user_question):
+    with conversation_chain:
+        response = conversation_chain({'question': user_question})
+    return response['chat_history'][-1].content
+if __name__ == '__main__':
+    app.run(debug=True)

cv.pdf ADDED Viewed

Binary file (582 kB). View file

requirements.txt ADDED Viewed

Binary file (150 Bytes). View file