lakshmivairamani
committed on
Commit 8bac072
Parent(s): b534452
Upload 16 files
Browse files
- config/__pycache__/settings.cpython-310.pyc +0 -0
- config/__pycache__/settings.cpython-311.pyc +0 -0
- config/__pycache__/settings.cpython-312.pyc +0 -0
- config/settings.py +4 -0
- logs/redmindgen.log +0 -0
- services/__pycache__/chat_service.cpython-310.pyc +0 -0
- services/__pycache__/chat_service.cpython-311.pyc +0 -0
- services/__pycache__/chat_service.cpython-312.pyc +0 -0
- services/__pycache__/file_upload_service.cpython-310.pyc +0 -0
- services/__pycache__/file_upload_service.cpython-312.pyc +0 -0
- services/__pycache__/multidoc_files_upload.cpython-310.pyc +0 -0
- services/__pycache__/multidoc_files_upload.cpython-311.pyc +0 -0
- services/chat_service.py +137 -0
- services/file_upload_service.py +141 -0
- static/img/AI.jpg +0 -0
- static/img/redmindlogo3.jpg +0 -0
config/__pycache__/settings.cpython-310.pyc
ADDED
Binary file (424 Bytes).

config/__pycache__/settings.cpython-311.pyc
ADDED
Binary file (511 Bytes).

config/__pycache__/settings.cpython-312.pyc
ADDED
Binary file (444 Bytes).
config/settings.py
ADDED
@@ -0,0 +1,4 @@
import os

class Settings:
    DB_URI = "mysql+mysqlconnector://redmindgen:51(xtzb0z_P8wRkowkDGQe@167.71.75.10:3306/collegedb"
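The URI is consumed elsewhere in this commit through SQLDatabase.from_uri (see services/chat_service.py below). For reference, a minimal standalone sketch of reading the same setting, assuming SQLAlchemy and the mysql-connector driver are installed (this snippet is not part of the commit):

# Hypothetical sketch: connect with the committed URI.
from sqlalchemy import create_engine, text

from config.settings import Settings

engine = create_engine(Settings.DB_URI)  # same URI that SQLDatabase.from_uri uses
with engine.connect() as conn:
    print(conn.execute(text("SELECT 1")).scalar())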
logs/redmindgen.log
ADDED
The diff for this file is too large to render.
See raw diff
services/__pycache__/chat_service.cpython-310.pyc
ADDED
Binary file (6.05 kB).

services/__pycache__/chat_service.cpython-311.pyc
ADDED
Binary file (9.56 kB).

services/__pycache__/chat_service.cpython-312.pyc
ADDED
Binary file (8.75 kB).

services/__pycache__/file_upload_service.cpython-310.pyc
ADDED
Binary file (5.25 kB).

services/__pycache__/file_upload_service.cpython-312.pyc
ADDED
Binary file (8.64 kB).

services/__pycache__/multidoc_files_upload.cpython-310.pyc
ADDED
Binary file (4.42 kB).

services/__pycache__/multidoc_files_upload.cpython-311.pyc
ADDED
Binary file (8.14 kB).
services/chat_service.py
ADDED
@@ -0,0 +1,137 @@
import os
import logging
from dotenv import load_dotenv
from langchain.memory import ConversationSummaryMemory
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.utilities import SQLDatabase
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain.agents import create_tool_calling_agent, AgentExecutor, Tool
from langchain_community.vectorstores import FAISS
from config.settings import Settings

# Load environment variables
load_dotenv()
open_api_key_token = os.getenv('OPENAI_API_KEY')
#db_uri = os.getenv('POST_DB_URI')
db_uri = Settings.DB_URI

class ChatAgentService:
    def __init__(self):
        # Database setup
        self.db = SQLDatabase.from_uri(db_uri)
        self.llm = ChatOpenAI(model="gpt-3.5-turbo-0125", api_key=open_api_key_token, max_tokens=150, temperature=0.2)
        self.memory = ConversationSummaryMemory(llm=self.llm, return_messages=True)

        # Tools setup
        self.tools = [
            Tool(
                name="DatabaseQuery",
                func=self.database_tool,
                description="Queries the SQL database using dynamically generated SQL queries based on user questions. Aimed to retrieve structured data like counts, specific records, or summaries from predefined schemas.",
                tool_choice="required"
            ),
            Tool(
                name="DocumentData",
                func=self.document_data_tool,
                description="Searches through indexed documents to find relevant information based on user queries. Handles unstructured data from various document formats like PDF, DOCX, or TXT files.",
                tool_choice="required"
            ),
        ]

        # Agent setup
        prompt_template = self.setup_prompt()
        self.agent = create_tool_calling_agent(self.llm.bind(memory=self.memory), self.tools, prompt_template)
        self.agent_executor = AgentExecutor(agent=self.agent, tools=self.tools, memory=self.memory, verbose=True)

    def setup_prompt(self):
        prompt_template = f"""
        You are an assistant that helps with database queries and document retrieval.
        Please base your responses strictly on available data and avoid assumptions.
        If the question pertains to numerical data or structured queries, use the DatabaseQuery tool.
        If the question relates to content within various documents, use the DocumentData tool.
        Question: {{input}}
        {{agent_scratchpad}}
        """
        return ChatPromptTemplate.from_template(prompt_template)

    def database_tool(self, question):
        sql_query = self.generate_sql_query(question)
        return self.run_query(sql_query)

    def get_schema(self, _):
        # print(self.db.get_table_info())
        return self.db.get_table_info()

    def generate_sql_query(self, question):
        schema = self.get_schema(None)  # Get the schema using the function
        template_query_generation = """Generate a SQL query to answer the user's question based on the available database schema.
        {schema}
        Question: {question}
        SQL Query:"""

        prompt_query_generation = ChatPromptTemplate.from_template(template_query_generation)
        # Correctly setting up the initial data dictionary for the chain
        input_data = {'question': question}
        # Setup the chain correctly
        sql_chain = (RunnablePassthrough.assign(schema=self.get_schema)
                     | prompt_query_generation
                     | self.llm.bind(stop="\nSQL Result:")
                     | StrOutputParser())

        # Make sure to invoke with an empty dictionary if all needed data is already assigned
        return sql_chain.invoke(input_data)

    def run_query(self, query):
        try:
            logging.info(f"Executing SQL query: {query}")
            result = self.db.run(query)
            logging.info(f"Query successful: {result}")
            return result
        except Exception as e:
            logging.error(f"Error executing query: {query}, Error: {str(e)}")
            return None

    def document_data_tool(self, query):
        try:
            logging.info(f"Searching documents for query: {query}")
            embeddings = OpenAIEmbeddings(api_key=open_api_key_token)
            index_paths = self.find_index_for_document(query)
            responses = []
            for index_path in index_paths:
                vector_store = FAISS.load_local(index_path, embeddings, allow_dangerous_deserialization=True)
                response = self.query_vector_store(vector_store, query)
                responses.append(response)
            logging.info(f"Document search results: {responses}")
            return "\n".join(responses)
        except Exception as e:
            logging.error(f"Error in document data tool for query: {query}, Error: {str(e)}")
            return "Error processing document query."

    def find_index_for_document(self, query):
        base_path = os.getenv('VECTOR_DB_PATH')
        # document_hint = self.extract_document_hint(query)
        index_paths = []
        for root, dirs, files in os.walk(base_path):
            for dir in dirs:
                if 'index.faiss' in os.listdir(os.path.join(root, dir)):
                    index_paths.append(os.path.join(root, dir, ''))
        return index_paths

    def query_vector_store(self, vector_store, query):
        docs = vector_store.similarity_search(query)
        return '\n\n'.join([doc.page_content for doc in docs])

    def answer_question(self, user_question):
        try:
            logging.info(f"Received question: {user_question}")
            response = self.agent_executor.invoke({"input": user_question})
            output_response = response.get("output", "No valid response generated.")
            logging.info(f"Response generated: {output_response}")
            return output_response
        except Exception as e:
            logging.error(f"Error processing question: {user_question}, Error: {str(e)}")
            return f"An error occurred: {str(e)}"
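A minimal usage sketch for the class added above, assuming OPENAI_API_KEY and VECTOR_DB_PATH are set in the environment and the MySQL database in Settings.DB_URI is reachable (this snippet is not part of the commit):

# Hypothetical sketch: answer one question end to end.
from services.chat_service import ChatAgentService

service = ChatAgentService()  # builds the SQL connection, LLM, tools, and agent executor
answer = service.answer_question("How many students are enrolled this year?")
print(answer)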
services/file_upload_service.py
ADDED
@@ -0,0 +1,141 @@
import io
import os
import tempfile
import hashlib
import json
import logging
import pandas as pd
from datetime import datetime
from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from PyPDF2 import PdfReader
from docx import Document
# from transformers import pipeline

# Load environment variables
load_dotenv()
open_api_key_token = os.getenv('OPENAI_API_KEY')


class FileHandler:
    def __init__(self, vector_db_path):
        self.vector_db_path = vector_db_path
        self.embeddings = OpenAIEmbeddings(api_key=open_api_key_token)
        # self.summarizer = pipeline("summarization")

    def prepare_metadata_string(self, document_name, document_description, department, version, last_updated):
        metadata_string = f"\nDocument Name: {document_name}\nDocument Description: {document_description}\nDepartment: {department}\nVersion: {version}\nLast Updated: {last_updated}"
        return metadata_string

    async def handle_file_upload(self, file, document_name, document_description, department, version, last_updated):
        content = await file.read()
        file_hash = hashlib.md5(content).hexdigest()
        file_key = f"{file.filename}_{file_hash}"
        vector_store_path = os.path.join(self.vector_db_path, f"{file_key}.vectorstore")
        metadata_path = os.path.join(self.vector_db_path, f"{file_key}.metadata.json")

        metadata_string = self.prepare_metadata_string(document_name, document_description, department, version,
                                                       last_updated)

        if os.path.exists(vector_store_path) and os.path.exists(metadata_path):
            with open(metadata_path, 'r') as md_file:
                metadata = json.load(md_file)
            return {'path': vector_store_path, 'metadata': metadata, 'status': 'skipped - duplicate'}

        if file.filename.endswith('.csv') or file.filename.endswith('.xlsx'):
            texts = self.load_and_split_table(content, file.filename, metadata_string)
        else:
            texts = await self.load_and_split_text(content, file.filename, metadata_string)

        vector_store = self.create_vector_store(texts)
        vector_store.save_local(vector_store_path)

        metadata = {
            'filename': file.filename,
            'document_name': document_name,
            'document_description': document_description,
            'department': department,
            'version': version,
            'last_updated': last_updated,
            'hash': file_hash,
            'upload_date': datetime.now().isoformat(),
            'file_path': vector_store_path,
            'file_size': len(content),
            'content_type': file.content_type
        }

        with open(metadata_path, 'w') as md_file:
            json.dump(metadata, md_file)

        return {"message": "File processed and vector store created successfully", "file_metadata": metadata}

    def summarize_text(self, text):
        try:
            summary = self.summarizer(text, max_length=150, min_length=10, do_sample=False)
            logging.info("Text summarization successful")
            return summary[0]['summary_text']
        except Exception as e:
            logging.error(f"Error in summarization: {str(e)}")
            # Log error or handle exception
            return text  # Return original text if summarization is not possible

    def load_and_split_table(self, content, filename, metadata_string):
        # Handle CSV and Excel file reading
        if filename.endswith('.csv'):
            df = pd.read_csv(io.StringIO(content.decode('utf-8')))
        else:  # Excel
            df = pd.read_excel(io.BytesIO(content))
        text = df.to_string(index=False)  # Convert DataFrame to string
        text += metadata_string  # Append metadata to the text
        return self.split_text(text)

    async def load_and_split_text(self, content, filename, metadata_string):
        with tempfile.NamedTemporaryFile(delete=False, mode='w+b', suffix=f"_{filename}") as temp_file:
            temp_file.write(content)
            temp_file.flush()
            temp_file_path = temp_file.name

        # Ensure the temp file is closed before reading from it
        if filename.endswith('.pdf'):
            texts = await self.load_and_split_pdf(temp_file_path, metadata_string)
        elif filename.endswith('.docx'):
            texts = await self.load_and_split_docx(temp_file_path, metadata_string)
        elif filename.endswith('.txt'):
            texts = await self.load_and_split_txt(temp_file_path, metadata_string)

        # Apply summarization here to each text segment
        # summarized_texts = [self.summarize_text(text) for text in texts]
        # os.unlink(temp_file_path)  # Explicitly remove the temporary file
        # return summarized_texts
        os.unlink(temp_file_path)  # Explicitly remove the temporary file
        return texts

    async def load_and_split_pdf(self, pdf_path, metadata_string):
        reader = PdfReader(pdf_path)
        text = ''
        for page in reader.pages:
            text += page.extract_text() or ""
        text += metadata_string  # Append metadata to the text
        return self.split_text(text)

    async def load_and_split_docx(self, docx_path, metadata_string):
        doc = Document(docx_path)
        text = '\n'.join([paragraph.text for paragraph in doc.paragraphs if paragraph.text])
        text += metadata_string  # Append metadata to the text
        return self.split_text(text)

    async def load_and_split_txt(self, txt_path, metadata_string):
        with open(txt_path, 'r', encoding='utf-8') as file:
            text = file.read()
        text += metadata_string  # Append metadata to the text
        return self.split_text(text)

    def split_text(self, text):
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        return text_splitter.split_text(text)

    def create_vector_store(self, texts):
        return FAISS.from_texts(texts, self.embeddings)
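handle_file_upload expects a file object exposing an async read() plus filename and content_type attributes, which matches FastAPI's UploadFile. A minimal sketch under that assumption, with the route path and vector_db_path chosen for illustration (this snippet is not part of the commit):

# Hypothetical sketch: wire FileHandler into a FastAPI upload route.
from fastapi import FastAPI, UploadFile, File, Form

from services.file_upload_service import FileHandler

app = FastAPI()
handler = FileHandler(vector_db_path="vector_dbs")  # directory where .vectorstore/.metadata.json files are written

@app.post("/upload")
async def upload(file: UploadFile = File(...),
                 document_name: str = Form(...),
                 document_description: str = Form(...),
                 department: str = Form(...),
                 version: str = Form(...),
                 last_updated: str = Form(...)):
    # Delegates hashing, splitting, embedding, and FAISS persistence to FileHandler
    return await handler.handle_file_upload(file, document_name, document_description,
                                            department, version, last_updated)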
static/img/AI.jpg
ADDED
static/img/redmindlogo3.jpg
ADDED