Spaces:

salgadev
/

docverifyrag

Sleeping

App Files Files Community

Carlos Salgado committed on Apr 17

Commit

5876325

•

1 Parent(s): 6a48da2

add api prompt template

Browse files

Files changed (3) hide show

backend/generate_metadata.py +15 -11
backend/requirements.txt +1 -0
backend/schema.py +2 -2

backend/generate_metadata.py CHANGED Viewed

@@ -24,6 +24,15 @@ vectorstore = Vectara(vectara_customer_id=vectara_customer_id,
                       vectara_corpus_id=vectara_corpus_id,
                       vectara_api_key=vectara_api_key)
 def ingest(file_path):
     extension = file_path.split('.')[-1]
@@ -51,22 +60,18 @@ def ingest(file_path):
         "",
     ])
     docs = text_splitter.split_documents(documents)
-    #print(docs)
     return docs
-    # vectara = Vectara.from_documents(docs, embedding=FakeEmbeddings(size=768))
-    # retriever = vectara.as_retriever()
-    # return retriever
-def extract_metadata(docs):
     # plain text
     context = "".join(
         [doc.page_content.replace('\n\n','').replace('..','') for doc in docs])
     # Create client
     client = openai.OpenAI(
         base_url="https://api.together.xyz/v1",
@@ -75,16 +80,15 @@ def extract_metadata(docs):
     # Call the LLM with the JSON schema
     chat_completion = client.chat.completions.create(
-        model="mistralai/Mixtral-8x7B-Instruct-v0.1",
-        response_format={"type": "json_object", "schema": Metadata.model_json_schema()},
         messages=[
             {
                 "role": "system",
-                "content": f"You are a helpful assistant that understands BIM documents and engineering disciplines. Your answer should be in JSON format and only include the title, a brief one-sentence summary, and the discipline the document belongs to, distinguishing between {[d.value for d in BimDiscipline]} based on the given document."
             },
             {
                 "role": "user",
-                "content": f"Analyze the provided document, which could be in either German or English. Extract the title, summarize it briefly in one sentence, and infer the discipline. Document:\n{context}"
             }
         ]
     )

                       vectara_corpus_id=vectara_corpus_id,
                       vectara_api_key=vectara_api_key)
+prompt_template = """
+BimDiscipline = ['plumbing', 'network', 'heating', 'electrical', 'ventilation', 'architecture']
+You are a helpful assistant that understands BIM documents and engineering disciplines. Your answer should be in JSON format and only include the title, a brief one-sentence summary, and the discipline the document belongs to, distinguishing between {[d.value for d in BimDiscipline]} based on the given document."
+Analyze the provided document, which could be in either German or English. Extract the title, summarize it briefly in one sentence, and infer the discipline. Document:
+context="
+"""
 def ingest(file_path):
     extension = file_path.split('.')[-1]
         "",
     ])
     docs = text_splitter.split_documents(documents)
     return docs
+def extract_metadata(docs):
     # plain text
     context = "".join(
         [doc.page_content.replace('\n\n','').replace('..','') for doc in docs])
+    prompt = f'{prompt_template}{context}"'
     # Create client
     client = openai.OpenAI(
         base_url="https://api.together.xyz/v1",
     # Call the LLM with the JSON schema
     chat_completion = client.chat.completions.create(
+        model="mistralai/Mixtral-8x7B-Instruct-v0.1",
         messages=[
             {
                 "role": "system",
+                "content": f"You are a helpful assistant that responsds in JSON format"
             },
             {
                 "role": "user",
+                "content": prompt
             }
         ]
     )

backend/requirements.txt CHANGED Viewed

@@ -8,3 +8,4 @@ langchain
 openai
 chromadb
 tiktoken

 openai
 chromadb
 tiktoken
+python-poppler

backend/schema.py CHANGED Viewed

@@ -5,11 +5,11 @@ from pydantic import BaseModel, Field, conlist
 from enum import Enum
 class BimDiscipline(str, Enum):
-    plumbing = 'S - Sanitär'
     network = 'D - Datennetz'
     heating = 'H - Heizung'
     electrical = 'E - Elektro'
-    ventilation = 'L - Lüftung'
     architecture = 'A - Architektur'
 # Define the schema for the output.

 from enum import Enum
 class BimDiscipline(str, Enum):
+    plumbing = 'S - Sanitaer'
     network = 'D - Datennetz'
     heating = 'H - Heizung'
     electrical = 'E - Elektro'
+    ventilation = 'L - Lueftung'
     architecture = 'A - Architektur'
 # Define the schema for the output.