Carlos Salgado committed on
Commit
c62f8e2
·
1 Parent(s): 394b995

add api prompt template

Browse files
backend/generate_metadata.py CHANGED
@@ -23,6 +23,15 @@ vectorstore = Vectara(vectara_customer_id=vectara_customer_id,
23
  vectara_corpus_id=vectara_corpus_id,
24
  vectara_api_key=vectara_api_key)
25
 
 
 
 
 
 
 
 
 
 
26
 
27
  def ingest(file_path):
28
  extension = file_path.split('.')[-1]
@@ -50,22 +59,18 @@ def ingest(file_path):
50
  "",
51
  ])
52
  docs = text_splitter.split_documents(documents)
53
- #print(docs)
54
 
55
  return docs
56
 
57
 
58
- # vectara = Vectara.from_documents(docs, embedding=FakeEmbeddings(size=768))
59
- # retriever = vectara.as_retriever()
60
 
61
- # return retriever
62
-
63
-
64
- def extract_metadata(docs):
65
  # plain text
66
  context = "".join(
67
  [doc.page_content.replace('\n\n','').replace('..','') for doc in docs])
68
 
 
 
69
  # Create client
70
  client = openai.OpenAI(
71
  base_url="https://api.together.xyz/v1",
@@ -74,16 +79,15 @@ def extract_metadata(docs):
74
 
75
  # Call the LLM with the JSON schema
76
  chat_completion = client.chat.completions.create(
77
- model="mistralai/Mixtral-8x7B-Instruct-v0.1",
78
- response_format={"type": "json_object", "schema": Metadata.model_json_schema()},
79
  messages=[
80
  {
81
  "role": "system",
82
- "content": f"You are a helpful assistant that understands BIM documents and engineering disciplines. Your answer should be in JSON format and only include the title, a brief one-sentence summary, and the discipline the document belongs to, distinguishing between {[d.value for d in BimDiscipline]} based on the given document."
83
  },
84
  {
85
  "role": "user",
86
- "content": f"Analyze the provided document, which could be in either German or English. Extract the title, summarize it briefly in one sentence, and infer the discipline. Document:\n{context}"
87
  }
88
  ]
89
  )
 
23
  vectara_corpus_id=vectara_corpus_id,
24
  vectara_api_key=vectara_api_key)
25
 
26
+ prompt_template = """
27
+ BimDiscipline = ['plumbing', 'network', 'heating', 'electrical', 'ventilation', 'architecture']
28
+
29
+ You are a helpful assistant that understands BIM documents and engineering disciplines. Your answer should be in JSON format and only include the title, a brief one-sentence summary, and the discipline the document belongs to, distinguishing between {[d.value for d in BimDiscipline]} based on the given document."
30
+
31
+ Analyze the provided document, which could be in either German or English. Extract the title, summarize it briefly in one sentence, and infer the discipline. Document:
32
+ context="
33
+ """
34
+
35
 
36
  def ingest(file_path):
37
  extension = file_path.split('.')[-1]
 
59
  "",
60
  ])
61
  docs = text_splitter.split_documents(documents)
 
62
 
63
  return docs
64
 
65
 
 
 
66
 
67
+ def extract_metadata(docs):
 
 
 
68
  # plain text
69
  context = "".join(
70
  [doc.page_content.replace('\n\n','').replace('..','') for doc in docs])
71
 
72
+ prompt = f'{prompt_template}{context}"'
73
+
74
  # Create client
75
  client = openai.OpenAI(
76
  base_url="https://api.together.xyz/v1",
 
79
 
80
  # Call the LLM with the JSON schema
81
  chat_completion = client.chat.completions.create(
82
+ model="mistralai/Mixtral-8x7B-Instruct-v0.1",
 
83
  messages=[
84
  {
85
  "role": "system",
86
+ "content": f"You are a helpful assistant that responsds in JSON format"
87
  },
88
  {
89
  "role": "user",
90
+ "content": prompt
91
  }
92
  ]
93
  )
backend/requirements.txt CHANGED
@@ -8,3 +8,4 @@ langchain
8
  openai
9
  chromadb
10
  tiktoken
 
 
8
  openai
9
  chromadb
10
  tiktoken
11
+ python-poppler
backend/schema.py CHANGED
@@ -5,11 +5,11 @@ from pydantic import BaseModel, Field, conlist
5
  from enum import Enum
6
 
7
  class BimDiscipline(str, Enum):
8
- plumbing = 'S - Sanitär'
9
  network = 'D - Datennetz'
10
  heating = 'H - Heizung'
11
  electrical = 'E - Elektro'
12
- ventilation = 'L - Lüftung'
13
  architecture = 'A - Architektur'
14
 
15
  # Define the schema for the output.
 
5
  from enum import Enum
6
 
7
  class BimDiscipline(str, Enum):
8
+ plumbing = 'S - Sanitaer'
9
  network = 'D - Datennetz'
10
  heating = 'H - Heizung'
11
  electrical = 'E - Elektro'
12
+ ventilation = 'L - Lueftung'
13
  architecture = 'A - Architektur'
14
 
15
  # Define the schema for the output.