ShravanHN commited on
Commit
86e4e69
·
1 Parent(s): d974af2

Updated with Llama 3.1 model, added logs for debugging

Browse files
Files changed (2) hide show
  1. app.py +72 -5
  2. requirements.txt +1 -1
app.py CHANGED
@@ -42,7 +42,7 @@ h1 {
42
  """
43
 
44
  # Load the tokenizer and model with quantization
45
- model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
46
  bnb_config = BitsAndBytesConfig(
47
  load_in_4bit=True,
48
  bnb_4bit_use_double_quant=True,
@@ -82,13 +82,76 @@ terminators = [
82
  tokenizer.convert_tokens_to_ids("<|eot_id|>")
83
  ]
84
 
 
 
 
 
 
 
 
85
  SYS_PROMPT = """
86
- Extract all relevant keywords and add quantity from the following text and format the result in nested JSON, ignoring personal details and focusing only on the scope of work as shown in the example:
87
- Good JSON example: {'lobby': {'frcm': {'replace': {'carpet': 1, 'carpet_pad': 1, 'base': 1, 'window_treatments': 1, 'artwork_and_decorative_accessories': 1, 'portable_lighting': 1, 'upholstered_furniture_and_decorative_pillows': 1, 'millwork': 1} } } }
88
- Bad JSON example: {'lobby': { 'frcm': { 'replace': [ 'carpet', 'carpet_pad', 'base', 'window_treatments', 'artwork_and_decorative_accessories', 'portable_lighting', 'upholstered_furniture_and_decorative_pillows', 'millwork'] } } }
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  Make sure to fetch details from the provided text and ignore unnecessary information. The response should be in JSON format only, without any additional comments.
90
- """
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  def chunk_text(text, chunk_size=5000):
93
  """
94
  Splits the input text into chunks of specified size.
@@ -102,6 +165,7 @@ def chunk_text(text, chunk_size=5000):
102
  """
103
  words = text.split()
104
  chunks = [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
 
105
  return chunks
106
 
107
  def combine_responses(responses):
@@ -175,9 +239,12 @@ def chat_llama3_8b(message: str, history: list, temperature: float, max_new_toke
175
 
176
  chunks = chunk_text(message)
177
  responses = []
 
178
  for chunk in chunks:
 
179
  response = generate_response_for_chunk(chunk, history, temperature, max_new_tokens)
180
  responses.append(response)
 
181
  final_output = combine_responses(responses)
182
 
183
  end_time = time.time()
 
42
  """
43
 
44
  # Load the tokenizer and model with quantization
45
+ model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
46
  bnb_config = BitsAndBytesConfig(
47
  load_in_4bit=True,
48
  bnb_4bit_use_double_quant=True,
 
82
  tokenizer.convert_tokens_to_ids("<|eot_id|>")
83
  ]
84
 
85
+ # SYS_PROMPT = """
86
+ # Extract all relevant keywords and add quantity from the following text and format the result in nested JSON, ignoring personal details and focusing only on the scope of work as shown in the example:
87
+ # Good JSON example: {'lobby': {'frcm': {'replace': {'carpet': 1, 'carpet_pad': 1, 'base': 1, 'window_treatments': 1, 'artwork_and_decorative_accessories': 1, 'portable_lighting': 1, 'upholstered_furniture_and_decorative_pillows': 1, 'millwork': 1} } } }
88
+ # Bad JSON example: {'lobby': { 'frcm': { 'replace': [ 'carpet', 'carpet_pad', 'base', 'window_treatments', 'artwork_and_decorative_accessories', 'portable_lighting', 'upholstered_furniture_and_decorative_pillows', 'millwork'] } } }
89
+ # Make sure to fetch details from the provided text and ignore unnecessary information. The response should be in JSON format only, without any additional comments.
90
+ # """
91
+
92
  SYS_PROMPT = """
93
+ Extract all relevant keywords and add quantities from the following text and format the result in nested JSON, ignoring personal details and focusing only on the area and furniture items as shown in the example. Each item should have a count, which will be set to 1 for simplicity. The response should be in JSON format only, without any additional comments.
94
+
95
+ Good JSON example:{
96
+ "Lobby Area/Entrance": {
97
+ "Vinyl wall covering": 1,
98
+ "Decorative hardwired lighting": 1
99
+ },
100
+ "Lobby": {
101
+ "Carpet, carpet pad, and base": 1,
102
+ "Window treatments": 1,
103
+ "Artwork and decorative accessories": 1,
104
+ "Portable lighting": 1,
105
+ "Upholstered furniture and decorative pillows": 1,
106
+ "Millwork": 1
107
+ }
108
+ }
109
  Make sure to fetch details from the provided text and ignore unnecessary information. The response should be in JSON format only, without any additional comments.
 
110
 
111
+ Task:
112
+ Convert the provided extracted text into the JSON format described above.
113
+
114
+ Provided Text:
115
+
116
+ PROPERTY IMPROVEMENT PLAN
117
+ PREPARED FOR:
118
+ Springfield, IL
119
+ To be relicensed as Hilton Garden Inn
120
+ ...
121
+ Patios/The Terrace - Install patio decorative lighting. Install patio furniture. (lounge chairs, chaise, dining tables/chairs)
122
+ ...
123
+ Lobby Area - Replace carpet, carpet pad, and base. Replace window treatments. Replace artwork and decorative accessories. Replace portable lighting. (floor lamps, table lamps) Replace upholstered furniture and decorative pillows. Replace millwork. Replace the television(s).
124
+ ...
125
+ Registration Area - Replace vinyl wall covering. Replace hard surface floor covering. Replace artwork. Install new signature graphics on the back wall.
126
+ ...
127
+
128
+ Expected Output (JSON format):
129
+ {
130
+ "Patios/The Terrace": {
131
+ "Patio decorative lighting": 1,
132
+ "Lounge chairs": 1,
133
+ "Chaise": 1,
134
+ "Dining tables": 1,
135
+ "Dining chairs": 1,
136
+ "Patio furniture": 1
137
+ },
138
+ "Lobby Area": {
139
+ "Carpet, carpet pad, and base": 1,
140
+ "Window treatments": 1,
141
+ "Artwork and decorative accessories": 1,
142
+ "Portable lighting (floor lamps, table lamps)": 1,
143
+ "Upholstered furniture and decorative pillows": 1,
144
+ "Millwork": 1,
145
+ "Television(s)": 1
146
+ },
147
+ "Registration Area": {
148
+ "Vinyl wall covering": 1,
149
+ "Hard surface floor covering": 1,
150
+ "Artwork (new signature graphics on the back wall)": 1
151
+ }
152
+ }
153
+
154
+ """
155
  def chunk_text(text, chunk_size=5000):
156
  """
157
  Splits the input text into chunks of specified size.
 
165
  """
166
  words = text.split()
167
  chunks = [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
168
+ logger.info(f"Total chunks created: {len(chunks)}")
169
  return chunks
170
 
171
  def combine_responses(responses):
 
239
 
240
  chunks = chunk_text(message)
241
  responses = []
242
+ count=0
243
  for chunk in chunks:
244
+ logger.info(f"Processing chunk {count+1}/{len(chunks)}")
245
  response = generate_response_for_chunk(chunk, history, temperature, max_new_tokens)
246
  responses.append(response)
247
+ count+=1
248
  final_output = combine_responses(responses)
249
 
250
  end_time = time.time()
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
  accelerate
2
- transformers
3
  SentencePiece
4
  bitsandbytes
 
1
  accelerate
2
+ transformers==4.43.1
3
  SentencePiece
4
  bitsandbytes