barunsaha committed on
Commit
57daf6a
·
1 Parent(s): abd7b16

Move text utility functions to a separate module

Browse files
Files changed (2) hide show
  1. app.py +14 -72
  2. helpers/text_helper.py +58 -0
app.py CHANGED
@@ -15,7 +15,7 @@ from langchain_core.prompts import ChatPromptTemplate
15
  # from transformers import AutoTokenizer
16
 
17
  from global_config import GlobalConfig
18
- from helpers import llm_helper, pptx_helper
19
 
20
 
21
  @st.cache_data
@@ -159,7 +159,12 @@ def set_up_chat_ui():
159
  ):
160
 
161
  progress_bar_pptx = st.progress(0, 'Preparing to run...')
162
- if not _is_valid_prompt(prompt):
 
 
 
 
 
163
  return
164
 
165
  logger.info('User input: %s | #characters: %d', prompt, len(prompt))
@@ -228,7 +233,13 @@ def set_up_chat_ui():
228
  # There maybe trailing ``` at the end of the response -- remove them
229
  # To be careful: ``` may be part of the content as well when code is generated
230
  progress_bar_pptx.progress(50, 'Analyzing response...')
231
- response_cleaned = _clean_json(response)
 
 
 
 
 
 
232
 
233
  # Now create the PPT file
234
  progress_bar_pptx.progress(75, 'Creating the slide deck...give it a moment...')
@@ -285,24 +296,6 @@ def generate_slide_deck(json_str: str):
285
  logger.error('Caught a generic exception: %s', str(ex))
286
 
287
 
288
- def _is_valid_prompt(prompt: str) -> bool:
289
- """
290
- Verify whether user input satisfies the concerned constraints.
291
-
292
- :param prompt: The user input text.
293
- :return: True if all criteria are satisfied; False otherwise.
294
- """
295
-
296
- if len(prompt) < 5 or ' ' not in prompt:
297
- st.error(
298
- 'Not enough information provided!'
299
- ' Please be a little more descriptive and type a few words with a few characters :)'
300
- )
301
- return False
302
-
303
- return True
304
-
305
-
306
  def _is_it_refinement() -> bool:
307
  """
308
  Whether it is the initial prompt or a refinement.
@@ -353,57 +346,6 @@ def _display_messages_history(view_messages: st.expander):
353
  with view_messages:
354
  view_messages.json(st.session_state[CHAT_MESSAGES])
355
 
356
- def _clean_json(json_str: str) -> str:
357
- """
358
- Attempt to clean a JSON response string from the LLM by removing the trailing ```
359
- and any text beyond that.
360
- CAUTION: May not be always accurate.
361
-
362
- :param json_str: The input string in JSON format.
363
- :return: The "cleaned" JSON string.
364
- """
365
-
366
- # An example of response containing JSON and other text:
367
- # {
368
- # "title": "AI and the Future: A Transformative Journey",
369
- # "slides": [
370
- # ...
371
- # ]
372
- # } <<---- This is end of valid JSON content
373
- # ```
374
- #
375
- # ```vbnet
376
- # Please note that the JSON output is in valid format but the content of the "Role of GPUs in AI" slide is just an example and may not be factually accurate. For accurate information, you should consult relevant resources and update the content accordingly.
377
- # ```
378
- str_len = len(json_str)
379
- response_cleaned = json_str
380
-
381
- while True:
382
- idx = json_str.rfind('```') # -1 on failure
383
-
384
- if idx <= 0:
385
- break
386
-
387
- # In the ideal scenario, the character before the last ``` should be
388
- # a new line or a closing bracket }
389
- prev_char = json_str[idx - 1]
390
- print(f'{idx=}, {prev_char=}')
391
-
392
- if prev_char == '}':
393
- response_cleaned = json_str[:idx]
394
- elif prev_char == '\n' and json_str[idx - 2] == '}':
395
- response_cleaned = json_str[:idx]
396
-
397
- json_str = json_str[:idx]
398
-
399
- logger.info(
400
- 'Cleaning JSON response:: original length: %d | cleaned length: %d',
401
- str_len, len(response_cleaned)
402
- )
403
- logger.debug('Cleaned JSON: %s', response_cleaned)
404
-
405
- return response_cleaned
406
-
407
 
408
  def _display_download_button(file_path: pathlib.Path):
409
  """
 
15
  # from transformers import AutoTokenizer
16
 
17
  from global_config import GlobalConfig
18
+ from helpers import llm_helper, pptx_helper, text_helper
19
 
20
 
21
  @st.cache_data
 
159
  ):
160
 
161
  progress_bar_pptx = st.progress(0, 'Preparing to run...')
162
+ if not text_helper.is_valid_prompt(prompt):
163
+ st.error(
164
+ 'Not enough information provided!'
165
+ ' Please be a little more descriptive and type a few words'
166
+ ' with a few characters :)'
167
+ )
168
  return
169
 
170
  logger.info('User input: %s | #characters: %d', prompt, len(prompt))
 
233
  # There maybe trailing ``` at the end of the response -- remove them
234
  # To be careful: ``` may be part of the content as well when code is generated
235
  progress_bar_pptx.progress(50, 'Analyzing response...')
236
+ response_cleaned = text_helper.get_clean_json(response)
237
+
238
+ logger.info(
239
+ 'Cleaned JSON response:: original length: %d | cleaned length: %d',
240
+ len(response), len(response_cleaned)
241
+ )
242
+ logger.debug('Cleaned JSON: %s', response_cleaned)
243
 
244
  # Now create the PPT file
245
  progress_bar_pptx.progress(75, 'Creating the slide deck...give it a moment...')
 
296
  logger.error('Caught a generic exception: %s', str(ex))
297
 
298
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
  def _is_it_refinement() -> bool:
300
  """
301
  Whether it is the initial prompt or a refinement.
 
346
  with view_messages:
347
  view_messages.json(st.session_state[CHAT_MESSAGES])
348
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
 
350
  def _display_download_button(file_path: pathlib.Path):
351
  """
helpers/text_helper.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def is_valid_prompt(prompt: str) -> bool:
    """
    Verify whether user input satisfies the concerned constraints.

    The prompt is accepted only when, ignoring leading and trailing whitespace, it is
    at least seven characters long and contains at least one space, i.e., it looks
    like more than a single short word.

    :param prompt: The user input text.
    :return: True if all criteria are satisfied; False otherwise.
    """

    if not prompt:
        # Empty (or missing) input can never be descriptive enough
        return False

    # Padding must not count towards the length or the "has a space" check; otherwise,
    # a blank string or a space-padded single word would be accepted
    text = prompt.strip()

    return len(text) >= 7 and ' ' in text
13
+
14
+
15
def get_clean_json(json_str: str) -> str:
    """
    Attempt to clean a JSON response string from the LLM by removing the trailing ```
    and any text beyond that.
    CAUTION: May not always be accurate.

    The string is scanned from the end for ``` fences. Whenever the text before a fence
    ends with a closing brace (optionally followed by one newline), that text becomes
    the candidate result. The scan continues towards the start, so the left-most such
    fence determines the final result. If no fence qualifies, the input is returned
    unchanged.

    :param json_str: The input string in JSON format.
    :return: The "cleaned" JSON string.
    """

    # An example of response containing JSON and other text:
    # {
    #     "title": "AI and the Future: A Transformative Journey",
    #     "slides": [
    #         ...
    #     ]
    # }    <<---- This is end of valid JSON content
    # ```
    #
    # ```vbnet
    # Please note that the JSON output is in valid format but the content of the
    # "Role of GPUs in AI" slide is just an example and may not be factually accurate.
    # For accurate information, you should consult relevant resources and update the
    # content accordingly.
    # ```
    fence = '```'
    response_cleaned = json_str
    remaining = json_str

    while True:
        idx = remaining.rfind(fence)  # -1 on failure

        if idx <= 0:
            # No fence left, or the fence is at the very start with nothing before it
            break

        head = remaining[:idx]

        # In the ideal scenario, the text before the fence ends with a closing
        # bracket }, optionally followed by a new line. Using `endswith` (rather than
        # indexing at idx - 2) avoids wrapping around to the end of the string when
        # the fence is at index 1.
        if head.endswith(('}', '}\n')):
            response_cleaned = head

        remaining = head

    return response_cleaned