ManishThota committed on
Commit
57cdbdf
1 Parent(s): 422b42e

Update src/text_processor.py

Files changed (1)
  1. src/text_processor.py +10 -47
src/text_processor.py CHANGED
@@ -1,9 +1,7 @@
 
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-import json
 import warnings
-from pydantic import BaseModel
 from typing import Dict
 import spaces
 
@@ -39,62 +37,27 @@ def load_model_pipeline(model_path: str):
 # Initialize the pipeline and keep it in memory
 pipe = load_model_pipeline(model_path)
 
-# Generate logic from LLM output
+# Generate output from LLM
 @spaces.GPU(duration=50)
 def generate_logic(llm_output: str) -> str:
     prompt = f"""
-    Provide the response in json string for the below keys and context based on the description: '{llm_output}'.
-
-    Screen.interaction_yes: This field indicates whether there was an interaction of the person with a screen during the activity. A value of 1 means there was screen interaction (Yes), and a value of 0 means there was no screen interaction (No).
-    Hands.free: This field indicates whether the person's hands were free during the activity. A value of 1 means the person was not holding anything (Yes), indicating free hands. A value of 0 means the person was holding something (No), indicating the hands were not free.
-    Indoors: This field indicates whether the activity took place indoors. A value of 1 means the activity occurred inside a building or enclosed space (Yes), and a value of 0 means the activity took place outside (No).
-    Standing: This field indicates whether the person was standing during the activity. A value of 1 means the person was standing (Yes), and a value of 0 means the person was not standing (No).
+    Provide a detailed response based on the description: '{llm_output}'.
     """
 
     messages = [
-        {"role": "system", "content": "Please answer questions just based on this information: " + llm_output},
+        {"role": "system", "content": "Please provide a detailed response."},
         {"role": "user", "content": prompt},
     ]
 
     response = pipe(messages, **generation_args)
     generated_text = response[0]['generated_text']
 
-    # Extract JSON from the generated text
-    start_index = generated_text.find('{')
-    end_index = generated_text.rfind('}') + 1
-    json_str = generated_text[start_index:end_index]
-
-    # Log the generated JSON string for debugging
-    print(f"Generated JSON: {json_str}")
+    # Log the generated text
+    print(f"Generated Text: {generated_text}")
 
-    if not json_str.strip():
-        raise ValueError("Generated logic is empty or invalid JSON")
-
-    return json_str
+    return generated_text
 
-# Pydantic model for structured output
-class VideoAnalysis(BaseModel):
-    screen_interaction_yes: int
-    hands_free: int
-    indoors: int
-    standing: int
-
-    @classmethod
-    def from_llm_output(cls, generated_logic: str) -> 'VideoAnalysis':
-        try:
-            logic_dict = json.loads(generated_logic)
-        except json.JSONDecodeError as e:
-            raise ValueError(f"Error decoding JSON: {e}") from e
-
-        return cls(
-            screen_interaction_yes=logic_dict.get("Screen.interaction_yes", 0),
-            hands_free=logic_dict.get("Hands.free", 0),
-            indoors=logic_dict.get("Indoors", 0),
-            standing=logic_dict.get("Standing", 0)
-        )
-
-# Main function to process LLM output
-def process_description(description: str) -> Dict:
-    generated_logic = generate_logic(description)
-    structured_output = VideoAnalysis.from_llm_output(generated_logic)
-    return structured_output.dict()
+# Main function to process LLM output and return raw text
+def process_description(description: str) -> str:
+    generated_output = generate_logic(description)
+    return generated_output
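For callers, the practical effect of this commit is that process_description() now returns the raw generated string instead of a validated dict. A minimal usage sketch (hypothetical, not part of the commit; it assumes the file is importable as src.text_processor and that model_path and generation_args are defined earlier in the module, as the unchanged context lines suggest):

    # Hypothetical caller-side sketch illustrating the new return type.
    from src.text_processor import process_description  # assumed import path

    description = "A person is sitting at a desk indoors, typing on a laptop."
    raw_text = process_description(description)  # now a plain str, no longer a Dict
    print(raw_text)

Because the JSON extraction and the VideoAnalysis model were removed, any consumer that still expects keys such as Screen.interaction_yes must now parse the free-form text itself.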