ManishThota committed
Commit 6ee5519
Parent: be5f897

Update src/text_processor.py

Files changed (1):
  1. src/text_processor.py +36 -17
src/text_processor.py CHANGED
@@ -1,12 +1,23 @@
-# --- src/text_processor.py ---
-from huggingface_hub import InferenceClient
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from pydantic import BaseModel
-import os
+import spaces
 
-# Hugging Face Hub client setup
-client = InferenceClient(
+device = 'cuda'
+
+# Load your LLM model and tokenizer
+torch.random.manual_seed(0)
+model = AutoModelForCausalLM.from_pretrained(
     "microsoft/Phi-3-mini-4k-instruct",
-    token=os.environ['HUGGINGFACE_API_KEY']
+    device_map=device,
+    torch_dtype="auto",
+    trust_remote_code=True,
+)
+tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
 )
 
 # Pydantic class for output validation
@@ -16,11 +27,12 @@ class VideoAnalysis(BaseModel):
     screen_interaction: int
     standing: int
 
+@spaces.GPU(duration=100)
 def process_description(description):
     # Construct a prompt for your LLM based on the video description
     prompt = f"""
     You are a helpful AI assistant. Analyze the following video description and answer the questions with 0 for True and 1 for False:
-
+
     Video Description: {description}
 
     Questions:
@@ -28,18 +40,25 @@ def process_description(description):
     - Are the subject's hands free?
     - Is there screen interaction by the subject?
     - Is the subject standing?
-
+
     Provide your answers in JSON format like this:
     {{"indoor": 0, "hands_free": 1, "screen_interaction": 0, "standing": 1}}
     """
 
-    # Using the Hugging Face Hub InferenceClient for text generation
-    response = client.chat_completion(
-        messages=[{"role": "user", "content": prompt}],
-        max_tokens=100,  # Adjust as needed
-    )
-
-    # Extract the generated JSON text from the response
-    json_text = response.choices[0].message.content
+    generation_args = {
+        "max_new_tokens": 100,  # Adjust as needed
+        "return_full_text": False,
+        "temperature": 0.0,
+        "do_sample": False,
+    }
 
-    return json_text
+    output = pipe(prompt, **generation_args)
+    json_text = output[0]['generated_text']
+
+    try:
+        # Attempt to parse and validate the JSON response
+        analysis_result = VideoAnalysis.model_validate_json(json_text)
+        return analysis_result.model_dump_json()  # Return as valid JSON
+    except Exception as e:
+        print(f"Error processing LLM output: {e}")
+        return {"error": "Could not process the video description."}