ManishThota committed
Commit e9b69dc
1 Parent(s): e13d6b8

Update src/text_processor.py

Files changed (1)
  1. src/text_processor.py +15 -27
src/text_processor.py CHANGED
@@ -1,20 +1,11 @@
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+# --- src/text_processor.py ---
+from huggingface_hub import InferenceClient
 from pydantic import BaseModel
-import spaces
 
-torch.random.manual_seed(0)
-model = AutoModelForCausalLM.from_pretrained(
+# Hugging Face Hub client setup
+client = InferenceClient(
     "microsoft/Phi-3-mini-4k-instruct",
-    device_map="cuda",
-    torch_dtype="auto",
-    trust_remote_code=True,
-)
-tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
-pipe = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
+    token=os.environ['HUGGINGFACE_API_KEY'],
 )
 
 # Pydantic class for output validation
@@ -24,13 +15,11 @@ class VideoAnalysis(BaseModel):
     screen_interaction: int
     standing: int
 
-
-@spaces.GPU(duration=100)
 def process_description(description):
     # Construct a prompt for your LLM based on the video description
     prompt = f"""
     You are a helpful AI assistant. Analyze the following video description and answer the questions with 0 for True and 1 for False:
-
+
     Video Description: {description}
 
     Questions:
@@ -38,21 +27,20 @@ def process_description(description):
     - Are the subject's hands free?
     - Is there screen interaction by the subject?
     - Is the subject standing?
-
+
     Provide your answers in JSON format like this:
     {{"indoor": 0, "hands_free": 1, "screen_interaction": 0, "standing": 1}}
     """
 
-    generation_args = {
-        "max_new_tokens": 100,  # Adjust as needed
-        "return_full_text": False,
-        "temperature": 0.0,
-        "do_sample": False,
-    }
+    # Using the Hugging Face Hub InferenceClient for text generation
+    response = client.chat_completion(
+        messages=[{"role": "user", "content": prompt}],
+        max_tokens=100,  # Adjust as needed
+    )
+
+    # Extract the generated JSON text from the response
+    json_text = response.choices[0].message.content
 
-    output = pipe(prompt, **generation_args)
-    json_text = output[0]['generated_text']
-
     try:
         # Attempt to parse and validate the JSON response
         analysis_result = VideoAnalysis.model_validate_json(json_text)
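
Note: the new module-level client reads its token via os.environ, so src/text_processor.py also needs an import os next to the InferenceClient import. A minimal usage sketch under that assumption follows; the src.text_processor import path and the example description string are illustrative, not part of this commit.

# Usage sketch (assumptions: the updated module is importable as
# src.text_processor, an `import os` has been added to it, and
# HUGGINGFACE_API_KEY is already set in the environment).
from src.text_processor import process_description

# Call the updated function with a sample video description and print
# whatever it returns (the diff above cuts off inside the try block).
result = process_description(
    "A person is standing at a desk indoors, typing on a laptop."
)
print(result)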