Spaces:
Runtime error
Runtime error
import json

import spaces
import torch
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# Checkpoint id shared by the model and tokenizer loaders below.
_MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"

# Target passed to `device_map` when the model is loaded.
device = "cuda"

# Seed torch's random number generator before anything is loaded.
torch.random.manual_seed(0)

# Load the causal LM and its tokenizer, then wrap both in a
# text-generation pipeline that the rest of the module calls as `pipe`.
model = AutoModelForCausalLM.from_pretrained(
    _MODEL_ID,
    device_map=device,
    torch_dtype="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
# Pydantic class for output validation
class VideoAnalysis(BaseModel):
    """Schema used to validate the JSON answer produced by the LLM.

    Each field holds the integer answer to one of the four questions
    listed in the prompt built by ``process_description``. That prompt
    asks the model to answer with 0 for True and 1 for False.
    """

    # Answer to "Is the scene indoors?"
    indoor: int
    # Answer to "Are the subject's hands free?"
    hands_free: int
    # Answer to "Is there screen interaction by the subject?"
    screen_interaction: int
    # Answer to "Is the subject standing?"
    standing: int
def _extract_json_object(text):
    """Return the first decodable JSON object embedded in *text*.

    Generated text frequently wraps the requested JSON in prose or
    Markdown code fences. Every ``{`` is tried in order and the first
    span that decodes as a JSON object is returned. When no such span
    exists, *text* is returned unchanged so that the caller's validation
    step reports the failure.
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode tolerates trailing text and reports where the
            # decoded document ends (as an index into `text`).
            _, end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            start = text.find("{", start + 1)
        else:
            return text[start:end]
    return text


def process_description(description):
    """Ask the LLM four yes/no questions about a video description.

    Args:
        description: Text describing the video; it is interpolated into
            the prompt as-is.

    Returns:
        On success, a JSON string holding the validated ``VideoAnalysis``
        fields (``indoor``, ``hands_free``, ``screen_interaction``,
        ``standing``). On failure to parse or validate the model output,
        the dict ``{"error": "Could not process the video description."}``.
        Note that the two cases have different types (``str`` vs ``dict``);
        this is kept as-is for existing callers.

    Errors raised by the generation pipeline itself are not caught here.
    """
    # Construct a prompt for the LLM based on the video description.
    # The template lines are flush-left on purpose: any leading whitespace
    # would become part of the prompt text.
    # NOTE(review): the prompt asks for 0 = True and 1 = False, which is the
    # reverse of the usual convention -- confirm this is what consumers expect.
    prompt = f"""
You are a helpful AI assistant. Analyze the following video description and answer the questions with 0 for True and 1 for False:
Video Description: {description}
Questions:
- Is the scene indoors?
- Are the subject's hands free?
- Is there screen interaction by the subject?
- Is the subject standing?
Provide your answers in JSON format like this:
{{"indoor": 0, "hands_free": 1, "screen_interaction": 0, "standing": 1}}
"""
    generation_args = {
        "max_new_tokens": 100,  # Adjust as needed
        "return_full_text": False,
        "temperature": 0.0,
        "do_sample": False,
    }
    output = pipe(prompt, **generation_args)

    # Strip any prose or code fences around the answer before validating;
    # the validator only accepts a bare JSON document.
    json_text = _extract_json_object(output[0]['generated_text'])

    try:
        # Attempt to parse and validate the JSON response
        analysis_result = VideoAnalysis.model_validate_json(json_text)
        return analysis_result.model_dump_json()  # Return as valid JSON
    except Exception as e:
        # Best-effort boundary: report the problem and hand back an error
        # payload instead of propagating the exception.
        print(f"Error processing LLM output: {e}")
        return {"error": "Could not process the video description."}