Madhuri123 committed on
Commit
051047a
·
verified ·
1 Parent(s): e561674

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -27
app.py CHANGED
@@ -1,25 +1,31 @@
1
  import streamlit as st
2
- from transformers import pipeline,AutoFeatureExtractor
3
  from PIL import Image
4
  import torch
5
 
6
  # Load Hugging Face token
7
  HF_TOKEN = st.secrets["Hf_token"]
8
 
9
- # Load the model and pipeline
10
  model_id = "meta-llama/Llama-3.2-11B-Vision"
11
- @st.cache_resource
 
 
12
  def initialize_pipeline():
13
- return pipeline(
14
- "image-text-to-text",
 
15
  model=model_id,
16
- model_kwargs={"torch_dtype": torch.bfloat16, "use_auth_token": HF_TOKEN}
 
17
  )
 
18
 
19
- # Preprocess image function
20
- def preprocess_image(image, size=(224, 224)):
21
- """Converts the image to RGB and resizes it to the required dimensions."""
22
- return image.convert("RGB").resize(size)
 
23
 
24
  # Streamlit UI
25
  st.title("Image and Text to Text Generation")
@@ -31,29 +37,21 @@ input_text = st.text_input("Enter your text input (optional):")
31
  if st.button("Generate"):
32
  if uploaded_file:
33
  try:
34
- # Preprocess image
35
- image = Image.open(uploaded_file)
36
- preprocessed_image = preprocess_image(image)
37
-
38
- # Initialize pipeline
39
- model_pipeline = initialize_pipeline()
40
-
41
- # Create inputs for the pipeline
42
- inputs = {"images": [preprocessed_image], "text": input_text}
43
 
44
- # Run the model and get the response
45
- response = model_pipeline(**inputs)
46
  st.write("Generated Response:")
47
  st.write(response)
48
- except ValueError as ve:
49
- if str(ve) == "The number of image token (0) should be the same as in the number of provided images (1)":
50
- st.error("Ensure your image is correctly preprocessed and passed to the model.")
51
- else:
52
- st.error(f"Error: {ve}")
53
  except Exception as e:
54
  st.error(f"Error: {e}")
55
  else:
56
- st.error("Please upload an image to proceed")
 
 
57
 
58
 
59
 
 
1
  import streamlit as st
2
+ from transformers import pipeline, AutoFeatureExtractor
3
  from PIL import Image
4
  import torch
5
 
6
  # Load Hugging Face token
7
  HF_TOKEN = st.secrets["Hf_token"]
8
 
9
+ # Model ID
10
  model_id = "meta-llama/Llama-3.2-11B-Vision"
11
+
12
# Initialize feature extractor and pipeline (cached so the heavy model loads once per process)
@st.cache_resource  # st.experimental_singleton is deprecated; cache_resource is its replacement
def initialize_pipeline():
    """Load the feature extractor and an image-to-text pipeline for `model_id`.

    Returns:
        tuple: ``(feature_extractor, model_pipeline)`` — both cached across
        Streamlit reruns by the decorator.
    """
    # `token` replaces the deprecated `use_auth_token` kwarg in recent transformers.
    feature_extractor = AutoFeatureExtractor.from_pretrained(model_id, token=HF_TOKEN)
    model_pipeline = pipeline(
        "image-to-text",
        model=model_id,
        feature_extractor=feature_extractor,
        # NOTE(review): float32 kept for CPU compatibility; bfloat16 would halve
        # memory on supporting hardware — confirm the deployment target.
        model_kwargs={"torch_dtype": torch.float32},
        token=HF_TOKEN,
    )
    return feature_extractor, model_pipeline
23
 
24
# Preprocess function using feature extractor
def preprocess_image(image, feature_extractor):
    """Run *feature_extractor* on *image* and return the resulting pixel tensor."""
    return feature_extractor(images=image, return_tensors="pt").pixel_values
29
 
30
  # Streamlit UI
31
  st.title("Image and Text to Text Generation")
 
37
if st.button("Generate"):
    if uploaded_file:
        try:
            # Convert to RGB so palette/RGBA uploads don't break downstream processing.
            image = Image.open(uploaded_file).convert("RGB")
            feature_extractor, model_pipeline = initialize_pipeline()

            # Pass the PIL image directly: transformers pipelines perform their own
            # preprocessing, so feeding pre-extracted `pixel_values` tensors
            # double-processes the input and is not an accepted pipeline input type.
            # `prompt` (not `text`) is the image-to-text pipeline's text parameter.
            response = model_pipeline(image, prompt=input_text or None)
            st.write("Generated Response:")
            # `response` is typically a list of {"generated_text": ...} dicts.
            st.write(response)
        except Exception as e:
            # Surface failures (auth, OOM, bad input) to the user instead of
            # crashing the Streamlit app.
            st.error(f"Error: {e}")
    else:
        st.error("Please upload an image to proceed.")
53
+
54
+
55
 
56
 
57