Madhuri123 committed on
Commit
07391f2
1 Parent(s): 5c15212

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -32
app.py CHANGED
@@ -1,51 +1,69 @@
1
  import streamlit as st
2
- from transformers import pipeline
3
  from PIL import Image
4
  import torch
5
 
6
  # Load Hugging Face token
7
  HF_TOKEN = st.secrets["Hf_token"]
8
 
9
-
10
  # Load the model and pipeline
11
  model_id = "meta-llama/Llama-3.2-11B-Vision"
12
 
13
- # Initialize pipeline for image-text-to-text
14
- pipeline = pipeline(
15
- "image-text-to-text",
16
- model=model_id,
17
- model_kwargs={"torch_dtype": torch.bfloat16, "use_auth_token": HF_TOKEN}
18
- )
19
- def preprocess_image(image):
20
- return image.convert("RGB")
21
-
22
- st.title("Image-Text-to-Text Inference")
23
- st.write(f"**Using model:** {model_id}")
 
 
 
24
 
25
- # Text Input
26
- input_text = st.text_input("Enter your prompt (optional):")
 
 
27
 
28
- # Image Input
29
- uploaded_file = st.file_uploader("Upload an image:", type=["jpg", "png", "jpeg"])
 
 
 
 
30
 
31
  if st.button("Generate"):
32
  if uploaded_file:
33
- # Preprocess the uploaded image
34
- image = preprocess_image(Image.open(uploaded_file))
35
-
36
- # Prepare inputs
37
- inputs = {"images": [image]} # Wrap the image in a list
38
- if input_text:
39
- inputs["text"] = input_text # Include text if provided
40
-
41
- # Generate response
42
- response = pipeline(**inputs)
43
-
44
- # Display results
45
- st.write("Generated Response:")
46
- st.write(response) # Assuming the pipeline returns text directly
 
 
 
 
 
 
 
 
47
  else:
48
- st.error("Please upload an image.")
 
 
49
 
50
 
51
 
 
1
import streamlit as st
from transformers import pipeline, AutoFeatureExtractor
from PIL import Image
import torch

# --- Secret token ----------------------------------------------------------
# The original code read the secret twice under two different keys
# ("Hf_token" first, then "hf_token"); whichever key was missing from
# .streamlit/secrets.toml raised a KeyError at import time. Accept either
# spelling so existing deployments keep working.
if "hf_token" in st.secrets:
    HF_TOKEN = st.secrets["hf_token"]
else:
    HF_TOKEN = st.secrets["Hf_token"]

# --- Model and task configuration ------------------------------------------
MODEL_ID = "meta-llama/Llama-3.2-11B-Vision"
# Kept for backward compatibility: the original file also bound the
# lower-case name, so any code referencing `model_id` still resolves.
model_id = MODEL_ID
17
+
18
# Build the pipeline once per session: st.cache_resource keeps the heavy
# model in memory across Streamlit reruns instead of reloading it on every
# widget interaction.
@st.cache_resource
def initialize_pipeline():
    """Build and cache the image-text-to-text pipeline for MODEL_ID.

    Returns:
        transformers.Pipeline: a ready-to-call pipeline instance that
        accepts image + text inputs.
    """
    # `use_auth_token` inside model_kwargs is deprecated in transformers;
    # the supported way to authenticate is the top-level `token` argument
    # of pipeline(), which is forwarded to the Hub download machinery.
    return pipeline(
        "image-text-to-text",
        model=MODEL_ID,
        token=HF_TOKEN,
        model_kwargs={"torch_dtype": torch.bfloat16},
    )
26
 
27
# Image normalization helper used before handing the upload to the model.
def preprocess_image(image, size=(224, 224)):
    """Normalize an uploaded image: force RGB mode, then scale to *size*.

    Args:
        image: a PIL-style image object exposing convert()/resize().
        size: target (width, height) tuple; defaults to 224x224.

    Returns:
        The converted-and-resized image object.
    """
    rgb_image = image.convert("RGB")
    return rgb_image.resize(size)
31
 
32
# ---- Streamlit page layout (order of calls == order on screen) ----
st.title("Image and Text to Text Generation")
model_caption = f"**Using model:** {MODEL_ID}"
st.write(model_caption)

# Inputs: an image is required, the text prompt is optional.
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
input_text = st.text_input("Enter your text input (optional):")
38
 
39
if st.button("Generate"):
    if uploaded_file:
        try:
            # Decode the upload and normalize it (RGB, fixed size).
            image = Image.open(uploaded_file)
            preprocessed_image = preprocess_image(image)

            # Fetch the cached pipeline (built on first use).
            model_pipeline = initialize_pipeline()

            # The pipeline takes a list of images plus an optional prompt.
            inputs = {"images": [preprocessed_image], "text": input_text}

            # Run inference and show whatever the pipeline returns.
            response = model_pipeline(**inputs)
            st.write("Generated Response:")
            st.write(response)
        except ValueError as ve:
            # The original code compared against one exact error-message
            # string ("... image token (0) ... provided images (1)"), so the
            # hint was never shown for any other image/token-count mismatch.
            # Match the stable part of the message instead.
            if "image token" in str(ve):
                st.error(
                    "Make sure your image is correctly preprocessed and "
                    "passed to the model."
                )
            else:
                st.error(f"Error: {ve}")
        except Exception as e:
            # Top-level UI boundary: surface unexpected failures to the user
            # rather than crashing the Streamlit script.
            st.error(f"Error: {e}")
    else:
        st.error("Please upload an image to proceed.")
65
+
66
+
67
 
68
 
69