Madhuri123 committed
Commit 2a17089 · verified · 1 Parent(s): 051047a

Update app.py

Files changed (1):
  1. app.py +48 -49
app.py CHANGED
@@ -1,55 +1,54 @@
 import streamlit as st
-from transformers import pipeline, AutoFeatureExtractor
-from PIL import Image
+import requests
 import torch
-
-# Load Hugging Face token
-HF_TOKEN = st.secrets["Hf_token"]
-
-# Model ID
-model_id = "meta-llama/Llama-3.2-11B-Vision"
-
-# Initialize feature extractor and pipeline
-@st.experimental_singleton
-def initialize_pipeline():
-    feature_extractor = AutoFeatureExtractor.from_pretrained(model_id, use_auth_token=HF_TOKEN)
-    model_pipeline = pipeline(
-        "image-to-text",
-        model=model_id,
-        feature_extractor=feature_extractor,
-        model_kwargs={"torch_dtype": torch.float32, "use_auth_token": HF_TOKEN},
+from PIL import Image
+from transformers import MllamaForConditionalGeneration, AutoProcessor
+
+def load_model_and_processor(model_id):
+    """Load the model and processor."""
+    model = MllamaForConditionalGeneration.from_pretrained(
+        model_id,
+        torch_dtype=torch.bfloat16,
+        device_map="auto",
     )
-    return feature_extractor, model_pipeline
-
-# Preprocess function using feature extractor
-def preprocess_image(image, feature_extractor):
-    """Preprocess the image for the model."""
-    inputs = feature_extractor(images=image, return_tensors="pt")
-    return inputs.pixel_values
-
-# Streamlit UI
-st.title("Image and Text to Text Generation")
-st.write(f"**Using model:** {model_id}")
-
-uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
-input_text = st.text_input("Enter your text input (optional):")
-
-if st.button("Generate"):
-    if uploaded_file:
-        try:
-            # Load and preprocess image
-            image = Image.open(uploaded_file).convert("RGB")
-            feature_extractor, model_pipeline = initialize_pipeline()
-            preprocessed_image = preprocess_image(image, feature_extractor)
-
-            # Run the pipeline
-            response = model_pipeline(images=preprocessed_image, text=input_text)
-            st.write("Generated Response:")
-            st.write(response)
-        except Exception as e:
-            st.error(f"Error: {e}")
-    else:
-        st.error("Please upload an image to proceed.")
+    processor = AutoProcessor.from_pretrained(model_id)
+    return model, processor
+
+def generate_text(model, processor, image_url, prompt):
+    """Generate text using the model and processor."""
+    try:
+        image = Image.open(requests.get(image_url, stream=True).raw)
+        inputs = processor(image, prompt, return_tensors="pt").to(model.device)
+        output = model.generate(**inputs, max_new_tokens=30)
+        return processor.decode(output[0])
+    except Exception as e:
+        return f"Error: {e}"
+
+# Streamlit App
+st.title("LLaMA 3 Vision Haiku Generator")
+
+# Model ID and loading
+MODEL_ID = "meta-llama/Llama-3.2-11B-Vision"
+model, processor = load_model_and_processor(MODEL_ID)
+
+# User input for image URL and prompt
+image_url = st.text_input("Enter the Image URL:", "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg")
+
+prompt = st.text_area("Enter your prompt:", "<|image|><|begin_of_text|>If I had to write a haiku for this one")
+
+if st.button("Generate Haiku"):
+    with st.spinner("Generating haiku..."):
+        result = generate_text(model, processor, image_url, prompt)
+
+    st.subheader("Generated Text")
+    st.write(result)
+
+    try:
+        st.image(image_url, caption="Input Image")
+    except Exception:
+        st.error("Failed to load image. Please check the URL.")
+
+
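Note on the new version: the removed code memoized its pipeline with @st.experimental_singleton (since deprecated), while the new load_model_and_processor is called at module level, so every Streamlit rerun triggered by a widget interaction can reload the 11B checkpoint. A minimal sketch of a cached variant, assuming Streamlit >= 1.18, where st.cache_resource is the replacement for st.experimental_singleton:

import streamlit as st
import torch
from transformers import MllamaForConditionalGeneration, AutoProcessor

@st.cache_resource  # keep one copy of the weights per process, across reruns
def load_model_and_processor(model_id):
    """Load the model and processor once and reuse them across Streamlit reruns."""
    model = MllamaForConditionalGeneration.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    processor = AutoProcessor.from_pretrained(model_id)
    return model, processor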
 
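The removed code also read HF_TOKEN = st.secrets["Hf_token"] and passed it to from_pretrained; the new version drops authentication entirely, but meta-llama/Llama-3.2-11B-Vision is a gated repository, so loading will fail unless credentials are available some other way (for example, an already logged-in environment). A sketch of restoring the secret with token=, the current replacement for the deprecated use_auth_token=:

import streamlit as st
import torch
from transformers import MllamaForConditionalGeneration, AutoProcessor

MODEL_ID = "meta-llama/Llama-3.2-11B-Vision"
HF_TOKEN = st.secrets["Hf_token"]  # same secret name the removed code read

model = MllamaForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    token=HF_TOKEN,  # `token=` supersedes the deprecated `use_auth_token=`
)
processor = AutoProcessor.from_pretrained(MODEL_ID, token=HF_TOKEN)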
 
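As written, processor.decode(output[0]) returns the prompt and special tokens along with the continuation, because generate prepends the input ids to its output for decoder models. If only the generated haiku should be displayed, a sketch that slices off the prompt tokens, reusing the inputs/output names from generate_text above:

# inside generate_text, after output = model.generate(**inputs, max_new_tokens=30):
prompt_len = inputs["input_ids"].shape[-1]   # number of tokens in the prompt
new_tokens = output[0][prompt_len:]          # keep only the newly generated ids
return processor.decode(new_tokens, skip_special_tokens=True)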