# Text to Video
# import torch
# from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
# from diffusers.utils import export_to_video
# import streamlit as st
# import numpy as np
# # Title and User Input
# st.title("Text-to-Video with Streamlit")
# prompt = st.text_input("Enter your text prompt:", "Spiderman is surfing")
# # Button to trigger generation
# if st.button("Generate Video"):
#     # Ensure you have 'accelerate' version 0.17.0 or higher
#     import accelerate
#     from packaging import version  # compare versions numerically, not as strings
#     if version.parse(accelerate.__version__) < version.parse("0.17.0"):
#         st.warning("Please upgrade 'accelerate' to version 0.17.0 or higher for CPU offloading.")
#     else:
#         with st.spinner("Generating video..."):
#             # Define the pipeline for video generation
#             pipe = DiffusionPipeline.from_pretrained("damo-vilab/text-to-video-ms-1.7b",
#                                                      torch_dtype=torch.float16, variant="fp16")
#             pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
#             pipe.enable_model_cpu_offload()
#             # Generate video frames
#             video_frames = pipe(prompt, num_inference_steps=25).frames
#             # Create dummy frames for testing (replace with actual manipulation later)
#             dummy_frames = [np.ones((256, 256, 3), dtype=np.uint8) for _ in range(20)]
#             # Export to video
#             video_path = export_to_video(dummy_frames)
#             # Display the video in the Streamlit app
#             st.video(video_path)
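#             # Hedged follow-up (not in the original): to show the model's own output rather
#             # than the dummy frames, export `video_frames` directly. On older diffusers
#             # releases `.frames` is a flat list of HxWx3 arrays; newer releases nest the
#             # frames per video, in which case pass `video_frames[0]` instead.
#             # st.video(export_to_video(video_frames))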
# Text to 3D
# import streamlit as st
# import torch
# from diffusers import ShapEPipeline
# from diffusers.utils import export_to_gif
# from PIL import Image
# import numpy as np
# # import PyTorch
# # Model loading (Ideally done once at the start for efficiency)
# ckpt_id = "openai/shap-e"
# def process_image_for_pil(image):
#     """Converts a Shap-E output frame (PIL image, tensor, or array) to a PIL image."""
#     if isinstance(image, Image.Image):
#         return image  # Already a PIL image (the pipeline's default output type)
#     elif isinstance(image, torch.Tensor):
#         # Assumes float values in [0, 1]; scale to 8-bit before conversion
#         image_array = (image.detach().cpu().numpy() * 255).astype('uint8')
#         return Image.fromarray(image_array)
#     elif isinstance(image, np.ndarray):
#         image_array = image.astype('uint8')  # Assuming 8-bit conversion is needed
#         return Image.fromarray(image_array)
#     else:
#         raise TypeError("Unsupported image format. Please provide conversion logic.")
# test_image = np.random.randint(0, 256, size=(256, 256, 3), dtype=np.uint8)  # Placeholder image
# result = process_image_for_pil(test_image)
# def should_resize(image):
#     """Determines whether to resize images (replace with your own logic)."""
#     return image.width > 512 or image.height > 512
# @st.cache_resource  # Caches the model for faster subsequent runs
# def load_model():
#     return ShapEPipeline.from_pretrained(ckpt_id).to("cuda")
# pipe = load_model()
# # App Title
# st.title("Shark 3D Image Generator")
# # User Inputs
# prompt = st.text_input("Enter your prompt:", "a shark")
# guidance_scale = st.slider("Guidance Scale", 0.0, 20.0, 15.0, step=0.5)
# # Generate and Display Images
# if st.button("Generate"):
#     with st.spinner("Generating images..."):
#         # Shap-E returns one list of frames per prompt; take the first prompt's frames
#         images = pipe(prompt, guidance_scale=guidance_scale, num_inference_steps=64).images[0]
#         # Process images for PIL conversion and resize if needed
#         pil_images = []
#         for image in images:
#             processed_image = process_image_for_pil(image)
#             if should_resize(processed_image):
#                 pil_images.append(processed_image.resize((256, 256)))
#             else:
#                 pil_images.append(processed_image)  # Append without resizing
#         gif_path = export_to_gif(pil_images, "shark_3d.gif")
#         st.image(pil_images[0])
#         st.success("GIF saved as shark_3d.gif")
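#         # Hedged follow-up (not in the original): st.image also accepts a local file path,
#         # so the animated GIF itself could be displayed instead of only its first frame:
#         # st.image(gif_path)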
# # Visual QA
# import streamlit as st
# import requests
# from PIL import Image
# from transformers import BlipProcessor, BlipForQuestionAnswering
# # Model Loading
# processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
# model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfilt-large")
# # Streamlit App Structure
# st.title("Visual Question Answering")
# def get_image():
#     img_url = st.text_input("Enter Image URL", value='https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg')
#     if img_url:
#         raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
#         st.image(raw_image)
#         return raw_image
# def process_vqa(image, question):
#     if image and question:
#         inputs = processor(image, question, return_tensors="pt")
#         output = model.generate(**inputs)
#         answer = processor.decode(output[0], skip_special_tokens=True)
#         st.write("Answer:", answer)
# # User Input
# image = get_image()
# question = st.text_input("Ask your question about the image:")
# # Process Question and Generate Answer
# process_vqa(image, question)
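# # Hedged optimization sketch (not in the original): wrapping the BLIP loading above in
# # st.cache_resource avoids reloading the weights on every Streamlit rerun, mirroring the
# # caching used in the other sections:
# # @st.cache_resource
# # def load_blip():
# #     proc = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
# #     mdl = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfilt-large")
# #     return proc, mdl
# # processor, model = load_blip()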
# # Chat with pdf
# import gradio as gr
# import streamlit as st
# from langchain.embeddings.openai import OpenAIEmbeddings
# from langchain.text_splitter import CharacterTextSplitter
# from langchain.vectorstores import Chroma
# from langchain.chains import ConversationalRetrievalChain
# from langchain.chat_models import ChatOpenAI
# from langchain.document_loaders import PyPDFLoader
# import os
# import fitz
# from PIL import Image
# # Global variables
# COUNT, N = 0, 0
# chat_history = []
# chain = None # Initialize chain as None
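# # Hedged sketch (not in the original): set_apikey/enable_api_box below return
# # `enable_box` / `disable_box`, which are never defined in this file. Assumed
# # definitions using gr.update (placeholder values):
# # enable_box = gr.update(value=None, placeholder='Enter your OpenAI API key', interactive=True)
# # disable_box = gr.update(value='OpenAI API key is set', interactive=False)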
# # Function to set the OpenAI API key
# def set_apikey(api_key):
#     os.environ['OPENAI_API_KEY'] = api_key
#     return disable_box
# # Function to enable the API key input box
# def enable_api_box():
#     return enable_box
# # Function to add text to the chat history
# def add_text(history, text):
#     if not text:
#         raise gr.Error('Enter text')
#     history = history + [(text, '')]
#     return history
# # Function to process the PDF file and create a conversation chain
# def process_file(file):
#     global chain
#     if 'OPENAI_API_KEY' not in os.environ:
#         raise gr.Error('Upload your OpenAI API key')
#     # Replace with your actual PDF processing logic
#     loader = PyPDFLoader(file.name)
#     documents = loader.load()
#     embeddings = OpenAIEmbeddings()
#     pdfsearch = Chroma.from_documents(documents, embeddings)
#     chain = ConversationalRetrievalChain.from_llm(ChatOpenAI(temperature=0.3),
#                                                   retriever=pdfsearch.as_retriever(search_kwargs={"k": 1}),
#                                                   return_source_documents=True)
#     return chain
# # Function to generate a response based on the chat history and query
# def generate_response(history, query, pdf_upload):
#     global COUNT, N, chat_history, chain
#     if not pdf_upload:
#         raise gr.Error(message='Upload a PDF')
#     if COUNT == 0:
#         chain = process_file(pdf_upload)
#         COUNT += 1
#     # Replace with your LangChain logic to generate a response
#     result = chain({"question": query, 'chat_history': chat_history}, return_only_outputs=True)
#     chat_history += [(query, result["answer"])]
#     N = list(result['source_documents'][0])[1][1]['page']  # Adjust as needed
#     for char in result['answer']:
#         history[-1][-1] += char
#     return history, ''
# # Function to render a specific page of a PDF file as an image
# def render_file(file):
#     global N
#     doc = fitz.open(file.name)
#     page = doc[N]
#     pix = page.get_pixmap(matrix=fitz.Matrix(300/72, 300/72))
#     image = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
#     return image
# # Function to render initial content from the PDF
# def render_first(pdf_file):
#     # Replace with logic to process the PDF and generate an initial image
#     image = Image.new('RGB', (600, 400), color='white')  # Placeholder
#     return image
# # Streamlit & Gradio Interface
# st.title("PDF-Powered Chatbot")
# with st.container():
#     gr.Markdown("""
#     <style>
#     .image-container { height: 680px; }
#     </style>
#     """)
# with gr.Blocks() as demo:
#     pdf_upload1 = gr.UploadButton("📁 Upload PDF 1", file_types=[".pdf"])  # Define pdf_upload1
#     # ... (rest of your interface creation)
#     txt = gr.Textbox(label="Enter your query", placeholder="Ask a question...")
#     submit_btn = gr.Button('Submit')
#     @submit_btn.click()
#     def on_submit():
#         add_text(chatbot, txt)
#         generate_response(chatbot, txt, pdf_upload1)  # Use pdf_upload1 here
#         render_file(pdf_upload1)  # Use pdf_upload1 here
# if __name__ == "__main__":
#     gr.Interface(
#         fn=generate_response,
#         inputs=[
#             "file",   # Define pdf_upload1
#             "text",   # Define chatbot output
#             "text"    # Define txt
#         ],
#         outputs=[
#             "image",  # Define show_img
#             "text",   # Define chatbot output
#             "text"    # Define txt
#         ],
#         title="PDF-Powered Chatbot"
#     ).launch(server_port=8888)
# # Text to audio
# from transformers import AutoProcessor, BarkModel
# import scipy.io.wavfile
# processor = AutoProcessor.from_pretrained("suno/bark")
# model = BarkModel.from_pretrained("suno/bark")
# model.to("cuda")
# def generate_audio(text, preset, output):
#     inputs = processor(text, voice_preset=preset)
#     for k, v in inputs.items():
#         inputs[k] = v.to("cuda")
#     audio_array = model.generate(**inputs)
#     audio_array = audio_array.cpu().numpy().squeeze()
#     sample_rate = model.generation_config.sample_rate
#     scipy.io.wavfile.write(output, rate=sample_rate, data=audio_array)
# generate_audio(
#     text="HI, welcome to our app hope you enjoy our app ,Thankyou for using our app YOURS Sincerely, Cosmo",
#     preset="v2/en_speaker_3",
#     output="output.wav",
# )
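# # Hedged follow-up (not in the original): the other sections use Streamlit, so the
# # generated file could be played back in the UI (assumes `import streamlit as st`):
# # st.audio("output.wav")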
# Fbgroup
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.common.keys import Keys
# import time
# from time import sleep
# from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.chrome.service import Service
# from webdriver_manager.chrome import ChromeDriverManager
# import streamlit as st
# import setuptools
# # (Note: the block below is webdriver_manager's own setup.py, pasted here verbatim;
# # it is not needed inside the Streamlit app itself.)
# with open("README.md", encoding="utf-8") as readme_file:
#     readme = readme_file.read()
# setuptools.setup(
#     name='webdriver_manager',
#     python_requires=">=3.7",
#     long_description=readme,
#     long_description_content_type="text/markdown",
#     packages=setuptools.find_packages(include=['webdriver_manager*']),
#     include_package_data=True,
#     version='4.0.1',
#     description='Library provides the way to automatically manage drivers for different browsers',
#     author='Sergey Pirogov',
#     author_email='automationremarks@gmail.com',
#     url='https://github.com/SergeyPirogov/webdriver_manager',
#     keywords=['testing', 'selenium', 'driver', 'test automation'],
#     classifiers=[
#         'License :: OSI Approved :: Apache Software License',
#         'Intended Audience :: Information Technology',
#         'Intended Audience :: Developers',
#         'Programming Language :: Python :: 3.7',
#         'Programming Language :: Python :: 3.8',
#         'Programming Language :: Python :: 3.9',
#         'Programming Language :: Python :: 3.10',
#         'Programming Language :: Python :: 3.11',
#         'Topic :: Software Development :: '
#         'Libraries :: Python Modules',
#         'Operating System :: Microsoft :: Windows',
#         'Operating System :: POSIX',
#         'Operating System :: Unix',
#         'Operating System :: MacOS',
#     ],
#     install_requires=[
#         'requests',
#         'python-dotenv',
#         'packaging'
#     ],
#     package_data={
#         "webdriver_manager": ["py.typed"]
#     },
# )
# def main():
#     # Input fields
#     st.title("Facebook Group Poster")
#     account = st.text_input("Facebook Account Email", "sample@gmail.com")
#     password = st.text_input("Facebook Password", "sample", type="password")
#     groups_links_list = st.text_area("Facebook Group URLs (one per line)",
#                                      "https://www.facebook.com/groups/sample1\nhttps://www.facebook.com/groups/sample2")
#     message = st.text_area("Post Message", "Checkout this amazing script...")
#     images_list = st.file_uploader("Upload Images", accept_multiple_files=True)
#     if st.button('Post to Facebook Groups'):
#         if not account or not password or not groups_links_list or not message or not images_list:
#             st.error("Please fill all the fields.")
#         else:
#             chrome_options = Options()
#             prefs = {"profile.default_content_setting_values.notifications": 2}
#             chrome_options.add_experimental_option("prefs", prefs)
#             with st.spinner("Posting to Facebook..."):
#                 driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)
#                 driver.get('https://www.facebook.com')
#                 # Login Logic (Replace XPaths if needed)
#                 emailelement = driver.find_element(By.XPATH, '//*[@id="email"]')
#                 emailelement.send_keys(account)
#                 passelement = driver.find_element(By.XPATH, '//*[@id="pass"]')
#                 passelement.send_keys(password)
#                 loginelement = driver.find_element(By.XPATH, '//*[@id="loginbutton"]')
#                 loginelement.click()
#                 # Posting Logic
#                 groups_links = groups_links_list.splitlines()
#                 for group in groups_links:
#                     driver.get(group)
#                     time.sleep(2)
#                     try:
#                         driver.find_element(By.XPATH, '//*[@label="Start Discussion"]').click()
#                         post_box = driver.find_element(By.CSS_SELECTOR, "[name='xhpc_message_text']")
#                     except Exception:
#                         post_box = driver.find_element(By.CSS_SELECTOR, "[name='xhpc_message_text']")
#                     post_box.send_keys(message)
#                     time.sleep(1)
#                     # Image Upload Logic (assuming one upload field per image; adapt based on Streamlit setup)
#                     for image_file in images_list:
#                         photo_element = driver.find_element(By.XPATH, '//input[@type="file"]')
#                         image_path = image_file.name  # Placeholder! Adjust how you get the path
#                         photo_element.send_keys(image_path)
#                         time.sleep(1)
#                     time.sleep(6)
#                     post_button = driver.find_element(By.XPATH, "//*[@data-testid='react-composer-post-button']")
#                     # ... (Rest of your logic to click the post button)
#                 driver.close()
# if __name__ == '__main__':
#     main()
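# # Hedged helper sketch (not in the original): Selenium's send_keys needs a real file
# # path, while Streamlit's UploadedFile lives in memory. One assumed approach is to
# # spill each upload to a temporary file and pass that path to send_keys instead:
# # import os, tempfile
# # def save_upload_to_temp(uploaded_file):
# #     suffix = os.path.splitext(uploaded_file.name)[1]
# #     with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
# #         tmp.write(uploaded_file.getbuffer())
# #         return tmp.name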
# Text to Image
import streamlit as st
import torch
from diffusers import StableDiffusionXLPipeline, UNet2DConditionModel, EulerDiscreteScheduler
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
# Model Path/Repo Information
base = "stabilityai/stable-diffusion-xl-base-1.0"
repo = "ByteDance/SDXL-Lightning"
ckpt = "sdxl_lightning_4step_unet.safetensors"
# Load model (Executed only once for efficiency)
@st.cache_resource
def load_sdxl_pipeline():
    unet = UNet2DConditionModel.from_config(base, subfolder="unet").to("cuda", torch.float16)
    unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device="cuda"))
    pipe = StableDiffusionXLPipeline.from_pretrained(base, unet=unet, torch_dtype=torch.float16, variant="fp16").to("cuda")
    pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
    return pipe
# Streamlit UI
st.title("Image Generation")
prompt = st.text_input("Enter your image prompt:")
if st.button("Generate Image"):
    if not prompt:
        st.warning("Please enter a prompt.")
    else:
        pipe = load_sdxl_pipeline()  # Load the pipeline from cache
        with torch.no_grad():
            # SDXL-Lightning's 4-step UNet expects 4 inference steps and guidance_scale=0
            image = pipe(prompt, num_inference_steps=4, guidance_scale=0).images[0]
        st.image(image)
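# Hedged follow-up (not in the original): inside the `else` branch above, the generated
# PIL image could also be offered as a download, e.g.:
# import io
# buf = io.BytesIO()
# image.save(buf, format="PNG")
# st.download_button("Download image", data=buf.getvalue(), file_name="generated.png", mime="image/png")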
# text generation
# import streamlit as st
# from transformers import AutoTokenizer, AutoModelForCausalLM
# st.title("Text Generation with Bloom")
# tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom")
# model = AutoModelForCausalLM.from_pretrained("bigscience/bloom")
# user_input = st.text_area("Enter your prompt:", height=100)
# if st.button('Generate Text'):
#     inputs = tokenizer(user_input, return_tensors="pt")
#     outputs = model.generate(**inputs, max_length=100)  # Adjust max_length as needed
#     generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
#     st.write("Generated Text:")
#     st.write(generated_text)
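# # Hedged note (not in the original): "bigscience/bloom" is the full ~176B-parameter
# # checkpoint and is impractical to load in a typical demo; a smaller checkpoint such as
# # "bigscience/bloom-560m" loads the same way:
# # tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-560m")
# # model = AutoModelForCausalLM.from_pretrained("bigscience/bloom-560m")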