# Text to Video
# import torch
# from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
# from diffusers.utils import export_to_video
# import streamlit as st
# import numpy as np
# # Title and User Input
# st.title("Text-to-Video with Streamlit")
# prompt = st.text_input("Enter your text prompt:", "Spiderman is surfing")
# # Button to trigger generation
# if st.button("Generate Video"):
#     # Ensure you have 'accelerate' version 0.17.0 or higher
#     import accelerate
#     from packaging import version  # compare versions numerically, not as strings
#     if version.parse(accelerate.__version__) < version.parse("0.17.0"):
#         st.warning("Please upgrade 'accelerate' to version 0.17.0 or higher for CPU offloading.")
#     else:
#         with st.spinner("Generating video..."):
#             # Define the pipeline for video generation
#             pipe = DiffusionPipeline.from_pretrained("damo-vilab/text-to-video-ms-1.7b",
#                                                      torch_dtype=torch.float16, variant="fp16")
#             pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
#             pipe.enable_model_cpu_offload()
#             # Generate video frames
#             video_frames = pipe(prompt, num_inference_steps=25).frames
#             # Create dummy frames for testing (replace with actual manipulation later)
#             dummy_frames = [np.ones((256, 256, 3), dtype=np.uint8) for _ in range(20)]
#             # Export to video
#             video_path = export_to_video(dummy_frames)
#             # Display the video in the Streamlit app
#             st.video(video_path)
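#             # Hedged follow-up (not in the original): to show the model's own output rather
#             # than the dummy frames, export `video_frames` directly. On older diffusers
#             # releases `.frames` is a flat list of HxWx3 arrays; newer releases nest the
#             # frames per video, in which case pass `video_frames[0]` instead.
#             # st.video(export_to_video(video_frames))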
# Text to 3D
# import streamlit as st
# import torch
# from diffusers import ShapEPipeline
# from diffusers.utils import export_to_gif
# from PIL import Image
# import numpy as np
# # import PyTorch
# # Model loading (Ideally done once at the start for efficiency)
# ckpt_id = "openai/shap-e"
# def process_image_for_pil(image):
#     """Converts a Shap-E output frame (PIL image, tensor, or array) to a PIL image."""
#     if isinstance(image, Image.Image):
#         return image  # Already a PIL image (the pipeline's default output type)
#     elif isinstance(image, torch.Tensor):
#         # Assumes float values in [0, 1]; scale to 8-bit before conversion
#         image_array = (image.detach().cpu().numpy() * 255).astype('uint8')
#         return Image.fromarray(image_array)
#     elif isinstance(image, np.ndarray):
#         image_array = image.astype('uint8')  # Assuming 8-bit conversion is needed
#         return Image.fromarray(image_array)
#     else:
#         raise TypeError("Unsupported image format. Please provide conversion logic.")
# test_image = np.random.randint(0, 256, size=(256, 256, 3), dtype=np.uint8)  # Placeholder image
# result = process_image_for_pil(test_image)
# def should_resize(image):
#     """Determines whether to resize images (replace with your own logic)."""
#     return image.width > 512 or image.height > 512
# @st.cache_resource  # Caches the model for faster subsequent runs
# def load_model():
#     return ShapEPipeline.from_pretrained(ckpt_id).to("cuda")
# pipe = load_model()
# # App Title
# st.title("Shark 3D Image Generator")
# # User Inputs
# prompt = st.text_input("Enter your prompt:", "a shark")
# guidance_scale = st.slider("Guidance Scale", 0.0, 20.0, 15.0, step=0.5)
# # Generate and Display Images
# if st.button("Generate"):
#     with st.spinner("Generating images..."):
#         # Shap-E returns one list of frames per prompt; take the first prompt's frames
#         images = pipe(prompt, guidance_scale=guidance_scale, num_inference_steps=64).images[0]
#         # Process images for PIL conversion and resize if needed
#         pil_images = []
#         for image in images:
#             processed_image = process_image_for_pil(image)
#             if should_resize(processed_image):
#                 pil_images.append(processed_image.resize((256, 256)))
#             else:
#                 pil_images.append(processed_image)  # Append without resizing
#         gif_path = export_to_gif(pil_images, "shark_3d.gif")
#         st.image(pil_images[0])
#         st.success("GIF saved as shark_3d.gif")
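#         # Hedged follow-up (not in the original): st.image also accepts a local file path,
#         # so the animated GIF itself could be displayed instead of only its first frame:
#         # st.image(gif_path)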
# # Visual QA
# import streamlit as st
# import requests
# from PIL import Image
# from transformers import BlipProcessor, BlipForQuestionAnswering
# # Model Loading
# processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
# model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfilt-large")
# # Streamlit App Structure
# st.title("Visual Question Answering")
# def get_image():
#     img_url = st.text_input("Enter Image URL", value='https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg')
#     if img_url:
#         raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
#         st.image(raw_image)
#         return raw_image
# def process_vqa(image, question):
#     if image and question:
#         inputs = processor(image, question, return_tensors="pt")
#         output = model.generate(**inputs)
#         answer = processor.decode(output[0], skip_special_tokens=True)
#         st.write("Answer:", answer)
# # User Input
# image = get_image()
# question = st.text_input("Ask your question about the image:")
# # Process Question and Generate Answer
# process_vqa(image, question)
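# # Hedged optimization sketch (not in the original): wrapping the BLIP loading above in
# # st.cache_resource avoids reloading the weights on every Streamlit rerun, mirroring the
# # caching used in the other sections:
# # @st.cache_resource
# # def load_blip():
# #     proc = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
# #     mdl = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfilt-large")
# #     return proc, mdl
# # processor, model = load_blip()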
# # Chat with pdf
# import gradio as gr
# import streamlit as st
# from langchain.embeddings.openai import OpenAIEmbeddings
# from langchain.text_splitter import CharacterTextSplitter
# from langchain.vectorstores import Chroma
# from langchain.chains import ConversationalRetrievalChain
# from langchain.chat_models import ChatOpenAI
# from langchain.document_loaders import PyPDFLoader
# import os
# import fitz
# from PIL import Image
# # Global variables
# COUNT, N = 0, 0
# chat_history = []
# chain = None # Initialize chain as None
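# # Hedged sketch (not in the original): set_apikey/enable_api_box below return
# # `enable_box` / `disable_box`, which are never defined in this file. Assumed
# # definitions using gr.update (placeholder values):
# # enable_box = gr.update(value=None, placeholder='Enter your OpenAI API key', interactive=True)
# # disable_box = gr.update(value='OpenAI API key is set', interactive=False)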
# # Function to set the OpenAI API key
# def set_apikey(api_key):
#     os.environ['OPENAI_API_KEY'] = api_key
#     return disable_box
# # Function to enable the API key input box
# def enable_api_box():
#     return enable_box
# # Function to add text to the chat history
# def add_text(history, text):
#     if not text:
#         raise gr.Error('Enter text')
#     history = history + [(text, '')]
#     return history
# # Function to process the PDF file and create a conversation chain
# def process_file(file):
#     global chain
#     if 'OPENAI_API_KEY' not in os.environ:
#         raise gr.Error('Upload your OpenAI API key')
#     # Replace with your actual PDF processing logic
#     loader = PyPDFLoader(file.name)
#     documents = loader.load()
#     embeddings = OpenAIEmbeddings()
#     pdfsearch = Chroma.from_documents(documents, embeddings)
#     chain = ConversationalRetrievalChain.from_llm(ChatOpenAI(temperature=0.3),
#                                                   retriever=pdfsearch.as_retriever(search_kwargs={"k": 1}),
#                                                   return_source_documents=True)
#     return chain
# # Function to generate a response based on the chat history and query
# def generate_response(history, query, pdf_upload):
#     global COUNT, N, chat_history, chain
#     if not pdf_upload:
#         raise gr.Error(message='Upload a PDF')
#     if COUNT == 0:
#         chain = process_file(pdf_upload)
#         COUNT += 1
#     # Replace with your LangChain logic to generate a response
#     result = chain({"question": query, 'chat_history': chat_history}, return_only_outputs=True)
#     chat_history += [(query, result["answer"])]
#     N = list(result['source_documents'][0])[1][1]['page']  # Adjust as needed
#     for char in result['answer']:
#         history[-1][-1] += char
#     return history, ''
# # Function to render a specific page of a PDF file as an image
# def render_file(file):
#     global N
#     doc = fitz.open(file.name)
#     page = doc[N]
#     pix = page.get_pixmap(matrix=fitz.Matrix(300/72, 300/72))
#     image = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
#     return image
# # Function to render initial content from the PDF
# def render_first(pdf_file):
#     # Replace with logic to process the PDF and generate an initial image
#     image = Image.new('RGB', (600, 400), color='white')  # Placeholder
#     return image
# # Streamlit & Gradio Interface
# st.title("PDF-Powered Chatbot")
# with st.container():
#     gr.Markdown("""
#     <style>
#     .image-container { height: 680px; }
#     </style>
#     """)
# with gr.Blocks() as demo:
#     pdf_upload1 = gr.UploadButton("📁 Upload PDF 1", file_types=[".pdf"])  # Define pdf_upload1
#     # ... (rest of your interface creation)
#     txt = gr.Textbox(label="Enter your query", placeholder="Ask a question...")
#     submit_btn = gr.Button('Submit')
#     @submit_btn.click()
#     def on_submit():
#         add_text(chatbot, txt)
#         generate_response(chatbot, txt, pdf_upload1)  # Use pdf_upload1 here
#         render_file(pdf_upload1)  # Use pdf_upload1 here
# if __name__ == "__main__":
#     gr.Interface(
#         fn=generate_response,
#         inputs=[
#             "file",   # Define pdf_upload1
#             "text",   # Define chatbot output
#             "text"    # Define txt
#         ],
#         outputs=[
#             "image",  # Define show_img
#             "text",   # Define chatbot output
#             "text"    # Define txt
#         ],
#         title="PDF-Powered Chatbot"
#     ).launch(server_port=8888)
# # Text to audio
# from transformers import AutoProcessor, BarkModel
# import scipy.io.wavfile
# processor = AutoProcessor.from_pretrained("suno/bark")
# model = BarkModel.from_pretrained("suno/bark")
# model.to("cuda")
# def generate_audio(text, preset, output):
#     inputs = processor(text, voice_preset=preset)
#     for k, v in inputs.items():
#         inputs[k] = v.to("cuda")
#     audio_array = model.generate(**inputs)
#     audio_array = audio_array.cpu().numpy().squeeze()
#     sample_rate = model.generation_config.sample_rate
#     scipy.io.wavfile.write(output, rate=sample_rate, data=audio_array)
# generate_audio(
#     text="HI, welcome to our app hope you enjoy our app ,Thankyou for using our app YOURS Sincerely, Cosmo",
#     preset="v2/en_speaker_3",
#     output="output.wav",
# )
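# # Hedged follow-up (not in the original): the other sections use Streamlit, so the
# # generated file could be played back in the UI (assumes `import streamlit as st`):
# # st.audio("output.wav")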
# Fbgroup
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.common.keys import Keys
# import time
# from time import sleep
# from selenium.webdriver.chrome.options import Options
# from selenium.webdriver.chrome.service import Service
# from webdriver_manager.chrome import ChromeDriverManager
# import streamlit as st
# import setuptools
# # (Note: the block below is webdriver_manager's own setup.py, pasted here verbatim;
# # it is not needed inside the Streamlit app itself.)
# with open("README.md", encoding="utf-8") as readme_file:
#     readme = readme_file.read()
# setuptools.setup(
#     name='webdriver_manager',
#     python_requires=">=3.7",
#     long_description=readme,
#     long_description_content_type="text/markdown",
#     packages=setuptools.find_packages(include=['webdriver_manager*']),
#     include_package_data=True,
#     version='4.0.1',
#     description='Library provides the way to automatically manage drivers for different browsers',
#     author='Sergey Pirogov',
#     author_email='automationremarks@gmail.com',
#     url='https://github.com/SergeyPirogov/webdriver_manager',
#     keywords=['testing', 'selenium', 'driver', 'test automation'],
#     classifiers=[
#         'License :: OSI Approved :: Apache Software License',
#         'Intended Audience :: Information Technology',
#         'Intended Audience :: Developers',
#         'Programming Language :: Python :: 3.7',
#         'Programming Language :: Python :: 3.8',
#         'Programming Language :: Python :: 3.9',
#         'Programming Language :: Python :: 3.10',
#         'Programming Language :: Python :: 3.11',
#         'Topic :: Software Development :: '
#         'Libraries :: Python Modules',
#         'Operating System :: Microsoft :: Windows',
#         'Operating System :: POSIX',
#         'Operating System :: Unix',
#         'Operating System :: MacOS',
#     ],
#     install_requires=[
#         'requests',
#         'python-dotenv',
#         'packaging'
#     ],
#     package_data={
#         "webdriver_manager": ["py.typed"]
#     },
# )
# def main():
#     # Input fields
#     st.title("Facebook Group Poster")
#     account = st.text_input("Facebook Account Email", "sample@gmail.com")
#     password = st.text_input("Facebook Password", "sample", type="password")
#     groups_links_list = st.text_area("Facebook Group URLs (one per line)",
#                                      "https://www.facebook.com/groups/sample1\nhttps://www.facebook.com/groups/sample2")
#     message = st.text_area("Post Message", "Checkout this amazing script...")
#     images_list = st.file_uploader("Upload Images", accept_multiple_files=True)
#     if st.button('Post to Facebook Groups'):
#         if not account or not password or not groups_links_list or not message or not images_list:
#             st.error("Please fill all the fields.")
#         else:
#             chrome_options = Options()
#             prefs = {"profile.default_content_setting_values.notifications": 2}
#             chrome_options.add_experimental_option("prefs", prefs)
#             with st.spinner("Posting to Facebook..."):
#                 driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)
#                 driver.get('https://www.facebook.com')
#                 # Login Logic (Replace XPaths if needed)
#                 emailelement = driver.find_element(By.XPATH, '//*[@id="email"]')
#                 emailelement.send_keys(account)
#                 passelement = driver.find_element(By.XPATH, '//*[@id="pass"]')
#                 passelement.send_keys(password)
#                 loginelement = driver.find_element(By.XPATH, '//*[@id="loginbutton"]')
#                 loginelement.click()
#                 # Posting Logic
#                 groups_links = groups_links_list.splitlines()
#                 for group in groups_links:
#                     driver.get(group)
#                     time.sleep(2)
#                     try:
#                         driver.find_element(By.XPATH, '//*[@label="Start Discussion"]').click()
#                         post_box = driver.find_element(By.CSS_SELECTOR, "[name='xhpc_message_text']")
#                     except Exception:
#                         post_box = driver.find_element(By.CSS_SELECTOR, "[name='xhpc_message_text']")
#                     post_box.send_keys(message)
#                     time.sleep(1)
#                     # Image Upload Logic (assuming one upload field per image; adapt based on Streamlit setup)
#                     for image_file in images_list:
#                         photo_element = driver.find_element(By.XPATH, '//input[@type="file"]')
#                         image_path = image_file.name  # Placeholder! Adjust how you get the path
#                         photo_element.send_keys(image_path)
#                         time.sleep(1)
#                     time.sleep(6)
#                     post_button = driver.find_element(By.XPATH, "//*[@data-testid='react-composer-post-button']")
#                     # ... (Rest of your logic to click the post button)
#                 driver.close()
# if __name__ == '__main__':
#     main()
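# # Hedged helper sketch (not in the original): Selenium's send_keys needs a real file
# # path, while Streamlit's UploadedFile lives in memory. One assumed approach is to
# # spill each upload to a temporary file and pass that path to send_keys instead:
# # import os, tempfile
# # def save_upload_to_temp(uploaded_file):
# #     suffix = os.path.splitext(uploaded_file.name)[1]
# #     with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
# #         tmp.write(uploaded_file.getbuffer())
# #         return tmp.name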
# Text to Image
import streamlit as st
import torch
from diffusers import StableDiffusionXLPipeline, UNet2DConditionModel, EulerDiscreteScheduler
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
# Model Path/Repo Information
base = "stabilityai/stable-diffusion-xl-base-1.0"
repo = "ByteDance/SDXL-Lightning"
ckpt = "sdxl_lightning_4step_unet.safetensors"
# Load model (Executed only once for efficiency)
@st.cache_resource
def load_sdxl_pipeline():
    unet = UNet2DConditionModel.from_config(base, subfolder="unet").to("cuda", torch.float16)
    unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device="cuda"))
    pipe = StableDiffusionXLPipeline.from_pretrained(base, unet=unet, torch_dtype=torch.float16, variant="fp16").to("cuda")
    pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
    return pipe
# Streamlit UI
st.title("Image Generation")
prompt = st.text_input("Enter your image prompt:")
if st.button("Generate Image"):
    if not prompt:
        st.warning("Please enter a prompt.")
    else:
        pipe = load_sdxl_pipeline()  # Load the pipeline from cache
        with torch.no_grad():
            # SDXL-Lightning's 4-step UNet expects 4 inference steps and guidance_scale=0
            image = pipe(prompt, num_inference_steps=4, guidance_scale=0).images[0]
        st.image(image)
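# Hedged follow-up (not in the original): inside the `else` branch above, the generated
# PIL image could also be offered as a download, e.g.:
# import io
# buf = io.BytesIO()
# image.save(buf, format="PNG")
# st.download_button("Download image", data=buf.getvalue(), file_name="generated.png", mime="image/png")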
# text generation
# import streamlit as st
# from transformers import AutoTokenizer, AutoModelForCausalLM
# st.title("Text Generation with Bloom")
# tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom")
# model = AutoModelForCausalLM.from_pretrained("bigscience/bloom")
# user_input = st.text_area("Enter your prompt:", height=100)
# if st.button('Generate Text'):
#     inputs = tokenizer(user_input, return_tensors="pt")
#     outputs = model.generate(**inputs, max_length=100)  # Adjust max_length as needed
#     generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
#     st.write("Generated Text:")
#     st.write(generated_text)
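# # Hedged note (not in the original): "bigscience/bloom" is the full ~176B-parameter
# # checkpoint and is impractical to load in a typical demo; a smaller checkpoint such as
# # "bigscience/bloom-560m" loads the same way:
# # tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-560m")
# # model = AutoModelForCausalLM.from_pretrained("bigscience/bloom-560m")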