Spaces:

Francesco
/

YouTubeGuru

Runtime error

App Files Files Community

Francesco committed on May 21, 2023

Commit

5f25427

0 Parent(s):

first mvp

Browse files

Files changed (7) hide show

.gitignore +160 -0
README.md +7 -0
app.py +142 -0
prompts/output.txt +1 -0
prompts/system.prompt +1 -0
prompts/template.prompt +10 -0
requirements.txt +5 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,160 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/

README.md ADDED Viewed

	@@ -0,0 +1,7 @@

+We will use Gradio.
+1) Transcribe the YouTube video
+ - we need an input where the user can paste the video URL
+2) Then we need to store the transcription in a vector DB
+    - ConversationTokenBufferMemory
+    https://python.langchain.com/en/latest/modules/memory/types/summary_buffer.html

app.py ADDED Viewed

	@@ -0,0 +1,142 @@

+import json
+import logging
+import os
+from pathlib import Path
+from typing import List
+from uuid import uuid4
+import gradio as gr
+import openai
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts import HumanMessagePromptTemplate
+from langchain.schema import HumanMessage, SystemMessage
+from youtube_dl import YoutubeDL
+os.environ["OPENAI_API_KEY"] = "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"
+MODELS_NAMES = ["gpt-3.5-turbo", "gpt-4"]
+logging.basicConfig(
+    format="[%(asctime)s %(levelname)s]: %(message)s", level=logging.DEBUG
+)
+system_message = SystemMessage(content=Path("prompts/system.prompt").read_text())
+human_message_prompt_template = HumanMessagePromptTemplate.from_template(
+    Path("prompts/template.prompt").read_text()
+)
+def download_video_as_mp3(video_url: str, output_filename: str):
+    ydl_opts = {
+        "format": "bestaudio/best",
+        "outtmpl": output_filename,
+        "postprocessors": [
+            {
+                "key": "FFmpegExtractAudio",
+                "preferredcodec": "mp3",
+                "preferredquality": "192",
+            }
+        ],
+    }
+    with YoutubeDL(ydl_opts) as ydl:
+        ydl.download([video_url])
+def get_transcription(youtube_url: str):
+    logging.info(f"Transcribing {youtube_url}")
+    output_filename = Path(f"{str(uuid4())}.mp3")
+    download_video_as_mp3(youtube_url, str(output_filename))
+    logging.debug(f"video downloaded at {str(output_filename)}")
+    with output_filename.open("rb") as audio_file:
+        transcript = openai.Audio.transcribe("whisper-1", audio_file, language="en")
+    logging.info(f"Done!")
+    output_filename.unlink()
+    return transcript
+def get_youtube_video_info(youtube_transcription: str, messages: List, chat):
+    logging.info("Running GPT")
+    human_message = human_message_prompt_template.format(
+        youtube_transcription=youtube_transcription
+    )
+    messages.append(human_message)
+    reply = chat(messages)
+    messages.append(reply)
+    logging.info(f"Done!")
+    # we don't want the first ever message, too long
+    chatbot_messages = [("", reply.content)]
+    return chatbot_messages, messages
+def run_message_on_chatbot(chat, message: str, chatbot_messages, messages):
+    logging.info("asking question to GPT")
+    messages.append(HumanMessage(content=message))
+    reply = chat(messages)
+    messages.append(reply)
+    logging.debug(f"reply = {reply.content}")
+    logging.info(f"Done!")
+    chatbot_messages.append((message, messages[-1].content))
+    return "", chatbot_messages, messages
+def youtube_guru_button_handler(
+    youtube_url: str, messages: List, temperature: float, model_name: str
+):
+    chat = ChatOpenAI(model_name=model_name, temperature=temperature)
+    transcription = get_transcription(youtube_url)
+    chatbot_messages, messages = get_youtube_video_info(transcription, messages, chat)
+    return chatbot_messages, messages, chat
+def on_clear_button_click():
+    return "", [], [messages]
+with gr.Blocks() as demo:
+    messages = gr.State([system_message])
+    youtube_transcription = gr.State("")
+    model_selected = gr.State()
+    chat = gr.State()
+    with gr.Column():
+        gr.Markdown("# Welcome to YouTubeGuru!")
+        youtube_url = gr.Textbox(
+            label="video url", placeholder="https://www.youtube.com/watch?v=dQw4w9WgXcQ"
+        )
+        chatbot = gr.Chatbot()
+        msg = gr.Textbox(label="chat input")
+        msg.submit(
+            run_message_on_chatbot,
+            [chat, msg, chatbot, messages],
+            [msg, chatbot, messages],
+        )
+        with gr.Row():
+            with gr.Column():
+                clear = gr.Button("Clear")
+                clear.click(
+                    on_clear_button_click,
+                    [],
+                    [youtube_transcription, chatbot, messages],
+                    queue=False,
+                )
+            with gr.Accordion("Settings", open=False):
+                temperature = gr.Slider(
+                    minimum=0.0,
+                    maximum=1.0,
+                    value=0.7,
+                    step=0.1,
+                    label="temperate",
+                    interactive=True,
+                )
+                model_name = gr.Dropdown(
+                    choices=MODELS_NAMES, value=MODELS_NAMES[0], label="model"
+                )
+        button = gr.Button("Run 🚀")
+        button.click(
+            youtube_guru_button_handler,
+            inputs=[youtube_url, messages, temperature, model_name],
+            outputs=[chatbot, messages, chat],
+        )

prompts/output.txt ADDED Viewed

	@@ -0,0 +1 @@

+ '1. In this video, the speaker provides a list of 30 Twitter accounts that he believes are the best for following machine learning research. He explains that Twitter is a better platform for this than LinkedIn and goes into detail on how to optimize your Twitter feed. The speaker also gives insights and opinions on each of the accounts he recommends.\n\n2. This video provides a comprehensive list of 30 Twitter accounts that are great for following machine learning research. The speaker also gives tips on how to optimize your Twitter feed to get the most out of it. If you\'re interested in staying up-to-date on the latest machine learning research, this video might be a great resource for you.\n\n3. "30 Must-Follow Twitter Accounts for Machine Learning Research"

prompts/system.prompt ADDED Viewed

	@@ -0,0 +1 @@


+ You are YouTubeGuru, an AI-powered virtual assistant with expertise in summarizing YouTube videos, writing well-written descriptions from transcriptions, and devising impactful titles for your content.

prompts/template.prompt ADDED Viewed

	@@ -0,0 +1,10 @@

+Given a video transcription follow these tasks:
+{{
+    "summary": <Summarize the most essential aspects of the video in a concise manner>,
+    "description": <Generate a suitable YouTube description for the video, tailored to the content>,
+    "title": <Propose an attention-grabbing title for the YouTube video>
+}}
+Transcription:
+{youtube_transcription}

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+openai
+youtube-dl
+gradio
+git+https://github.com/ytdl-org/youtube-dl.git
+langchain