"""Gradio front end that transcribes a YouTube video's audio with Whisper.

Also contains two small helpers (``get_title`` and ``greet``) that build
strings from ``book.json`` and from the project-local ``Model`` module.
"""

import json
import pickle
import time

import gradio as gr
import whisper
from langchain.schema.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pytube import YouTube

import Model


def get_title(link: str) -> str:
    """Return *link* followed by the first title found in ``book.json``.

    ``book.json`` is read from the current working directory as UTF-8 and
    the title is taken from ``data[0]["item"][0]["title"]``.

    Args:
        link: Text that is prepended to the title.

    Returns:
        The concatenation ``link + title``.

    Raises:
        FileNotFoundError: If ``book.json`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open("book.json", "r", encoding="utf-8") as read_file:
        books = json.load(read_file)
    title = books[0]["item"][0]["title"]
    return link + title


def greet(link):
    """Return ``Model.get_title(link)`` joined with ``Model.greet(link)``.

    Both calls are delegated to the ``Model`` module; their results are
    combined with ``+`` in that order.
    """
    model_title = Model.get_title(link)
    model_greeting = Model.greet(link)
    return model_title + model_greeting


def youtube_text(link: str) -> Document:
    """Download a video's audio, transcribe it, and return the first chunk.

    Steps:
      1. Download the first audio-only stream of *link* to ``./test.mp3``.
      2. Transcribe it with the Whisper ``"small"`` model, printing the
         transcript and the elapsed time (model load + transcription).
      3. Split the transcript into chunks of at most 2000 characters with
         a 50-character overlap.
      4. Pickle the chunk documents to ``split_example_small.pkl``.

    Args:
        link: URL of the YouTube video.

    Returns:
        The first transcript chunk as a ``Document``. If the transcript
        produced no chunks (e.g. silent audio), an empty ``Document`` is
        returned instead of failing with ``IndexError``.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    audio_stream = YouTube(link).streams.filter(only_audio=True).first()
    if audio_stream is None:
        # ``first()`` returns None when the query matched nothing; fail
        # with a clear message instead of an AttributeError on ``None``.
        raise ValueError(f"No audio-only stream available for {link!r}")
    audio_stream.download(output_path=".", filename="test.mp3")

    # perf_counter is monotonic, so the interval cannot be skewed by
    # system clock adjustments.
    start = time.perf_counter()
    model = whisper.load_model("small")
    result = model.transcribe("test.mp3")
    end = time.perf_counter()

    transcript = result["text"]
    print(transcript)
    print(f"{end - start:.2f}sec")

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=2000,
        chunk_overlap=50,
        length_function=len,
    )
    docs = [
        Document(page_content=chunk)
        for chunk in text_splitter.split_text(transcript)
    ]
    # NOTE(review): the chunks are already within chunk_size, so this second
    # split is presumably a no-op; kept so the pickled payload is unchanged.
    split_docs = text_splitter.split_documents(docs)

    with open("split_example_small.pkl", "wb") as f:
        pickle.dump(split_docs, f)

    if not docs:
        # No chunks were produced; return an empty document rather than
        # raising IndexError on ``docs[0]``.
        return Document(page_content="")
    return docs[0]


# NOTE(review): the interface is built and launched at import time, exactly
# as before. Consider an ``if __name__ == "__main__":`` guard if this module
# is ever imported by other code.
iface = gr.Interface(fn=youtube_text, inputs="text", outputs="text")
iface.launch()