"""Gradio chat app that recommends YouTube videos via retrieval + OpenAI chat.

A FAISS index is searched for transcript chunks similar to the user's message,
the matching rows of the transcript CSV are injected into a system prompt, and
the chat completion is streamed back to the Gradio UI.

Configuration is read from the environment so no secrets live in source:

* ``OPENAI_API_KEY``       -- required, OpenAI API key.
* ``GRADIO_AUTH_USER``     -- optional, UI login name (default ``"user"``).
* ``GRADIO_AUTH_PASSWORD`` -- required, UI login password.
"""
import json
import logging
import os
from typing import Any, Dict, Iterator, List

import gradio as gr
import pandas as pd
from langchain.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from openai import OpenAI

logger = logging.getLogger(__name__)

MODEL = "gpt-4o"

# Secrets must never be committed; any key that was previously hard-coded in
# this file should be treated as leaked and revoked.
API_KEY = os.environ.get("OPENAI_API_KEY")
if not API_KEY:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable.")

AUTH_USER = os.environ.get("GRADIO_AUTH_USER", "user")
AUTH_PASSWORD = os.environ.get("GRADIO_AUTH_PASSWORD")
if not AUTH_PASSWORD:
    # The app is launched with a public share link, so refuse to start
    # without a password rather than falling back to one stored in source.
    raise RuntimeError("Set the GRADIO_AUTH_PASSWORD environment variable.")

# BASE_URL = "https://youtu.be/"

# Template for the system message; ``{retriever}`` is replaced with the
# retrieved rows on every request.
SYSTEM_PROMPT = (
    "- You are an AI chat bot that recommends YouTube content to users as an assistant.\n"
    "- You were created and powered by 'bigster (빅스터)', an AI & bigdata expert company.\n"
    "- Recommend YouTube content to users based on what's in “retriever”.\n"
    "- If the user's question is not related to content recommendations, please display a message declining to answer.\n"
    "- You must recommend at least 3 YouTube content items to the user based on the information in the 'retriever'. Be sure to explicitly include 'url' & 'videoChannelName' & 'videoName' information in your response. Also, for each featured piece of content, summarize what's in the 'transcription' and present it to the user. \n"
    "Use the following Markdown format to create hyperlinks: '[videoName](url)'\n\n"
    " retriever:\n{retriever}"
)

client = OpenAI(api_key=API_KEY)
embeddings = OpenAIEmbeddings(model="text-embedding-3-large", api_key=API_KEY)

# SECURITY: allow_dangerous_deserialization=True makes load_local unpickle the
# index files. Only load a "vector-large" directory that you built yourself.
yt_chunks = FAISS.load_local(
    "vector-large",
    embeddings,
    allow_dangerous_deserialization=True,
)
df = pd.read_csv("data/ko-youtube-trans-U10k.csv")


def find_docs(message: str) -> List[Dict[str, Any]]:
    """Return the CSV rows behind the 5 index chunks most similar to *message*.

    Each hit's ``metadata['row']`` is used as a label into ``df``. The row is
    round-tripped through JSON so the result contains only plain Python
    values (no pandas/NumPy scalars).
    """
    hits = yt_chunks.similarity_search(message, k=5)
    row_labels = [doc.metadata["row"] for doc in hits]
    return [
        json.loads(df.loc[label].to_json(force_ascii=False))
        for label in row_labels
    ]


def _history_to_messages(history) -> List[Dict[str, Any]]:
    """Convert Gradio chat history into OpenAI-style message dicts.

    Accepts both history layouts Gradio can pass: ``(user, assistant)`` pairs
    and ``{"role": ..., "content": ...}`` dicts. Entries with no content are
    dropped, as are dict entries whose role is not user/assistant.
    """
    messages: List[Dict[str, Any]] = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content")
            if role in ("user", "assistant") and content is not None:
                messages.append({"role": role, "content": content})
            continue

        human, assistant = turn
        if human is not None:
            messages.append({"role": "user", "content": human})
        if assistant is not None:
            messages.append({"role": "assistant", "content": assistant})
    return messages


def predict(message, history) -> Iterator[str]:
    """Stream a recommendation reply for *message*.

    Args:
        message: The user's latest chat message.
        history: Previous turns as supplied by ``gr.ChatInterface``.

    Yields:
        The reply accumulated so far, once per streamed chunk that carries
        text, so the UI can render the answer incrementally.
    """
    retriever = find_docs(message)

    # The system message is placed after the prior turns and directly before
    # the new user message, carrying this request's retrieval results.
    openai_input = _history_to_messages(history)
    openai_input.append(
        {"role": "system", "content": SYSTEM_PROMPT.format(retriever=retriever)}
    )
    openai_input.append({"role": "user", "content": message})
    logger.debug("OpenAI request messages: %s", openai_input)

    response = client.chat.completions.create(
        model=MODEL,
        messages=openai_input,
        temperature=1.0,
        stream=True,
    )

    partial_message = ""
    for chunk in response:
        # A streamed chunk can arrive with an empty ``choices`` list; skip it
        # instead of failing on ``choices[0]``.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta.content
        if delta is not None:
            partial_message += delta
            yield partial_message


gr.ChatInterface(
    predict,
    title="YOUTUBE REC",
    theme=gr.themes.Soft(primary_hue="purple"),
    examples=[
        "네 이름은 뭐야?",
        "파이썬 프로그래밍 언어를 독학하기 위한 영상을 추천해줘.",
        "인간관계에서 큰 상실감을 느끼는 나를 위한 영상을 추천해줘.",
        "간단하고 초보자도 이해하기 쉬운 딥러닝 강의 추천해줘.",
        "프랑스 역사에 대해 공부하고 싶어요. 프랑스 역사에 관련된 다큐멘터리, 강의 영상을 추천해주세요.",
        "요즘 에세이를 작성하고 있습니다. 보다 효과적으로 글을 써내려가는 방법을 제시해주는 영상을 추천해주세요.",
    ],
).launch(share=True, auth=(AUTH_USER, AUTH_PASSWORD))