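# NOTE: the following lines are residue from the web page this file was copied
# from; they are not part of the program and are kept only as comments.
# drlee1's picture
# Update app.py
# 0d054d8 verified
# raw
# history blame
# No virus
# 3.42 kB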
import json
import os

import gradio as gr
import pandas as pd
from langchain.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from openai import OpenAI
# Chat-completion model used to generate the recommendation replies.
MODEL = "gpt-4o"

# SECURITY: the OpenAI key is read from the environment so that no secret is
# stored in the source tree. A literal key was previously committed on this
# line; it must be treated as compromised and revoked.
API_KEY = os.environ.get("OPENAI_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "The OPENAI_API_KEY environment variable must be set to start the app."
    )

# BASE_URL = "https://youtu.be/"

# One API key is shared by the chat client and the embedding model.
client = OpenAI(api_key=API_KEY)
embeddings = OpenAIEmbeddings(model="text-embedding-3-large", api_key=API_KEY)

# NOTE(security): allow_dangerous_deserialization=True lets the loader
# unpickle the stored index data, which can execute arbitrary code. This is
# only acceptable while "vector-large" is an artifact produced and shipped by
# this project; never point it at an untrusted path.
yt_chunks = FAISS.load_local(
    "vector-large",
    embeddings,
    allow_dangerous_deserialization=True,
)

# Table that find_docs() indexes with the 'row' metadata of each search hit.
df = pd.read_csv("data/ko-youtube-trans-U10k.csv")
def find_docs(message):
    """Look up the dataset rows behind the chunks most similar to ``message``.

    Runs a similarity search for ``message`` over ``yt_chunks`` (top 5 hits),
    reads the ``'row'`` entry of each hit's metadata, and uses it as a label
    into ``df``.  Each selected row is serialised to JSON and parsed back, so
    the result is a list of plain dicts, in the same order as the search hits.

    Args:
        message: The query text to search for.

    Returns:
        A list with one dict per search hit.
    """
    hits = yt_chunks.similarity_search(message, k=5)

    # Collect every row label first, then resolve them against the table.
    row_labels = []
    for hit in hits:
        row_labels.append(hit.metadata['row'])

    records = []
    for label in row_labels:
        # force_ascii=False keeps non-ASCII text unescaped in the JSON string.
        row_json = df.loc[label].to_json(force_ascii=False)
        records.append(json.loads(row_json))
    return records
def _history_to_messages(history):
    """Convert the chat history into a list of OpenAI chat messages.

    Two history shapes are accepted: ``(user_text, assistant_text)`` pairs,
    and dicts carrying ``"role"`` and ``"content"`` keys.  Only ``user`` and
    ``assistant`` turns are kept, and turns whose content is ``None`` are
    skipped so that no empty message is sent to the API.
    """
    messages = []
    for turn in history:
        if isinstance(turn, dict):
            # Rebuild the dict so that any extra keys are not forwarded.
            candidates = [(turn.get("role"), turn.get("content"))]
        else:
            human, assistant = turn
            candidates = [("user", human), ("assistant", assistant)]
        for role, content in candidates:
            if role in ("user", "assistant") and content is not None:
                messages.append({"role": role, "content": content})
    return messages


def predict(message, history):
    """Stream a recommendation reply for ``message``.

    The rows returned by ``find_docs(message)`` are formatted into the system
    prompt.  The request sent to the chat model consists of the previous
    turns, then that system prompt, then the new user message.

    Args:
        message: The latest user message.
        history: Previous turns of the conversation, either as
            ``(user, assistant)`` pairs or as role/content dicts.

    Yields:
        The reply text accumulated so far, once per streamed content delta.
    """
    retriever = find_docs(message)
    # NOTE(review): the Korean company name and the quotation marks around
    # "retriever" look mis-encoded (mojibake) in this copy of the file; the
    # literal is left untouched here - confirm against a UTF-8 original.
    system_prompt = """- You are an AI chat bot that recommends YouTube content to users as an assistant.\n- You were created and powered by 'bigster (λΉ…μŠ€ν„°)', an AI & bigdata expert company.\n- Recommend YouTube content to users based on what's in β€œretriever”.\n- If the user's question is not related to content recommendations, please display a message declining to answer.\n- You must recommend at least 3 YouTube content items to the user based on the information in the 'retriever'. Be sure to explicitly include 'url' & 'videoChannelName' & 'videoName' information in your response. Also, for each featured piece of content, summarize what's in the 'transcription' and present it to the user. Use the following Markdown format to create hyperlinks: '[videoName](url)'\n\n retriever:\n{retriever}"""

    openai_input = _history_to_messages(history)
    # The system prompt is placed after the history, right before the new
    # user message (same position as before this change).
    openai_input.append(
        {"role": "system", "content": system_prompt.format(retriever=retriever)}
    )
    openai_input.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model=MODEL,
        messages=openai_input,
        temperature=1.0,
        stream=True,
    )

    partial_message = ""
    for chunk in response:
        # Defensive: a streamed chunk without any choice carries no text and
        # would otherwise raise IndexError on choices[0].
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta.content
        if delta is not None:
            partial_message += delta
            yield partial_message

    # Debug output of the full request; only reached once the stream has been
    # fully consumed.
    print(openai_input)
# SECURITY: the app is launched with share=True, so the login must not be a
# password committed to the source tree. The password is required from the
# environment; the user name keeps its previous value as the default.
_auth_user = os.environ.get("APP_AUTH_USER", "user")
_auth_password = os.environ.get("APP_AUTH_PASSWORD")
if not _auth_password:
    raise RuntimeError(
        "The APP_AUTH_PASSWORD environment variable must be set to start the app."
    )

# NOTE(review): the example prompts below look mis-encoded (mojibake) in this
# copy of the file; they are left untouched - confirm against a UTF-8 original.
gr.ChatInterface(
    predict,
    title="YOUTUBE REC",
    theme=gr.themes.Soft(primary_hue="purple"),
    examples=[
        "λ„€ 이름은 뭐야?",
        "파이썬 ν”„λ‘œκ·Έλž˜λ° μ–Έμ–΄λ₯Ό λ…ν•™ν•˜κΈ° μœ„ν•œ μ˜μƒμ„ μΆ”μ²œν•΄μ€˜.",
        "μΈκ°„κ΄€κ³„μ—μ„œ 큰 상싀감을 λŠλΌλŠ” λ‚˜λ₯Ό μœ„ν•œ μ˜μƒμ„ μΆ”μ²œν•΄μ€˜.",
        "κ°„λ‹¨ν•˜κ³  μ΄ˆλ³΄μžλ„ μ΄ν•΄ν•˜κΈ° μ‰¬μš΄ λ”₯λŸ¬λ‹ κ°•μ˜ μΆ”μ²œν•΄μ€˜.",
        "ν”„λž‘μŠ€ 역사에 λŒ€ν•΄ κ³΅λΆ€ν•˜κ³  μ‹Άμ–΄μš”. ν”„λž‘μŠ€ 역사에 κ΄€λ ¨λœ λ‹€νλ©˜ν„°λ¦¬, κ°•μ˜ μ˜μƒμ„ μΆ”μ²œν•΄μ£Όμ„Έμš”.",
        "μš”μ¦˜ 에세이λ₯Ό μž‘μ„±ν•˜κ³  μžˆμŠ΅λ‹ˆλ‹€. 보닀 효과적으둜 글을 μ¨λ‚΄λ €κ°€λŠ” 방법을 μ œμ‹œν•΄μ£ΌλŠ” μ˜μƒμ„ μΆ”μ²œν•΄μ£Όμ„Έμš”.",
    ],
).launch(share=True, auth=(_auth_user, _auth_password))