Spaces:
Runtime error
Runtime error
from dotenv import load_dotenv | |
load_dotenv() | |
import json | |
import os | |
from langchain.embeddings.openai import OpenAIEmbeddings | |
from langchain.llms import OpenAI | |
from langchain.vectorstores import DeepLake | |
from names import DATASET_ID, MODEL_ID | |
def create_db(dataset_path: str, json_filepath: str) -> DeepLake:
    """Build a DeepLake vector store from a JSON file of lyrics.

    The JSON file must contain an object mapping each movie to a list of
    lyric records. Every record must provide the keys ``"text"``,
    ``"name"`` and ``"embed_url"``. Each record's text becomes one document;
    the movie together with the record's name and embed URL becomes that
    document's metadata.

    Args:
        dataset_path: Destination forwarded to ``DeepLake.from_texts`` as
            its ``dataset_path`` argument.
        json_filepath: Path of the JSON file to read.

    Returns:
        The store returned by ``DeepLake.from_texts``.

    Raises:
        OSError: If the JSON file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If a lyric record lacks one of the required keys.
    """
    # JSON is UTF-8 by specification; decode explicitly so non-ASCII text
    # does not depend on the platform's default locale encoding.
    with open(json_filepath, "r", encoding="utf-8") as f:
        data = json.load(f)

    # texts[i] and metadatas[i] describe the same lyric, so both lists are
    # filled in lockstep.
    texts = []
    metadatas = []
    for movie, lyrics in data.items():
        for lyric in lyrics:
            texts.append(lyric["text"])
            metadatas.append(
                {
                    "movie": movie,
                    "name": lyric["name"],
                    "embed_url": lyric["embed_url"],
                }
            )

    embeddings = OpenAIEmbeddings(model=MODEL_ID)
    return DeepLake.from_texts(
        texts, embeddings, metadatas=metadatas, dataset_path=dataset_path
    )
def load_db(dataset_path: str, *args, **kwargs) -> DeepLake:
    """Construct a ``DeepLake`` store for ``dataset_path``.

    Args:
        dataset_path: First positional argument given to ``DeepLake``.
        *args: Extra positional arguments forwarded unchanged.
        **kwargs: Extra keyword arguments forwarded unchanged.

    Returns:
        The newly constructed ``DeepLake`` instance.
    """
    return DeepLake(dataset_path, *args, **kwargs)
if __name__ == "__main__":
    # Script entry point: build the dataset from the bundled lyrics file.
    # ACTIVELOOP_ORG_ID must be set in the environment (a KeyError is raised
    # otherwise).
    lyrics_json = "data/emotions_with_spotify_url.json"
    dataset_path = f"hub://{os.environ['ACTIVELOOP_ORG_ID']}/{DATASET_ID}"
    create_db(dataset_path, lyrics_json)