# CareerAdvisePro / evaluator/evaluator.py
# jeongsk's picture
# .
# e7ac61a
# %%
# Environment setup: call python-dotenv's load_dotenv() first so that values
# from a local .env file are available to the rest of the script
# (presumably API settings such as the OPENAI_BASE_URL read in the disabled
# cells below -- confirm against the project's .env).
from dotenv import load_dotenv
load_dotenv()
# NOTE: the triple-quoted string below is NOT a docstring. It is a bare string
# expression used to switch off the one-off data-preparation cells, so nothing
# inside it executes. As written, the disabled cells would:
#   1. read the source parquet under ../raw_data/dale_carnegie/,
#   2. wrap each row's "text" column in a langchain Document,
#   3. convert the documents with langchain_documents_to_parquet /
#      cast_corpus_dataset and write ./data/corpus.parquet,
#   4. call make_single_content_qa (content_size=49, one question per content,
#      gpt-4o via llama_index) and write ./data/qa.parquet.
# Those two output files are the inputs the Evaluator cell further down reads.
# NOTE(review): the for-loop body inside the string has no indentation, so the
# snippet would not run if simply un-quoted -- restore indentation before
# re-enabling it.
"""
# %%
import pandas as pd
df = pd.read_parquet(
"../raw_data/dale_carnegie/how_to_win_friends_and_influence_people.parquet"
)
df.head()
# %%
from langchain.schema import Document
documents = []
for index, row in df.iterrows():
doc = Document(page_content=row["text"])
documents.append(doc)
documents
# %%
from autorag.utils import cast_corpus_dataset
from autorag.data.corpus import langchain_documents_to_parquet
corpus_df = langchain_documents_to_parquet(documents)
corpus_df = cast_corpus_dataset(corpus_df)
corpus_df.to_parquet("./data/corpus.parquet")
# %%
import nest_asyncio
nest_asyncio.apply()
import os
from llama_index.llms.openai import OpenAI
from autorag.data.qacreation import generate_qa_llama_index, make_single_content_qa
llm = OpenAI(
api_base=os.getenv("OPENAI_BASE_URL"),
model="gpt-4o",
)
qa_df = make_single_content_qa(
corpus_df,
content_size=49,
qa_creation_func=generate_qa_llama_index,
llm=llm,
question_num_per_content=1,
)
qa_df.to_parquet("./data/qa.parquet")
"""
# %%
# Benchmark cell: register the candidate HuggingFace embedding models in
# AutoRAG's embedding registry, then start one evaluation trial over the
# prepared QA / corpus parquet files.
import nest_asyncio

# Patch asyncio before the AutoRAG imports, mirroring the original cell order.
nest_asyncio.apply()

import autorag as ag
from autorag.evaluator import Evaluator
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# (registry key, HuggingFace model id) pairs, registered in this exact order.
# NOTE(review): every model is instantiated eagerly at registration time --
# confirm this is intended if the trial config only uses a subset of them.
_HF_EMBEDDING_MODELS = (
    ("huggingface_baai_llm_embedder", "BAAI/llm-embedder"),
    ("huggingface_baai_bge_large_en", "BAAI/bge-large-en-v1.5"),
    ("huggingface_baai_bge_base_en", "BAAI/bge-base-en-v1.5"),
    ("huggingface_baai_bge_small_en", "BAAI/bge-small-en-v1.5"),
    ("huggingface_baai_bge_m3", "BAAI/bge-m3"),
)
for _registry_key, _model_id in _HF_EMBEDDING_MODELS:
    ag.embedding_models[_registry_key] = HuggingFaceEmbedding(_model_id)

# Paths are relative to the working directory the script is launched from.
_evaluator_settings = {
    "qa_data_path": "./data/qa.parquet",
    "corpus_data_path": "./data/corpus.parquet",
    "project_dir": "./benchmark",
}
evaluator = Evaluator(**_evaluator_settings)

# Run a single trial using the small benchmark configuration.
evaluator.start_trial("./config/config_small.yaml")
# %%