Spaces:

inflaton
/

rap

Sleeping

App Files Community

dh-mc committed on Oct 21, 2024

Commit

5b4cc8a

1 Parent(s): 251efe4

BERT-F1

Browse files

Files changed (3) hide show

.env.example +2 -81
app.py +29 -7
eval_modules/utils.py +13 -2

.env.example CHANGED Viewed

@@ -1,81 +1,2 @@
-LLM_MODEL_TYPE=huggingface
-# LLM_MODEL_TYPE=openai
-# LLM_MODEL_TYPE=hftgi
-# LLM_MODEL_TYPE=ollama
-# LLM_MODEL_TYPE=google
-# LLM_MODEL_TYPE=vllm
-HUGGINGFACE_AUTH_TOKEN=
-HFTGI_SERVER_URL=
-OPENAI_API_KEY=
-GOOGLE_API_KEY=
-# if unset, default to "gpt-3.5-turbo"
-OPENAI_MODEL_NAME=
-# GEMINI_MODEL_NAME=gemini-1.5-pro-latest
-# OLLAMA_MODEL_NAME=orca2:7b
-# OLLAMA_MODEL_NAME=mistral:7b
-# OLLAMA_MODEL_NAME=gemma:7b
-# OLLAMA_MODEL_NAME=llama2:7b
-OLLAMA_MODEL_NAME=llama3:8b
-OLLAMA_RP=1.15
-HF_RP=1.15
-LANGCHAIN_DEBUG=false
-BATCH_SIZE=1
-APPLY_CHAT_TEMPLATE_FOR_RAG=true
-# cpu, mps or cuda:0 - if unset, use whatever detected
-HF_EMBEDDINGS_DEVICE_TYPE=
-HF_PIPELINE_DEVICE_TYPE=
-# uncomment one of the below to load corresponding quantized model
-# LOAD_QUANTIZED_MODEL=4bit
-# LOAD_QUANTIZED_MODEL=8bit
-QA_WITH_RAG=true
-# QA_WITH_RAG=false
-RETRIEVER_TYPE=questions_file
-# RETRIEVER_TYPE=vectorstore
-QUESTIONS_FILE_PATH="./data/datasets/ms_macro.json"
-DISABLE_MODEL_PRELOADING=true
-CHAT_HISTORY_ENABLED=false
-SHOW_PARAM_SETTINGS=false
-SHARE_GRADIO_APP=false
-# if unset, default to "hkunlp/instructor-xl"
-HF_EMBEDDINGS_MODEL_NAME="hkunlp/instructor-large"
-# number of cpu cores - used to set n_threads for GPT4ALL & LlamaCpp models
-NUMBER_OF_CPU_CORES=
-USING_TORCH_BFLOAT16=true
-# HUGGINGFACE_MODEL_NAME_OR_PATH="databricks/dolly-v2-3b"
-# HUGGINGFACE_MODEL_NAME_OR_PATH="databricks/dolly-v2-7b"
-# HUGGINGFACE_MODEL_NAME_OR_PATH="databricks/dolly-v2-12b"
-# HUGGINGFACE_MODEL_NAME_OR_PATH="TheBloke/wizardLM-7B-HF"
-# HUGGINGFACE_MODEL_NAME_OR_PATH="TheBloke/vicuna-7B-1.1-HF"
-# HUGGINGFACE_MODEL_NAME_OR_PATH="nomic-ai/gpt4all-j"
-# HUGGINGFACE_MODEL_NAME_OR_PATH="nomic-ai/gpt4all-falcon"
-# HUGGINGFACE_MODEL_NAME_OR_PATH="lmsys/fastchat-t5-3b-v1.0"
-# HUGGINGFACE_MODEL_NAME_OR_PATH="meta-llama/Llama-2-7b-chat-hf"
-# HUGGINGFACE_MODEL_NAME_OR_PATH="meta-llama/Llama-2-13b-chat-hf"
-# HUGGINGFACE_MODEL_NAME_OR_PATH="meta-llama/Llama-2-70b-chat-hf"
-# HUGGINGFACE_MODEL_NAME_OR_PATH="meta-llama/Meta-Llama-3-8B-Instruct"
-# HUGGINGFACE_MODEL_NAME_OR_PATH="meta-llama/Meta-Llama-3-70B-Instruct"
-# HUGGINGFACE_MODEL_NAME_OR_PATH="microsoft/Orca-2-7b"
-# HUGGINGFACE_MODEL_NAME_OR_PATH="microsoft/Orca-2-13b"
-HUGGINGFACE_MODEL_NAME_OR_PATH="google/gemma-1.1-2b-it"
-# HUGGINGFACE_MODEL_NAME_OR_PATH="google/gemma-1.1-7b-it"
-# HUGGINGFACE_MODEL_NAME_OR_PATH="microsoft/Phi-3-mini-128k-instruct"
-# HUGGINGFACE_MODEL_NAME_OR_PATH="mistralai/Mistral-7B-Instruct-v0.2"


1	+ HF_TOKEN=
2	+ MODEL_NAME=microsoft/Phi-3.5-mini-instruct

app.py CHANGED Viewed

@@ -1,10 +1,30 @@
 import json
 import os
 import gradio as gr
-from huggingface_hub import InferenceClient
-from eval_modules.utils import calc_bleu_rouge_scores
 from eval_modules.calc_repetitions_v2e import detect_repetitions
 questions_file_path = os.getenv("QUESTIONS_FILE_PATH") or "./ms_macro.json"
 questions = json.loads(open(questions_file_path).read())
@@ -18,7 +38,8 @@ For more information on `huggingface_hub` Inference API support, please check th
 """
 # client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 # client = InferenceClient("HuggingFaceH4/zephyr-7b-gemma-v0.1")
-client = InferenceClient("microsoft/Phi-3.5-mini-instruct")
 def chat(
@@ -74,11 +95,11 @@ def chat(
     answer = partial_text
     (whitespace_score, repetition_score, total_repetitions) = detect_repetitions(answer)
     partial_text += "\n\nRepetition Metrics:\n"
-    partial_text += f"1. Whitespace Score: {whitespace_score:.3f}\n"
-    partial_text += f"1. Repetition Score: {repetition_score:.3f}\n"
     partial_text += f"1. Total Repetitions: {total_repetitions:.3f}\n"
     partial_text += (
-        f"1. Non-Repetitive Ratio: {1 - total_repetitions / len(answer):.3f}\n"
     )
     if index >= 0:  # RAG
@@ -87,11 +108,12 @@ def chat(
             if "wellFormedAnswers" in questions[index]
             else "answers"
         )
-        scores = calc_bleu_rouge_scores([answer], [questions[index][key]], debug=True)
         partial_text += "\n\n Performance Metrics:\n"
         partial_text += f'1. BLEU-1: {scores["bleu_scores"]["bleu"]:.3f}\n'
         partial_text += f'1. RougeL: {scores["rouge_scores"]["rougeL"]:.3f}\n'
         partial_text += f"\n\nGround truth: {questions[index][key][0]}\n"

 import json
 import os
+import sys
+import evaluate
 import gradio as gr
+from dotenv import find_dotenv, load_dotenv
+from huggingface_hub import InferenceClient, login
+found_dotenv = find_dotenv(".env")
+if len(found_dotenv) == 0:
+    found_dotenv = find_dotenv(".env.example")
+print(f"loading env vars from: {found_dotenv}")
+load_dotenv(found_dotenv, override=False)
+path = os.path.dirname(found_dotenv)
+print(f"Adding {path} to sys.path")
+sys.path.append(path)
+from eval_modules.utils import calc_perf_scores
 from eval_modules.calc_repetitions_v2e import detect_repetitions
+model_name = os.getenv("MODEL_NAME") or "microsoft/Phi-3.5-mini-instruct"
+hf_token = os.getenv("HF_TOKEN")
+login(token=hf_token, add_to_git_credential=True)
 questions_file_path = os.getenv("QUESTIONS_FILE_PATH") or "./ms_macro.json"
 questions = json.loads(open(questions_file_path).read())
 """
 # client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 # client = InferenceClient("HuggingFaceH4/zephyr-7b-gemma-v0.1")
+# client = InferenceClient("microsoft/Phi-3.5-mini-instruct")
+client = InferenceClient(model_name, token=hf_token)
 def chat(
     answer = partial_text
     (whitespace_score, repetition_score, total_repetitions) = detect_repetitions(answer)
     partial_text += "\n\nRepetition Metrics:\n"
+    partial_text += f"1. EWC Repetition Score: {whitespace_score:.3f}\n"
+    partial_text += f"1. Text Repetition Score: {repetition_score:.3f}\n"
     partial_text += f"1. Total Repetitions: {total_repetitions:.3f}\n"
     partial_text += (
+        f"1. Repetition Ratio: {total_repetitions / len(answer):.3f}\n"
     )
     if index >= 0:  # RAG
             if "wellFormedAnswers" in questions[index]
             else "answers"
         )
+        scores = calc_perf_scores([answer], [questions[index][key]], debug=True)
         partial_text += "\n\n Performance Metrics:\n"
         partial_text += f'1. BLEU-1: {scores["bleu_scores"]["bleu"]:.3f}\n'
         partial_text += f'1. RougeL: {scores["rouge_scores"]["rougeL"]:.3f}\n'
+        partial_text += f'1. BERT-F1: {scores["bert_scores"]["f1"][0]:.3f}\n'
         partial_text += f"\n\nGround truth: {questions[index][key][0]}\n"

eval_modules/utils.py CHANGED Viewed

@@ -173,9 +173,10 @@ def ensure_model_is_downloaded(llm_model_type):
 bleu = evaluate.load("bleu")
 rouge = evaluate.load("rouge")
-def calc_bleu_rouge_scores(predictions, references, debug=False):
     if debug:
         print("predictions:", predictions)
         print("references:", references)
@@ -184,7 +185,17 @@ def calc_bleu_rouge_scores(predictions, references, debug=False):
         predictions=predictions, references=references, max_order=1
     )
     rouge_scores = rouge.compute(predictions=predictions, references=references)
-    result = {"bleu_scores": bleu_scores, "rouge_scores": rouge_scores}
     if debug:
         print("result:", result)

 bleu = evaluate.load("bleu")
 rouge = evaluate.load("rouge")
+bert_score = evaluate.load("bertscore")
+def calc_perf_scores(predictions, references, debug=False):
     if debug:
         print("predictions:", predictions)
         print("references:", references)
         predictions=predictions, references=references, max_order=1
     )
     rouge_scores = rouge.compute(predictions=predictions, references=references)
+    bert_scores = bert_score.compute(
+        predictions=predictions,
+        references=references,
+        lang="en",
+        model_type="microsoft/deberta-large-mnli",
+    )
+    result = {
+        "bleu_scores": bleu_scores,
+        "rouge_scores": rouge_scores,
+        "bert_scores": bert_scores,
+    }
     if debug:
         print("result:", result)