DongfuJiang committed
Commit 742d7b5
1 Parent(s): 525d2e5
Files changed (2)
  1. app.py +26 -12
  2. requirements.txt +3 -1
app.py CHANGED
@@ -3,16 +3,22 @@ import os
 import gradio as gr
 import sys
 import copy
+import spaces
 from datasets import load_dataset
 from typing import List
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 from string import Template
+from tigerscore import TIGERScorer
 
 DESCRIPTIONS = """
 We present ***TIGERScore***, a **T**rained metric that follows **I**nstruction **G**uidance to perform **E**xplainable, and **R**eference-free evaluation over a wide spectrum of text generation tasks. Different from other automatic evaluation methods that only provide arcane scores, TIGERScore is guided by the natural language instruction to provide error analysis to pinpoint the mistakes in the generated text.
 
-### [**Website**](https://tiger-ai-lab.github.io/TIGERScore/) [**Paper**](https://arxiv.org/abs/2310.00752) [**Code**](https://github.com/TIGER-AI-Lab/TIGERScore) [**TIGERScore-7B**](https://huggingface.co/TIGER-Lab/TIGERScore-7B) [**TIGERScore-13B**](https://huggingface.co/TIGER-Lab/TIGERScore-13B)
+[**Website**](https://tiger-ai-lab.github.io/TIGERScore/) |
+[**Paper**](https://arxiv.org/abs/2310.00752) |
+[**Code**](https://github.com/TIGER-AI-Lab/TIGERScore) |
+[**TIGERScore-7B**](https://huggingface.co/TIGER-Lab/TIGERScore-7B) |
+[**TIGERScore-13B**](https://huggingface.co/TIGER-Lab/TIGERScore-13B)
 
 """
 
@@ -44,16 +50,25 @@ For each error you give in the response, please also elaborate the following inf
 Your evaluation output:
 """
 
-llm = Llama(
-    model_path=hf_hub_download(
-        repo_id=os.environ.get("REPO_ID", "TIGER-Lab/TIGERScore-7B-GGUF"),
-        filename=os.environ.get("MODEL_FILE", "ggml-model-q4_0.gguf"),
-    ),
-    n_ctx=2048,
-    # n_gpu_layers=50, # change n_gpu_layers if you have more or less VRAM
-)
+# llm = Llama(
+#     model_path=hf_hub_download(
+#         repo_id=os.environ.get("REPO_ID", "TIGER-Lab/TIGERScore-13B-GGUF"),
+#         filename=os.environ.get("MODEL_FILE", "ggml-model-q4_0.gguf"),
+#     ),
+#     n_ctx=2048,
+#     n_gpu_layers=50, # change n_gpu_layers if you have more or less VRAM
+# )
+scorer = TIGERScorer(model_name="TIGER-Lab/TIGERScore-13B")
+
+def generate_text_hf(input_context, generation_instruction, hypo_output, max_new_tokens=1024, temperature=0.7, top_p=1.0):
+    global scorer
+    scorer.model = scorer.model.to("cuda")
+
+    for output in scorer.generate_stream(generation_instruction, hypo_output, input_context, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p):
+        yield output
 
-def generate_text(input_context, generation_instruction, hypo_output, max_new_tokens=1024, temperature=0.7, top_p=1.0):
+def generate_text_llamacpp(input_context, generation_instruction, hypo_output, max_new_tokens=1024, temperature=0.7, top_p=1.0):
+    global llm
     prompt_template = Template(TEMPLATE)
     prompt = prompt_template.substitute(
         generation_instruction=generation_instruction,
@@ -76,7 +91,6 @@ def generate_text(input_context, generation_instruction, hypo_output, max_new_to
         stream = copy.deepcopy(out)
         temp += stream["choices"][0]["text"]
         yield temp
-
 
 def get_examples(inst_textbox, input_textbox, hypo_output_textbox):
     return inst_textbox, input_textbox, hypo_output_textbox
@@ -128,7 +142,7 @@ with gr.Blocks(theme='gradio/soft') as demo:
 
 
     submit_button.click(
-        fn=generate_text,
+        fn=generate_text_hf,
         inputs=[input_textbox, inst_textbox, hypo_output_textbox, max_new_tokens, temperature, top_p],
        outputs=evaluation_output_textbox,
    )
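
The commit keeps the llama.cpp path as `generate_text_llamacpp`, but its `llm` object is now commented out, so that path only runs if the `Llama(...)` block is re-enabled. For reference, here is a minimal sketch of the streaming loop whose tail appears in the `@@ -76,7 +91,6 @@` hunk above. It uses only documented llama-cpp-python arguments; `stream_completion` is a hypothetical wrapper and the model path and prompt are invented examples.

```python
import copy

from llama_cpp import Llama

# Example path; the app downloads a GGUF file via hf_hub_download instead.
llm = Llama(model_path="ggml-model-q4_0.gguf", n_ctx=2048)

def stream_completion(prompt, max_new_tokens=1024, temperature=0.7, top_p=1.0):
    # Mirrors the tail of generate_text_llamacpp shown in the hunk above.
    temp = ""
    for out in llm(
        prompt,
        max_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,  # yields chunks shaped like {"choices": [{"text": ...}]}
    ):
        stream = copy.deepcopy(out)  # the app deep-copies each chunk before reading it
        temp += stream["choices"][0]["text"]  # append the newly generated piece
        yield temp  # Gradio re-renders the growing string on each yield

for partial in stream_completion("Your evaluation output:"):
    print(partial, end="\r")
```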
requirements.txt CHANGED
@@ -1,4 +1,6 @@
 datasets==2.14.5
 torch
 transformers
-llama-cpp-python
+git+https://github.com/TIGER-AI-Lab/TIGERScore.git
+gradio==4.24.0
+spaces
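
The new `git+https://github.com/TIGER-AI-Lab/TIGERScore.git` requirement provides the `tigerscore` package that app.py now imports. A minimal sketch of driving the same scorer outside Gradio, assuming `TIGERScorer(model_name=...)` and `generate_stream(...)` behave as they are used in `generate_text_hf` above; the instruction, context, and hypothesis are invented examples.

```python
import torch
from tigerscore import TIGERScorer

# Same construction app.py uses; loads the TIGERScore-13B checkpoint.
scorer = TIGERScorer(model_name="TIGER-Lab/TIGERScore-13B")
if torch.cuda.is_available():
    scorer.model = scorer.model.to("cuda")  # app.py performs the same device move

instruction = "Summarize the article in one sentence."  # invented example
input_context = "The city council approved the new transit budget on Monday."
hypo_output = "The council rejected the budget."

# Consumed exactly as in generate_text_hf: each yielded value is the
# (partial) evaluation text that the app forwards to the output textbox.
last = ""
for partial in scorer.generate_stream(
    instruction, hypo_output, input_context,
    max_new_tokens=1024, temperature=0.7, top_p=1.0,
):
    last = partial
print(last)
```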