Spaces:
Running
Running
nicholasKluge
committed on
Commit
•
33cc93c
1
Parent(s):
8651bfd
Update app.py
Browse files
app.py
CHANGED
@@ -7,68 +7,50 @@ from sklearn.metrics.pairwise import cosine_similarity
|
|
7 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
8 |
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification
|
9 |
|
10 |
-
|
11 |
-
# download the instruct-aira-dataset
|
12 |
dataset = load_dataset("nicholasKluge/instruct-aira-dataset", split='english')
|
13 |
|
14 |
-
# convert the dataset to a pandas dataframe
|
15 |
df = dataset.to_pandas()
|
16 |
|
17 |
-
# rename the columns
|
18 |
df.columns = ['Prompt', 'Completion']
|
19 |
|
20 |
-
# add a column to store the cosine similarity
|
21 |
df['Cosine Similarity'] = None
|
22 |
|
23 |
-
# Load the saved prompt TfidfVectorizer
|
24 |
prompt_tfidf_vectorizer = joblib.load('prompt_vectorizer.pkl')
|
25 |
-
|
26 |
-
# load the prompt tfidf_matrix
|
27 |
prompt_tfidf_matrix = joblib.load('prompt_tfidf_matrix.pkl')
|
28 |
|
29 |
-
# Load the saved completion TfidfVectorizer
|
30 |
completion_tfidf_vectorizer = joblib.load('completion_vectorizer.pkl')
|
31 |
-
|
32 |
-
# load the completion tfidf_matrix
|
33 |
completion_tfidf_matrix = joblib.load('completion_tfidf_matrix.pkl')
|
34 |
|
35 |
-
# specify the model's ids
|
36 |
model_id = "nicholasKluge/Aira-OPT-125M"
|
37 |
rewardmodel_id = "nicholasKluge/RewardModel"
|
38 |
toxicitymodel_id = "nicholasKluge/ToxicityModel"
|
39 |
|
40 |
-
# specify the device (cuda if available)
|
41 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
42 |
|
43 |
-
# load the models (chatbot, reward model, toxicity model)
|
44 |
model = AutoModelForCausalLM.from_pretrained(model_id)
|
45 |
rewardModel = AutoModelForSequenceClassification.from_pretrained(rewardmodel_id)
|
46 |
toxicityModel = AutoModelForSequenceClassification.from_pretrained(toxicitymodel_id)
|
47 |
|
48 |
-
# set the models to evaluation mode
|
49 |
model.eval()
|
50 |
rewardModel.eval()
|
51 |
toxicityModel.eval()
|
52 |
|
53 |
-
# set the models to the device
|
54 |
model.to(device)
|
55 |
rewardModel.to(device)
|
56 |
toxicityModel.to(device)
|
57 |
|
58 |
-
# load the tokenizers
|
59 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
60 |
rewardTokenizer = AutoTokenizer.from_pretrained(rewardmodel_id)
|
61 |
toxiciyTokenizer = AutoTokenizer.from_pretrained(toxicitymodel_id)
|
62 |
|
63 |
-
|
64 |
intro = """
|
65 |
-
## What is
|
66 |
|
67 |
-
[
|
68 |
|
69 |
## Limitations
|
70 |
|
71 |
-
We developed our
|
72 |
|
73 |
**Hallucinations:** This model can produce content that can be mistaken for truth but is, in fact, misleading or entirely false, i.e., hallucination.
|
74 |
|
@@ -78,9 +60,9 @@ We developed our open-domain conversational chatbots via instruction-tuning. Thi
|
|
78 |
|
79 |
## Intended Use
|
80 |
|
81 |
-
|
82 |
|
83 |
-
## How this demo
|
84 |
|
85 |
For this demo, we use the lighter model we have trained from the OPT series (`Aira-OPT-125M`). This demo employs a [`reward model`](https://huggingface.co/nicholasKluge/RewardModel) and a [`toxicity model`](https://huggingface.co/nicholasKluge/ToxicityModel) to evaluate the score of each candidate's response, considering its alignment with the user's message and its level of toxicity. The generation function arranges the candidate responses in order of their reward scores and eliminates any responses deemed toxic or harmful. Subsequently, the generation function returns the candidate response with the highest score that surpasses the safety threshold, or a default message if no safe candidates are identified.
|
86 |
"""
|
@@ -88,15 +70,15 @@ For this demo, we use the lighter model we have trained from the OPT series (`Ai
|
|
88 |
search_intro ="""
|
89 |
<h2><center>Explore Aira's Dataset 🔍</h2></center>
|
90 |
|
91 |
-
Here, users can look for instances in Aira's fine-tuning dataset where a given prompt or completion resembles an instruction in its dataset.
|
92 |
|
93 |
-
Users can use this to explore how the model interpolates on the fine-tuning data and if it
|
94 |
"""
|
95 |
|
96 |
disclaimer = """
|
97 |
**Disclaimer:** You should use this demo for research purposes only. Moderators do not censor the model output, and the authors do not endorse the opinions generated by this model.
|
98 |
|
99 |
-
If you would like to complain about any message produced by
|
100 |
"""
|
101 |
|
102 |
with gr.Blocks(theme='freddyaboulton/dracula_revamped') as demo:
|
@@ -114,7 +96,7 @@ with gr.Blocks(theme='freddyaboulton/dracula_revamped') as demo:
|
|
114 |
likeable=False,
|
115 |
layout='panel')
|
116 |
|
117 |
-
msg = gr.Textbox(label="Write a question or instruction
|
118 |
|
119 |
# Parameters to control the generation
|
120 |
with gr.Accordion(label="Parameters ⚙️", open=False):
|
|
|
7 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
8 |
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification
|
9 |
|
|
|
|
|
10 |
dataset = load_dataset("nicholasKluge/instruct-aira-dataset", split='english')
|
11 |
|
|
|
12 |
df = dataset.to_pandas()
|
13 |
|
|
|
14 |
df.columns = ['Prompt', 'Completion']
|
15 |
|
|
|
16 |
df['Cosine Similarity'] = None
|
17 |
|
|
|
18 |
prompt_tfidf_vectorizer = joblib.load('prompt_vectorizer.pkl')
|
|
|
|
|
19 |
prompt_tfidf_matrix = joblib.load('prompt_tfidf_matrix.pkl')
|
20 |
|
|
|
21 |
completion_tfidf_vectorizer = joblib.load('completion_vectorizer.pkl')
|
|
|
|
|
22 |
completion_tfidf_matrix = joblib.load('completion_tfidf_matrix.pkl')
|
23 |
|
|
|
24 |
model_id = "nicholasKluge/Aira-OPT-125M"
|
25 |
rewardmodel_id = "nicholasKluge/RewardModel"
|
26 |
toxicitymodel_id = "nicholasKluge/ToxicityModel"
|
27 |
|
|
|
28 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
29 |
|
|
|
30 |
model = AutoModelForCausalLM.from_pretrained(model_id)
|
31 |
rewardModel = AutoModelForSequenceClassification.from_pretrained(rewardmodel_id)
|
32 |
toxicityModel = AutoModelForSequenceClassification.from_pretrained(toxicitymodel_id)
|
33 |
|
|
|
34 |
model.eval()
|
35 |
rewardModel.eval()
|
36 |
toxicityModel.eval()
|
37 |
|
|
|
38 |
model.to(device)
|
39 |
rewardModel.to(device)
|
40 |
toxicityModel.to(device)
|
41 |
|
|
|
42 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
43 |
rewardTokenizer = AutoTokenizer.from_pretrained(rewardmodel_id)
|
44 |
toxiciyTokenizer = AutoTokenizer.from_pretrained(toxicitymodel_id)
|
45 |
|
|
|
46 |
intro = """
|
47 |
+
## What is Aira?
|
48 |
|
49 |
+
[Aira](https://huggingface.co/nicholasKluge/Aira-OPT-125M) is a series of open-domain chatbots (Portuguese and English) achieved via supervised fine-tuning and DPO. Aira-2 is the second version of the Aira series. The Aira series was developed to help researchers explore the challenges related to the Alignment problem.
|
50 |
|
51 |
## Limitations
|
52 |
|
53 |
+
We developed our chatbots via supervised fine-tuning and DPO. This approach has a lot of limitations. Even though we can make a chatbot that can answer questions about anything, forcing the model to produce good-quality responses is hard. And by good, we mean **factual** and **nontoxic** text. This leads us to some problems:
|
54 |
|
55 |
**Hallucinations:** This model can produce content that can be mistaken for truth but is, in fact, misleading or entirely false, i.e., hallucination.
|
56 |
|
|
|
60 |
|
61 |
## Intended Use
|
62 |
|
63 |
+
Aira is intended only for academic research. For more information, read our [model card](https://huggingface.co/nicholasKluge/Aira-OPT-125M).
|
64 |
|
65 |
+
## How does this demo work?
|
66 |
|
67 |
For this demo, we use the lighter model we have trained from the OPT series (`Aira-OPT-125M`). This demo employs a [`reward model`](https://huggingface.co/nicholasKluge/RewardModel) and a [`toxicity model`](https://huggingface.co/nicholasKluge/ToxicityModel) to evaluate the score of each candidate's response, considering its alignment with the user's message and its level of toxicity. The generation function arranges the candidate responses in order of their reward scores and eliminates any responses deemed toxic or harmful. Subsequently, the generation function returns the candidate response with the highest score that surpasses the safety threshold, or a default message if no safe candidates are identified.
|
68 |
"""
|
|
|
70 |
search_intro ="""
|
71 |
<h2><center>Explore Aira's Dataset 🔍</h2></center>
|
72 |
|
73 |
+
Here, users can look for instances in Aira's fine-tuning dataset where a given prompt or completion resembles an instruction in its dataset. We use the Term Frequency-Inverse Document Frequency (TF-IDF) representation and cosine similarity to enable a fast search to explore the dataset. The pre-trained TF-IDF vectorizers and corresponding TF-IDF matrices are available in this repository. Below, we present the top five most similar instances in Aira's dataset for every search query.
|
74 |
|
75 |
+
Users can use this to explore how the model interpolates on the fine-tuning data and if it can follow instructions that are out of the fine-tuning distribution.
|
76 |
"""
|
77 |
|
78 |
disclaimer = """
|
79 |
**Disclaimer:** You should use this demo for research purposes only. Moderators do not censor the model output, and the authors do not endorse the opinions generated by this model.
|
80 |
|
81 |
+
If you would like to complain about any message produced by Aira, please contact [nicholas@airespucrs.org](mailto:nicholas@airespucrs.org).
|
82 |
"""
|
83 |
|
84 |
with gr.Blocks(theme='freddyaboulton/dracula_revamped') as demo:
|
|
|
96 |
likeable=False,
|
97 |
layout='panel')
|
98 |
|
99 |
+
msg = gr.Textbox(label="Write a question or instruction ...", placeholder="What is the capital of Brazil?")
|
100 |
|
101 |
# Parameters to control the generation
|
102 |
with gr.Accordion(label="Parameters ⚙️", open=False):
|