Update README.md
README.md
|
|

This is a [TRL language model](https://github.com/huggingface/trl) that has been fine-tuned with reinforcement learning to guide the model outputs according to simulated human feedback. The model was fine-tuned for classification of cancer / diabetes based on clinical notes.
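
The value-head model class used below suggests TRL's PPO workflow. A minimal sketch of such a training loop follows, for orientation only: the base checkpoint name, reward rule, and hyperparameters are assumptions for illustration, not details taken from this repository.

```python
import torch
from transformers import AutoTokenizer
from trl import AutoModelForCausalLMWithValueHead, PPOConfig, PPOTrainer

# Assumption: the card does not state the base checkpoint; a Mistral instruct
# model is used here purely for illustration.
base = "mistralai/Mistral-7B-Instruct-v0.1"

config = PPOConfig(model_name=base, batch_size=1, mini_batch_size=1)
model = AutoModelForCausalLMWithValueHead.from_pretrained(base)
tokenizer = AutoTokenizer.from_pretrained(base)
tokenizer.pad_token = tokenizer.eos_token

ppo_trainer = PPOTrainer(config, model, tokenizer=tokenizer)

# One PPO update: query -> sampled response -> scalar reward.
query = tokenizer.encode("<prompt built as in the Usage section>", return_tensors="pt")[0]
response = ppo_trainer.generate(query, return_prompt=False).squeeze(0)

# Assumption: the "simulated human feedback" is a programmatic reward, e.g.
# +1.0 when the generated JSON matches a reference label and -1.0 otherwise.
reward = torch.tensor(1.0)
ppo_trainer.step([query], [response], [reward])
```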

```bash
pip install torch transformers trl peft
```

## Usage

```python
from transformers import AutoTokenizer
from trl import AutoModelForCausalLMWithValueHead
from peft import LoraConfig

tokenizer_kwargs = {
    "padding": "max_length",
    # ... (the rest of tokenizer_kwargs is not shown in this diff)
}

# ... (tokenizer setup and the remaining generation settings are not shown in this diff)
generation_kwargs = {
    # ...
    "repetition_penalty": 1.2
}

model = AutoModelForCausalLMWithValueHead.from_pretrained("hanyinwang/layer-project-diagnostic-mistral").cuda()

def format_prompt_mistral(text, condition):
    prompt = """<s>[INST]You are a medical doctor specialized in %s diagnosis.
From the provided document, assert if the patient historically and currently has %s.
For each condition, only pick from "YES", "NO", or "MAYBE". And you must follow format without anything further. The results have to be directly parseable with python json.loads().
Sample output: {"%s": "MAYBE"}
Never output anything beyond the format.[/INST]
Provided document: %s""" % (condition, condition, condition, text)
    return prompt

query_tensors = tokenizer.encode(format_prompt_mistral(<note>, <condition>), return_tensors="pt")
# <note>: clinical note
# <condition>: "cancer" or "diabetes"
prompt_length = query_tensors.shape[1]

outputs = model.generate(query_tensors.cuda(), **generation_kwargs)
response = tokenizer.decode(outputs[0][prompt_length:])
```
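
Because the prompt requires the model to emit a single JSON object, the decoded response can be parsed directly. A minimal sketch, assuming the decoded text may carry a trailing EOS token such as `</s>` (that cleanup step is an assumption about this model's output, not something stated on the card):

```python
import json

# Strip an assumed trailing EOS token before parsing; the prompt instructs the
# model that the remainder must be a single JSON object like {"cancer": "MAYBE"}.
parsed = json.loads(response.replace("</s>", "").strip())
prediction = parsed["cancer"]  # or parsed["diabetes"], matching the <condition> used above
```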