Update README.md
Browse files
README.md
CHANGED
@@ -33,10 +33,7 @@ datasets:
|
|
33 |
max_seq_length = 4096
|
34 |
dtype = None
|
35 |
load_in_4bit = True # Use 4bit quantization to reduce memory usage.
|
36 |
-
```
|
37 |
-
|
38 |
|
39 |
-
```py
|
40 |
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
|
41 |
|
42 |
### Instruction:
|
@@ -49,31 +46,53 @@ alpaca_prompt = """Below is an instruction that describes a task, paired with an
|
|
49 |
{}"""
|
50 |
```
|
51 |
|
|
|
52 |
```py
|
53 |
-
|
54 |
-
|
55 |
-
model, tokenizer = FastLanguageModel.from_pretrained(
|
56 |
model_name = "Svngoku/Llama-3.1-8B-AlpaCare-MedInstruct",
|
57 |
max_seq_length = max_seq_length,
|
58 |
dtype = dtype,
|
59 |
load_in_4bit = load_in_4bit,
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
# alpaca_prompt = You MUST copy from above!
|
64 |
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
|
74 |
-
|
75 |
-
|
76 |
-
|
|
|
|
|
77 |
```
|
78 |
|
79 |
This llama model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Huggingface's TRL library.
|
|
|
33 |
max_seq_length = 4096
|
34 |
dtype = None
|
35 |
load_in_4bit = True # Use 4bit quantization to reduce memory usage.
|
|
|
|
|
36 |
|
|
|
37 |
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
|
38 |
|
39 |
### Instruction:
|
|
|
46 |
{}"""
|
47 |
```
|
48 |
|
49 |
+
|
50 |
```py
|
51 |
+
from unsloth import FastLanguageModel
|
52 |
+
model, tokenizer = FastLanguageModel.from_pretrained(
|
|
|
53 |
model_name = "Svngoku/Llama-3.1-8B-AlpaCare-MedInstruct",
|
54 |
max_seq_length = max_seq_length,
|
55 |
dtype = dtype,
|
56 |
load_in_4bit = load_in_4bit,
|
57 |
+
)
|
58 |
+
FastLanguageModel.for_inference(model)
|
59 |
+
```
|
|
|
60 |
|
61 |
+
```py
|
62 |
+
def generate_medical_answer(input: str = "", instruction: str = ""):
|
63 |
+
inputs = tokenizer(
|
64 |
+
[
|
65 |
+
alpaca_prompt.format(
|
66 |
+
instruction,
|
67 |
+
input,
|
68 |
+
"",
|
69 |
+
)
|
70 |
+
], return_tensors = "pt").to("cuda")
|
71 |
+
text_streamer = TextStreamer(tokenizer)
|
72 |
+
# _ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 800)
|
73 |
+
# Generate the response
|
74 |
+
output = model.generate(**inputs, max_new_tokens=1024)
|
75 |
+
|
76 |
+
# Decode the generated response
|
77 |
+
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
|
78 |
+
|
79 |
+
# Extract the response part if needed (assuming the response starts after "### Response:")
|
80 |
+
response_start = generated_text.find("### Response:") + len("### Response:")
|
81 |
+
response = generated_text[response_start:].strip()
|
82 |
+
|
83 |
+
# Format the response in Markdown
|
84 |
+
# markdown_response = f"{response}"
|
85 |
+
|
86 |
+
# Render the markdown response
|
87 |
+
# display(Markdown(markdown_response))
|
88 |
+
return response
|
89 |
+
```
|
90 |
|
91 |
+
```py
|
92 |
+
generate_medical_answer(
|
93 |
+
instruction = "What are the pharmacodynamics of Omeprazole?",
|
94 |
+
input="Write the text in plain markdown."
|
95 |
+
)
|
96 |
```
|
97 |
|
98 |
This Llama model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Huggingface's TRL library.
|