Svngoku committed
Commit f7ce86a
1 Parent(s): ebe4439

Update README.md

Files changed (1)
  1. README.md +40 -21
README.md CHANGED
@@ -33,10 +33,7 @@ datasets:
 max_seq_length = 4096
 dtype = None
 load_in_4bit = True # Use 4bit quantization to reduce memory usage.
-```
-
 
-```py
 alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 
 ### Instruction:
@@ -49,31 +46,53 @@ alpaca_prompt = """Below is an instruction that describes a task, paired with an
 {}"""
 ```
 
+
 ```py
-if True:
-    from unsloth import FastLanguageModel
-    model, tokenizer = FastLanguageModel.from_pretrained(
+from unsloth import FastLanguageModel
+model, tokenizer = FastLanguageModel.from_pretrained(
     model_name = "Svngoku/Llama-3.1-8B-AlpaCare-MedInstruct",
     max_seq_length = max_seq_length,
     dtype = dtype,
     load_in_4bit = load_in_4bit,
-    )
-    FastLanguageModel.for_inference(model) # Enable native 2x faster inference
-
-# alpaca_prompt = You MUST copy from above!
+)
+FastLanguageModel.for_inference(model) # Enable native 2x faster inference
+```
 
-inputs = tokenizer(
-    [
-        alpaca_prompt.format(
-            "Write an argument emphasizing the importance of ethical considerations in medical research.", # instruction
-            "", # input
-            "", # output - leave this blank for generation!
-        )
-    ], return_tensors = "pt").to("cuda")
-
-from transformers import TextStreamer
-text_streamer = TextStreamer(tokenizer)
-_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 800)
+```py
+def generate_medical_answer(input: str = "", instruction: str = ""):
+    inputs = tokenizer(
+        [
+            alpaca_prompt.format(
+                instruction,
+                input,
+                "", # output - leave this blank for generation!
+            )
+        ], return_tensors = "pt").to("cuda")
+
+    # Generate the response
+    output = model.generate(**inputs, max_new_tokens = 1024)
+
+    # Decode the generated tokens back into text
+    generated_text = tokenizer.decode(output[0], skip_special_tokens = True)
+
+    # Keep only the answer, which starts after the "### Response:" marker
+    response_start = generated_text.find("### Response:") + len("### Response:")
+    response = generated_text[response_start:].strip()
+    return response
+```
 
+```py
+generate_medical_answer(
+    instruction = "What are the pharmacodynamics of Omeprazole?",
+    input = "Write the text in plain markdown."
+)
 ```
 
 This llama model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Huggingface's TRL library.
 
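For reference, here is a minimal end-to-end sketch of the updated snippets assembled into one script. It assumes a CUDA-capable GPU with `unsloth` installed; the middle of the `alpaca_prompt` template (its `### Input:` and `### Response:` sections) is not shown in the diff, so the standard Alpaca layout is assumed here:

```py
# Sketch only: assumes `unsloth` + a CUDA GPU; model and helper names are from the README above.
from unsloth import FastLanguageModel

max_seq_length = 4096
dtype = None          # None = auto-detect (e.g. bfloat16 on recent GPUs)
load_in_4bit = True   # Use 4bit quantization to reduce memory usage.

# Standard Alpaca template; the "### Input:"/"### Response:" middle is assumed,
# since the diff only shows the first and last lines of this string.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "Svngoku/Llama-3.1-8B-AlpaCare-MedInstruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
FastLanguageModel.for_inference(model)  # Enable native 2x faster inference

def generate_medical_answer(input: str = "", instruction: str = "") -> str:
    # Fill the template, leaving the response slot empty for generation.
    inputs = tokenizer(
        [alpaca_prompt.format(instruction, input, "")],
        return_tensors = "pt",
    ).to("cuda")
    output = model.generate(**inputs, max_new_tokens = 1024)
    generated_text = tokenizer.decode(output[0], skip_special_tokens = True)
    # Keep only the text after the "### Response:" marker.
    return generated_text.split("### Response:", 1)[-1].strip()

print(generate_medical_answer(
    instruction = "What are the pharmacodynamics of Omeprazole?",
    input = "Write the text in plain markdown.",
))
```

Returning a plain string keeps the helper side-effect free; token-by-token streaming via `TextStreamer` (dropped by this commit) could be reinstated by passing `streamer=` to `model.generate`.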