AmirMohseni committed
Commit b3e5c9d
1 Parent(s): db7541c

Update README.md

Files changed (1)
  1. README.md +17 -19
README.md CHANGED
@@ -68,39 +68,37 @@ The model is not intended for tasks requiring deep reasoning, complex multi-turn
  Here is how you can use this model:
  
  ```python
- from peft import PeftModel
  from transformers import AutoModelForCausalLM, AutoTokenizer
  import torch
  
- # Define the base model and the adapter model
- base_model = "meta-llama/Meta-Llama-3.1-8B-Instruct"
- adapter_model = "AmirMohseni/Llama-3.1-8B-Instruct-Persian-finetuned-sft"
+ # Specify the combined model
+ model_name = "AmirMohseni/Llama-3.1-8B-Instruct-Persian-finetuned-sft"
  
- # Load the base model and apply the adapter model using PEFT
- model = AutoModelForCausalLM.from_pretrained(base_model, device_map={"": 0})
- model = PeftModel.from_pretrained(model, adapter_model)
+ # Load the model and tokenizer
+ model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+ # Ensure pad_token is set (if not already set)
+ if tokenizer.pad_token is None:
+     tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})
  
  # Check if CUDA is available, otherwise use CPU
  device = "cuda" if torch.cuda.is_available() else "cpu"
  model = model.to(device)
  
- # Load the tokenizer
- tokenizer = AutoTokenizer.from_pretrained(base_model)
-
- # Add a new pad token if necessary
- if tokenizer.pad_token is None:
-     tokenizer.add_special_tokens({'pad_token': '[PAD]'})  # Adding a distinct pad token
-
  # Example usage
  input_text = "چطوری میتونم به اطلاعات درباره ی سهام شرکت های آمریکایی دست پیدا کنم؟"
  
- # Tokenize the input and get both input IDs and attention mask
- inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
- input_ids = inputs['input_ids'].to(device)
- attention_mask = inputs['attention_mask'].to(device)
+ # Tokenize the input
+ inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True).to(device)
  
  # Generate text
- outputs = model.generate(input_ids, attention_mask=attention_mask, max_length=512, pad_token_id=tokenizer.pad_token_id)
+ outputs = model.generate(
+     inputs['input_ids'],
+     attention_mask=inputs['attention_mask'],
+     max_length=512,
+     pad_token_id=tokenizer.pad_token_id
+ )
  
  # Decode and print the output
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
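
For reference, here is the usage example as it stands after this commit, assembled from the `+` and context lines above into one runnable snippet. Two editorial touches are not in the commit itself: the closing `print(response)` (the README's comment says "print" but the hunk ends at the `decode` call) and the English translation comment on the Persian prompt. Also, since `device_map="auto"` already places the weights via accelerate, this version reads the device back with `model.device` instead of repeating the manual CUDA check and `model.to(device)`:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Merged checkpoint published in this repo (no separate PEFT adapter needed)
model_name = "AmirMohseni/Llama-3.1-8B-Instruct-Persian-finetuned-sft"

# device_map="auto" lets accelerate place the weights, so no manual model.to(device)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Reuse the EOS token for padding if the checkpoint ships without a pad token
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})

# Example prompt (Persian: "How can I find information about American companies' stocks?")
input_text = "چطوری میتونم به اطلاعات درباره ی سهام شرکت های آمریکایی دست پیدا کنم؟"

# Tokenize and move the tensors to wherever the model was placed
inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True).to(model.device)

# Generate text; note that max_length counts the prompt tokens too
outputs = model.generate(
    inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_length=512,
    pad_token_id=tokenizer.pad_token_id,
)

# Decode and print the output (print() added here; the diff ends at the decode call)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)
```

Dropping the base-plus-adapter load in favor of the merged checkpoint is the point of the commit: it removes the `peft` dependency and the gated `meta-llama` base download from the quickstart path.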
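
One caveat worth noting: the base checkpoint is an instruct model, so prompting through the tokenizer's chat template usually matches the fine-tuning format better than tokenizing raw text as the README does. A minimal sketch under that assumption, continuing from the snippet above (reusing `model` and `tokenizer`); the `messages` layout and `max_new_tokens=512` are illustrative, not part of the README:

```python
# Wrap the prompt as a chat message instead of raw text
messages = [
    {"role": "user", "content": "چطوری میتونم به اطلاعات درباره ی سهام شرکت های آمریکایی دست پیدا کنم؟"}
]

# apply_chat_template inserts the chat special tokens;
# add_generation_prompt=True appends the assistant header so the model answers
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=512, pad_token_id=tokenizer.pad_token_id)

# Decode only the newly generated tokens, skipping the prompt
response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
print(response)
```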