tags:
  - text2sql
---
# Model Description

Our model is fine-tuned from the Llama-2 7B model on a text-to-SQL dataset in the Alpaca format described by Meta. The data comes from the "b-mc2/sql-create-context" dataset on Hugging Face. We used QLoRA together with the bitsandbytes, Accelerate, and Transformers libraries to apply PEFT, starting from the pre-trained 'NousResearch/Llama-2-7b-chat-hf' checkpoint.
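
For reference, each record in the dataset pairs a natural-language question with a `CREATE TABLE` context and the target SQL answer. Below is a minimal sketch of loading the data and rendering one record into an Alpaca-style prompt; the exact template wording used during fine-tuning is an assumption here.

```python
from datasets import load_dataset

# b-mc2/sql-create-context exposes "question", "context" and "answer" columns.
dataset = load_dataset("b-mc2/sql-create-context", split="train")
sample = dataset[0]

# Assumed Alpaca-style template; the exact wording used for training may differ.
text = (
    "Below is a context that describes a SQL table, paired with a question. "
    "Write a SQL query that answers the question.\n\n"
    f"### Context: {sample['context']}\n"
    f"### Question: {sample['question']}\n"
    f"### Answer: {sample['answer']}"
)
print(text)
```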

# Inference

```python
!pip install transformers accelerate xformers bitsandbytes
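# The diff omits the model/tokenizer loading and prompt construction that the
# original notebook has at this point; the lines below are a reconstruction
# under assumptions (checkpoint id, float16 loading, Alpaca-style template).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_name = "NousResearch/Llama-2-7b-chat-hf"  # replace with the fine-tuned checkpoint id

model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float16, device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# Assumed prompt template (the full text is truncated in the source diff).
context = "CREATE TABLE employees (id INT, name VARCHAR, salary INT)"
question = "Which employee has the highest salary?"
prompt = f"""Below is a context that describes a SQL table, paired with a question. Write a SQL query that answers the question.
### Context: {context}
### Question: {question}
### Answer:"""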
 
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
result = pipe(prompt)
print(result[0]['generated_text'])
```

# Model Information

- `model_name = "NousResearch/Llama-2-7b-chat-hf"`
- `dataset_name = "b-mc2/sql-create-context"`

# QLoRA parameters

- `lora_r = 64`
- `lora_alpha = 16`
- `lora_dropout = 0.1`
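
As a rough sketch (not taken verbatim from the training script), these values map onto a `peft` LoraConfig as shown below; `bias`, `task_type`, and the default `target_modules` are not listed in the card and are assumed here.

```python
from peft import LoraConfig

# Assumed mapping of the QLoRA values above onto a peft config.
peft_config = LoraConfig(
    r=64,              # lora_r
    lora_alpha=16,     # lora_alpha
    lora_dropout=0.1,  # lora_dropout
    bias="none",
    task_type="CAUSAL_LM",
)
```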

# bitsandbytes parameters

- `use_4bit = True`
- `bnb_4bit_compute_dtype = "float16"`
- `bnb_4bit_quant_type = "nf4"`
- `use_nested_quant = False`
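
These flags correspond to a 4-bit bitsandbytes quantization config along the following lines; this is a sketch of the usual mapping, not the original script.

```python
import torch
from transformers import BitsAndBytesConfig

# Sketch: translate the flags above into a 4-bit quantization config.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # use_4bit
    bnb_4bit_quant_type="nf4",             # bnb_4bit_quant_type
    bnb_4bit_compute_dtype=torch.float16,  # bnb_4bit_compute_dtype
    bnb_4bit_use_double_quant=False,       # use_nested_quant
)
```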

# TrainingArguments parameters

- `num_train_epochs = 1`
- `fp16 = False`
- `bf16 = False`
- `per_device_train_batch_size = 8`
- `per_device_eval_batch_size = 4`
- `gradient_accumulation_steps = 1`
- `gradient_checkpointing = True`
- `max_grad_norm = 0.3`
- `learning_rate = 2e-4`
- `weight_decay = 0.001`
- `optim = "paged_adamw_32bit"`
- `lr_scheduler_type = "cosine"`
- `max_steps = -1`
- `warmup_ratio = 0.03`
- `group_by_length = True`
- `save_steps = 0`
- `logging_steps = 25`
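
Expressed as a Transformers `TrainingArguments` object, the hyperparameters above would look roughly like this; `output_dir` is an assumed value, not given in the card.

```python
from transformers import TrainingArguments

training_arguments = TrainingArguments(
    output_dir="./results",  # assumed; not specified above
    num_train_epochs=1,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,
    optim="paged_adamw_32bit",
    save_steps=0,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
)
```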

# SFT parameters

- `max_seq_length = None`
- `packing = False`
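
Tying the pieces together, here is a sketch of the SFT step using the older `trl` SFTTrainer API. The `model`, `tokenizer`, `dataset`, `peft_config`, and `training_arguments` objects are assumed to come from the sketches above, and `dataset_text_field="text"` assumes the records were pre-rendered into a single prompt column.

```python
from trl import SFTTrainer

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",  # assumed column holding the rendered prompt
    max_seq_length=None,        # max_seq_length
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,              # packing
)
trainer.train()
```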