matt-tries-dl committed on
Commit
8f8b1a3
1 Parent(s): dd87728

wikisql download

Browse files
Files changed (4) hide show
  1. .gitignore +3 -1
  2. README.md +1 -0
  3. llama_test.ipynb +62 -2
  4. requirements.txt +6 -1
.gitignore CHANGED
@@ -1 +1,3 @@
1
- .venv
 
 
 
1
+ .venv
2
+ WikiSQL
3
+ data
README.md CHANGED
@@ -15,6 +15,7 @@ I had to uninstall some cuda stuff that torch installed to make this work.
15
  https://github.com/tloen/alpaca-lora
16
  https://huggingface.co/docs/transformers/main/en/model_doc/llama#llama
17
  https://huggingface.co/docs/transformers/index
 
18
 
19
 
20
  https://arxiv.org/pdf/1910.13461.pdf
 
15
  https://github.com/tloen/alpaca-lora
16
  https://huggingface.co/docs/transformers/main/en/model_doc/llama#llama
17
  https://huggingface.co/docs/transformers/index
18
+ https://github.com/salesforce/WikiSQL
19
 
20
 
21
  https://arxiv.org/pdf/1910.13461.pdf
llama_test.ipynb CHANGED
@@ -22,6 +22,14 @@
22
  "torch.cuda.is_available()"
23
  ]
24
  },
 
 
 
 
 
 
 
 
25
  {
26
  "cell_type": "code",
27
  "execution_count": 2,
@@ -39,7 +47,7 @@
39
  {
40
  "data": {
41
  "application/vnd.jupyter.widget-view+json": {
42
- "model_id": "37df56d6ddb747f3a91bc9100b33f47b",
43
  "version_major": 2,
44
  "version_minor": 0
45
  },
@@ -65,11 +73,27 @@
65
  ")\n"
66
  ]
67
  },
 
 
 
 
 
 
 
 
68
  {
69
  "cell_type": "code",
70
- "execution_count": 9,
71
  "metadata": {},
72
  "outputs": [
 
 
 
 
 
 
 
 
73
  {
74
  "name": "stdout",
75
  "output_type": "stream",
@@ -97,6 +121,42 @@
97
  "r = ask('whats the capital of georgia?')\n",
98
  "print(r)\n"
99
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  }
101
  ],
102
  "metadata": {
 
22
  "torch.cuda.is_available()"
23
  ]
24
  },
25
+ {
26
+ "attachments": {},
27
+ "cell_type": "markdown",
28
+ "metadata": {},
29
+ "source": [
30
+ "Check that we can load the tokenizer and the model. The first time this runs it will take a while; the files are stored under ~/.cache/huggingface."
31
+ ]
32
+ },
33
  {
34
  "cell_type": "code",
35
  "execution_count": 2,
 
47
  {
48
  "data": {
49
  "application/vnd.jupyter.widget-view+json": {
50
+ "model_id": "fc0f4312aa7c4009a912c66dd1443763",
51
  "version_major": 2,
52
  "version_minor": 0
53
  },
 
73
  ")\n"
74
  ]
75
  },
76
+ {
77
+ "attachments": {},
78
+ "cell_type": "markdown",
79
+ "metadata": {},
80
+ "source": [
81
+ "Construct a basic example to interact with the model"
82
+ ]
83
+ },
84
  {
85
  "cell_type": "code",
86
+ "execution_count": 3,
87
  "metadata": {},
88
  "outputs": [
89
+ {
90
+ "name": "stderr",
91
+ "output_type": "stream",
92
+ "text": [
93
+ "/home/matt/hf/sqllama-V0/.venv/lib/python3.7/site-packages/transformers/generation/utils.py:1220: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use a generation configuration file (see https://huggingface.co/docs/transformers/main_classes/text_generation)\n",
94
+ " \"You have modified the pretrained model configuration to control generation. This is a\"\n"
95
+ ]
96
+ },
97
  {
98
  "name": "stdout",
99
  "output_type": "stream",
 
121
  "r = ask('whats the capital of georgia?')\n",
122
  "print(r)\n"
123
  ]
124
+ },
125
+ {
126
+ "attachments": {},
127
+ "cell_type": "markdown",
128
+ "metadata": {},
129
+ "source": [
130
+ "Clone the WikiSQL project so we can extract its dataset"
131
+ ]
132
+ },
133
+ {
134
+ "cell_type": "code",
135
+ "execution_count": 5,
136
+ "metadata": {},
137
+ "outputs": [
138
+ {
139
+ "name": "stdout",
140
+ "output_type": "stream",
141
+ "text": [
142
+ "fatal: destination path 'WikiSQL' already exists and is not an empty directory.\n",
143
+ "data/\n",
144
+ "data/train.jsonl\n",
145
+ "data/test.tables.jsonl\n",
146
+ "data/test.db\n",
147
+ "data/dev.tables.jsonl\n",
148
+ "data/dev.db\n",
149
+ "data/test.jsonl\n",
150
+ "data/train.tables.jsonl\n",
151
+ "data/train.db\n",
152
+ "data/dev.jsonl\n"
153
+ ]
154
+ }
155
+ ],
156
+ "source": [
157
+ "! git clone https://github.com/salesforce/WikiSQL\n",
158
+ "! tar xvjf WikiSQL/data.tar.bz2"
159
+ ]
160
  }
161
  ],
162
  "metadata": {
requirements.txt CHANGED
@@ -5,4 +5,9 @@ torch
5
  sentencepiece
6
  transformers
7
  accelerate
8
- bitsandbytes
 
 
 
 
 
 
5
  sentencepiece
6
  transformers
7
  accelerate
8
+ bitsandbytes
9
+ peft
10
+ tqdm
11
+ records
12
+ babel
13
+ tabulate