sqllama
/

sqllama-V0

Model card Files Files and versions Community

matt-tries-dl committed on Apr 28, 2023

Commit

b444d89

•

1 Parent(s): 357d6d7

update

Browse files

Files changed (3) hide show

alpaca-lora +1 -0
llama_test.ipynb +209 -29
requirements.txt +2 -1

alpaca-lora ADDED Viewed

	@@ -0,0 +1 @@


1	+ Subproject commit 8bb8579e403dc78e37fe81ffbb253c413007323f

llama_test.ipynb CHANGED Viewed

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
@@ -11,7 +11,7 @@
        "True"
       ]
      },
-     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -32,7 +32,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
@@ -47,7 +47,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "ca1fb983d9884b91a3c0feed1e207d0e",
        "version_major": 2,
        "version_minor": 0
       },
@@ -83,7 +83,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
@@ -132,7 +132,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
@@ -168,7 +168,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
@@ -232,7 +232,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 56,
    "metadata": {},
    "outputs": [
     {
@@ -240,25 +240,30 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "Respond to the following data request with a SQL query.\n",
-      "Q: Table 2-16763320-1 has columns Tournament (text),Surface (text),Week (text),Winner (text),Finalist (text),Semifinalists (text).  Which finalist has Semifinalists of andre agassi (1) lleyton hewitt (14)?\n",
-      "A: SELECT  Finalist FROM 2-16763320-1 WHERE Semifinalists = 'andre agassi (1) lleyton hewitt (14)'\n",
       "\n",
-      "Respond to the following data request with a SQL query.\n",
-      "Q: Table 1-27755784-10 has columns Game (real),Date (text),Team (text),Score (text),High points (text),High rebounds (text),High assists (text),Location Attendance (text),Record (text).  What is the highest game number?\n",
-      "A: SELECT MAX Game FROM 1-27755784-10\n",
       "\n",
-      "Respond to the following data request with a SQL query.\n",
-      "Q: Table 2-17231086-5 has columns Place (text),Player (text),Country (text),Score (text),To par (text).  What place is the United States in that has a score of 68-73-68=209?\n",
-      "A: SELECT  Place FROM 2-17231086-5 WHERE Country = 'united states' AND Score = '68-73-68=209'\n",
       "\n",
-      "Respond to the following data request with a SQL query.\n",
-      "Q: Table 2-1302729-1 has columns Season (real),Overall (text),Slalom (text),Giant Slalom (text),Super G (text),Downhill (text),Combined (text).  What is the combined of 2 overalls and 5 slaloms?\n",
-      "A: SELECT  Combined FROM 2-1302729-1 WHERE Overall = '2' AND Slalom = '5'\n",
       "\n",
-      "Respond to the following data request with a SQL query.\n",
-      "Q: Table 2-15295737-56 has columns Nation (text),Skip (text),Third (text),Second (text),Lead (text),Alternate (text).  Who is the alternate for the team for which Monika Wagner is the third?\n",
-      "A: SELECT  Alternate FROM 2-15295737-56 WHERE Third = 'monika wagner'\n"
      ]
     }
    ],
@@ -303,11 +308,11 @@
     "tbl_types = {}\n",
     "tbl_str = {}\n",
     "\n",
-    "prefix = 'Respond to the following data request with a SQL query.\\n'\n",
     "\n",
     "def tbl_def_to_string(id, header, types):\n",
     "    ht = [f'{header[i]} ({types[i]})' for i in range(len(header))]\n",
-    "    s = f'Q: Table {id} has columns ' + ','.join(ht) + '.  '\n",
     "    return s\n",
     "\n",
     "with open('data/train.tables.jsonl') as f:\n",
@@ -330,26 +335,201 @@
     "        id = js['table_id']\n",
     "        s = tbl_str[id]\n",
     "        qst = js['question']\n",
-    "        nl = prefix + s + qst\n",
     "        nl_q.append(nl)\n",
     "\n",
     "        sql = js['sql']\n",
     "        a = fix_repr(sql,tbl_cols[id],tbl_types[id],id)\n",
-    "        a = 'A: ' + a\n",
     "        sql_a.append(a)\n",
     "\n",
     "\n",
     "M = len(nl_q)\n",
     "\n",
     "\n",
     "for i in range(5):\n",
     "    j = random.randint(0,M-1)\n",
     "    print()\n",
-    "    print(nl_q[j])\n",
-    "    print(sql_a[j])    \n",
     "        \n",
     "   "
    ]
   }
  ],
  "metadata": {

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
     {
        "True"
       ]
      },
+     "execution_count": 1,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3ab80e2a1c0744e0af747ba63429a2af",
        "version_major": 2,
        "version_minor": 0
       },
   },
   {
    "cell_type": "code",
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
      "output_type": "stream",
      "text": [
       "\n",
+      "Below is a question that describes a data request, paired with an input that describes a SQL table.  Write a SQL query that retrieves the data.\n",
+      "### Question: What is the Displacement of the Iveco F1CE3481E Engine?\n",
+      "### Input: Table 2-1415821-6 has columns Model (text),Engine (text),Displacement (text),Valvetrain (text),Fuel system (text),Max. power at rpm (text),Max. torque at rpm (text).  \n",
+      "### Answer: SELECT  Displacement FROM 2-1415821-6 WHERE Engine = 'iveco f1ce3481e'\n",
       "\n",
+      "Below is a question that describes a data request, paired with an input that describes a SQL table.  Write a SQL query that retrieves the data.\n",
+      "### Question: What is the record of team utah?\n",
+      "### Input: Table 2-17355628-9 has columns Game (real),Date (text),Team (text),Score (text),High points (text),High rebounds (text),High assists (text),Location Attendance (text),Record (text).  \n",
+      "### Answer: SELECT  Record FROM 2-17355628-9 WHERE Team = 'utah'\n",
       "\n",
+      "Below is a question that describes a data request, paired with an input that describes a SQL table.  Write a SQL query that retrieves the data.\n",
+      "### Question: What is the home of the team with a 16-8 record?\n",
+      "### Input: Table 2-16188254-4 has columns Date (text),Visitor (text),Score (text),Home (text),Leading scorer (text),Attendance (text),Record (text).  \n",
+      "### Answer: SELECT  Home FROM 2-16188254-4 WHERE Record = '16-8'\n",
       "\n",
+      "Below is a question that describes a data request, paired with an input that describes a SQL table.  Write a SQL query that retrieves the data.\n",
+      "### Question: What week did the Galaxy play the Amsterdam Admirals?\n",
+      "### Input: Table 1-24814477-2 has columns Week (real),Date (text),Kickoff (text),Opponent (text),Final score (text),Team record (text),Game site (text),Attendance (real).  \n",
+      "### Answer: SELECT  Week FROM 1-24814477-2 WHERE Opponent = 'Amsterdam Admirals'\n",
       "\n",
+      "Below is a question that describes a data request, paired with an input that describes a SQL table.  Write a SQL query that retrieves the data.\n",
+      "### Question: How many caps did Mitchell Duke have overall?\n",
+      "### Input: Table 2-1257177-1 has columns Player (text),Country (text),Caps (real),Goals (text),Years Active (text).  \n",
+      "### Answer: SELECT COUNT Caps FROM 2-1257177-1 WHERE Player = 'mitchell duke'\n"
      ]
     }
    ],
     "tbl_types = {}\n",
     "tbl_str = {}\n",
     "\n",
+    "prefix = 'Below is a question that describes a data request, paired with an input that describes a SQL table.  Write a SQL query that retrieves the data.'\n",
     "\n",
     "def tbl_def_to_string(id, header, types):\n",
     "    ht = [f'{header[i]} ({types[i]})' for i in range(len(header))]\n",
+    "    s = f'\\n### Input: Table {id} has columns ' + ','.join(ht) + '.  '\n",
     "    return s\n",
     "\n",
     "with open('data/train.tables.jsonl') as f:\n",
     "        id = js['table_id']\n",
     "        s = tbl_str[id]\n",
     "        qst = js['question']\n",
+    "        nl = prefix + \"\\n### Question: \" + qst + s\n",
     "        nl_q.append(nl)\n",
     "\n",
     "        sql = js['sql']\n",
     "        a = fix_repr(sql,tbl_cols[id],tbl_types[id],id)\n",
+    "        a = '\\n### Answer: ' + a\n",
     "        sql_a.append(a)\n",
     "\n",
     "\n",
     "M = len(nl_q)\n",
     "\n",
+    "data_txt = [nl_q[i] + sql_a[i] for i in range(len(nl_q))]\n",
     "\n",
     "for i in range(5):\n",
     "    j = random.randint(0,M-1)\n",
     "    print()\n",
+    "    print(data_txt[j]) \n",
     "        \n",
     "   "
    ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Set up the details for the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "4f44918087484dd58b958a64cabdecb6",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map:   0%|          | 0/56355 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "from peft import LoraConfig, get_peft_model\n",
+    "import transformers\n",
+    "import datasets\n",
+    "\n",
+    "LORA_R = 4\n",
+    "LORA_ALPHA = 16\n",
+    "LORA_DROPOUT = .1\n",
+    "CUTOFF_LEN = 256\n",
+    "BATCH = 128\n",
+    "MICRO_BATCH = 4\n",
+    "N_GAS = BATCH//MICRO_BATCH\n",
+    "EPOCHS = 1\n",
+    "LR = 1e-5\n",
+    "\n",
+    "lora_cfg = LoraConfig(\n",
+    "    r = LORA_R,\n",
+    "    lora_alpha=LORA_ALPHA,\n",
+    "    lora_dropout=LORA_DROPOUT,\n",
+    "    task_type='CAUSAL_LM',\n",
+    "    target_modules=['q_proj','v_proj']\n",
+    ")\n",
+    "\n",
+    "modad = get_peft_model(model,lora_cfg)\n",
+    "\n",
+    "tokenizer.pad_token_id = 0\n",
+    "\n",
+    "d = {'prompt': data_txt}\n",
+    "\n",
+    "data = datasets.Dataset.from_dict(d)\n",
+    "data = data.map(lambda x:\n",
+    "        tokenizer(\n",
+    "        x['prompt'],\n",
+    "        truncation=True,\n",
+    "        max_length=CUTOFF_LEN,\n",
+    "        padding=\"max_length\"\n",
+    "        ))\n",
+    "\n",
+    "#data.remove_columns('prompt')\n",
+    "\n",
+    "targs = transformers.TrainingArguments(\n",
+    "    per_device_train_batch_size=MICRO_BATCH,\n",
+    "    gradient_accumulation_steps=N_GAS,\n",
+    "    warmup_steps=0,\n",
+    "    num_train_epochs=EPOCHS,\n",
+    "    learning_rate=LR,\n",
+    "    fp16=True,\n",
+    "    logging_steps=1,\n",
+    "    output_dir='sqllama-out',\n",
+    "    save_total_limit=3,\n",
+    "    remove_unused_columns=False\n",
+    ")\n",
+    "\n",
+    "\n",
+    "modad.config.use_cache = False"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Ignore this cell - it is just an experiment to figure out how Hugging Face datasets work."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Dataset({\n",
+      "    features: ['prompt', 'input_ids', 'attention_mask'],\n",
+      "    num_rows: 56355\n",
+      "})\n",
+      "{'prompt': \"Below is a question that describes a data request, paired with an input that describes a SQL table.  Write a SQL query that retrieves the data.\\n### Question: Tell me what the notes are for South Australia \\n### Input: Table 1-1000181-1 has columns State/territory (text),Text/background colour (text),Format (text),Current slogan (text),Current series (text),Notes (text).  \\n### Answer: SELECT  Notes FROM 1-1000181-1 WHERE Current slogan = 'SOUTH AUSTRALIA'\", 'input_ids': [0, 13866, 338, 263, 1139, 393, 16612, 263, 848, 2009, 29892, 3300, 2859, 411, 385, 1881, 393, 16612, 263, 3758, 1591, 29889, 29871, 14350, 263, 3758, 2346, 393, 5663, 17180, 278, 848, 29889, 13, 2277, 29937, 894, 29901, 24948, 592, 825, 278, 11486, 526, 363, 4275, 8314, 29871, 13, 2277, 29937, 10567, 29901, 6137, 29871, 29896, 29899, 29896, 29900, 29900, 29900, 29896, 29947, 29896, 29899, 29896, 756, 4341, 4306, 29914, 357, 768, 706, 313, 726, 511, 1626, 29914, 7042, 12384, 313, 726, 511, 5809, 313, 726, 511, 7583, 269, 1188, 273, 313, 726, 511, 7583, 3652, 313, 726, 511, 3664, 267, 313, 726, 467, 259, 13, 2277, 29937, 673, 29901, 5097, 29871, 8695, 3895, 29871, 29896, 29899, 29896, 29900, 29900, 29900, 29896, 29947, 29896, 29899, 29896, 5754, 9626, 269, 1188, 273, 353, 525, 6156, 2692, 29950, 319, 29965, 10810, 1964, 10764, 29915, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(data)\n",
+    "print(data[0])\n",
+    "\n",
+    "#from datasets import load_dataset\n",
+    "\n",
+    "\n",
+    "#!git clone https://github.com/tloen/alpaca-lora.git\n",
+    "#dalp = load_dataset(\"json\", data_files=\"alpaca-lora/alpaca_data.json\")\n",
+    "#print(dalp)\n",
+    "\n",
+    "#dalp = dalp.map(lambda x : {'blah':'blah'})\n",
+    "#print(dalp)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/matt/hf/sqllama-V0/.venv/lib/python3.7/site-packages/transformers/optimization.py:395: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+      "  FutureWarning,\n"
+     ]
+    },
+    {
+     "ename": "ValueError",
+     "evalue": "Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' 'truncation=True' to have batched tensors with the same length. Perhaps your features (`prompt` in this case) have excessive nesting (inputs type `list` where type `int` is expected).",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "\u001b[0;32m~/hf/sqllama-V0/.venv/lib/python3.7/site-packages/transformers/tokenization_utils_base.py\u001b[0m in \u001b[0;36mconvert_to_tensors\u001b[0;34m(self, tensor_type, prepend_batch_axis)\u001b[0m\n\u001b[1;32m    716\u001b[0m                 \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_tensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 717\u001b[0;31m                     \u001b[0mtensor\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mas_tensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    718\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mValueError\u001b[0m: too many dimensions 'str'",
+      "\nThe above exception was the direct cause of the following exception:\n",
+      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "\u001b[0;32m/var/tmp/ipykernel_2309/3549391384.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      5\u001b[0m     \u001b[0mdata_collator\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtransformers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataCollatorForLanguageModeling\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtokenizer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmlm\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      6\u001b[0m )\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0mtrainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresume_from_checkpoint\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      8\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave_pretrained\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'sqllama-out'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/hf/sqllama-V0/.venv/lib/python3.7/site-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m   1664\u001b[0m             \u001b[0mresume_from_checkpoint\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mresume_from_checkpoint\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1665\u001b[0m             \u001b[0mtrial\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtrial\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1666\u001b[0;31m             \u001b[0mignore_keys_for_eval\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mignore_keys_for_eval\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1667\u001b[0m         )\n\u001b[1;32m   1668\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/hf/sqllama-V0/.venv/lib/python3.7/site-packages/transformers/trainer.py\u001b[0m in \u001b[0;36m_inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m   1897\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1898\u001b[0m             \u001b[0mstep\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1899\u001b[0;31m             \u001b[0;32mfor\u001b[0m \u001b[0mstep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepoch_iterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1900\u001b[0m                 \u001b[0mtotal_batched_samples\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1901\u001b[0m                 \u001b[0;32mif\u001b[0m \u001b[0mrng_to_sync\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/hf/sqllama-V0/.venv/lib/python3.7/site-packages/torch/utils/data/dataloader.py\u001b[0m in \u001b[0;36m__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    626\u001b[0m                 \u001b[0;31m# TODO(https://github.com/pytorch/pytorch/issues/76750)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    627\u001b[0m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m  \u001b[0;31m# type: ignore[call-arg]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 628\u001b[0;31m             \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_next_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    629\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_num_yielded\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    630\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_dataset_kind\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0m_DatasetKind\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mIterable\u001b[0m \u001b[0;32mand\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/hf/sqllama-V0/.venv/lib/python3.7/site-packages/torch/utils/data/dataloader.py\u001b[0m in \u001b[0;36m_next_data\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    669\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_next_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    670\u001b[0m         \u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_next_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m  \u001b[0;31m# may raise StopIteration\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 671\u001b[0;31m         \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_dataset_fetcher\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfetch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m  \u001b[0;31m# may raise StopIteration\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    672\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_pin_memory\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    673\u001b[0m             \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_utils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpin_memory\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpin_memory\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_pin_memory_device\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/hf/sqllama-V0/.venv/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py\u001b[0m in \u001b[0;36mfetch\u001b[0;34m(self, possibly_batched_index)\u001b[0m\n\u001b[1;32m     59\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     60\u001b[0m             \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mpossibly_batched_index\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 61\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcollate_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;32m~/hf/sqllama-V0/.venv/lib/python3.7/site-packages/transformers/data/data_collator.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, features, return_tensors)\u001b[0m\n\u001b[1;32m     43\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtf_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfeatures\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     44\u001b[0m         \u001b[0;32melif\u001b[0m \u001b[0mreturn_tensors\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"pt\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 45\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtorch_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfeatures\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     46\u001b[0m         \u001b[0;32melif\u001b[0m \u001b[0mreturn_tensors\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"np\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     47\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfeatures\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/hf/sqllama-V0/.venv/lib/python3.7/site-packages/transformers/data/data_collator.py\u001b[0m in \u001b[0;36mtorch_call\u001b[0;34m(self, examples)\u001b[0m\n\u001b[1;32m    727\u001b[0m         \u001b[0;31m# Handle dict or lists with proper padding and conversion to tensor.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    728\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexamples\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mMapping\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 729\u001b[0;31m             \u001b[0mbatch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtokenizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexamples\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreturn_tensors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"pt\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpad_to_multiple_of\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpad_to_multiple_of\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    730\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    731\u001b[0m             batch = {\n",
+      "\u001b[0;32m~/hf/sqllama-V0/.venv/lib/python3.7/site-packages/transformers/tokenization_utils_base.py\u001b[0m in \u001b[0;36mpad\u001b[0;34m(self, encoded_inputs, padding, max_length, pad_to_multiple_of, return_attention_mask, return_tensors, verbose)\u001b[0m\n\u001b[1;32m   3033\u001b[0m                 \u001b[0mbatch_outputs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3034\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3035\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mBatchEncoding\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch_outputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtensor_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mreturn_tensors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3036\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3037\u001b[0m     def create_token_type_ids_from_sequences(\n",
+      "\u001b[0;32m~/hf/sqllama-V0/.venv/lib/python3.7/site-packages/transformers/tokenization_utils_base.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, data, encoding, tensor_type, prepend_batch_axis, n_sequences)\u001b[0m\n\u001b[1;32m    208\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_n_sequences\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mn_sequences\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    209\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 210\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconvert_to_tensors\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtensor_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprepend_batch_axis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mprepend_batch_axis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    211\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    212\u001b[0m     \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/hf/sqllama-V0/.venv/lib/python3.7/site-packages/transformers/tokenization_utils_base.py\u001b[0m in \u001b[0;36mconvert_to_tensors\u001b[0;34m(self, tensor_type, prepend_batch_axis)\u001b[0m\n\u001b[1;32m    736\u001b[0m                     \u001b[0;34mf\" features (`{key}` in this case) have excessive nesting (inputs type `list` where type `int` is\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    737\u001b[0m                     \u001b[0;34m\" expected).\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 738\u001b[0;31m                 ) from e\n\u001b[0m\u001b[1;32m    739\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    740\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mValueError\u001b[0m: Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' 'truncation=True' to have batched tensors with the same length. Perhaps your features (`prompt` in this case) have excessive nesting (inputs type `list` where type `int` is expected)."
+     ]
+    }
+   ],
+   "source": [
+    "trainer = transformers.Trainer(\n",
+    "    model = modad,\n",
+    "    train_dataset = data,\n",
+    "    args = targs,\n",
+    "    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)\n",
+    ")\n",
+    "trainer.train(resume_from_checkpoint=False)\n",
+    "model.save_pretrained('sqllama-out')"
+   ]
   }
  ],
  "metadata": {

requirements.txt CHANGED Viewed

@@ -5,8 +5,9 @@ torch
 sentencepiece
 transformers
 accelerate
-bitsandbytes
 peft
 tqdm
 records
 babel

 sentencepiece
 transformers
 accelerate
+bitsandbytes==0.37.2
 peft
+datasets
 tqdm
 records
 babel