{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyOpe6URbmt5CbVG1CVVUIr6"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e-NVlZMV8Oh7","executionInfo":{"status":"ok","timestamp":1723254070685,"user_tz":-345,"elapsed":31968,"user":{"displayName":"Basab Jha","userId":"01698150105745770629"}},"outputId":"f96aa546-1cf5-4890-ebdd-12197488c657"},"outputs":[{"output_type":"stream","name":"stdout","text":["Mounted at /content/drive\n"]}],"source":["from google.colab import drive\n","drive.mount('/content/drive')"]},{"cell_type":"code","source":["%cd /content/drive/MyDrive/GEM_Project/"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"3aTqbSUT8UZs","executionInfo":{"status":"ok","timestamp":1723254982318,"user_tz":-345,"elapsed":484,"user":{"displayName":"Basab Jha","userId":"01698150105745770629"}},"outputId":"0f7b31c2-329c-43f5-8c5d-06f7cf54663d"},"execution_count":16,"outputs":[{"output_type":"stream","name":"stdout","text":["/content/drive/MyDrive/GEM_Project\n"]}]},{"cell_type":"code","source":["%%writefile /GEM_Project/Testings/testing.py\n","import torch\n","from transformers import AutoTokenizer, AutoModelForCausalLM\n","\n","# Model and tokenizer paths\n","model_path = \"/GEM_Project/GEM_1o_Aug.pt\"\n","tokenizer_path = \"/GEM_Project/tokenizer/tokenizer\"\n","\n","# Load the tokenizer\n","tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)\n","\n","# Load the model\n","model = AutoModelForCausalLM.from_pretrained(model_path)\n","\n","# Set the model to evaluation mode\n","model.eval()\n","\n","# Set device to GPU if available\n","device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n","model.to(device)\n","\n","# Define a function to generate text based on a prompt\n","def generate_text(prompt, max_length=50, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7):\n"," # Tokenize the input prompt\n"," input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)\n","\n"," # Generate output from the model\n"," output = model.generate(\n"," input_ids,\n"," max_length=max_length,\n"," num_return_sequences=num_return_sequences,\n"," no_repeat_ngram_size=no_repeat_ngram_size,\n"," do_sample=True,\n"," top_k=top_k,\n"," top_p=top_p,\n"," temperature=temperature\n"," )\n","\n"," # Decode the generated output\n"," generated_text = tokenizer.decode(output[0], skip_special_tokens=True)\n"," return generated_text\n","\n","# Test prompts\n","prompts = [\n"," \"The future of artificial intelligence is\",\n"," \"Once upon a time in a land far away,\",\n"," \"In the field of natural language processing,\",\n"," \"The concept of creativity in machines is\"\n","]\n","\n","# Generate and print outputs for each prompt\n","for prompt in prompts:\n"," print(f\"Prompt: {prompt}\")\n"," generated_text = generate_text(prompt)\n"," print(f\"Generated: {generated_text}\\n\")\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":530},"collapsed":true,"id":"h46NWxXr8oee","executionInfo":{"status":"error","timestamp":1723255000696,"user_tz":-345,"elapsed":503,"user":{"displayName":"Basab Jha","userId":"01698150105745770629"}},"outputId":"22a59c5f-b5a2-4eef-b30e-3d3be9c3a780"},"execution_count":17,"outputs":[{"output_type":"stream","name":"stdout","text":["Writing /GEM_Project/Testings/testing.py\n"]},{"output_type":"error","ename":"FileNotFoundError","evalue":"[Errno 2] No such file or directory: '/GEM_Project/Testings/testing.py'","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_cell_magic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'writefile'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'/GEM_Project/Testings/testing.py'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'import torch\\nfrom transformers import AutoTokenizer, AutoModelForCausalLM\\n\\n# Model and tokenizer paths\\nmodel_path = \"/GEM_Project/GEM_1o_Aug.pt\"\\ntokenizer_path = \"/GEM_Project/tokenizer/tokenizer\"\\n\\n# Load the tokenizer\\ntokenizer = AutoTokenizer.from_pretrained(tokenizer_path)\\n\\n# Load the model\\nmodel = AutoModelForCausalLM.from_pretrained(model_path)\\n\\n# Set the model to evaluation mode\\nmodel.eval()\\n\\n# Set device to GPU if available\\ndevice = torch.device(\\'cuda\\' if torch.cuda.is_available() else \\'cpu\\')\\nmodel.to(device)\\n\\n# Define a function to generate text based on a prompt\\ndef generate_text(prompt, max_length=50, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7):\\n # Tokenize the input prompt\\n input_ids = tokenizer.encode(prompt, return_tensors=\\'pt\\').to(device)\\n\\n # Generate output from the model\\n output = model.generate(\\n input_ids,\\n max_length=max_length,\\n num_return_sequences=num_return_sequences,\\n no_repeat_ngram_size=no_repeat_ngram_size,\\n do_sample=True,\\n top_k=top_k,\\n top_p=top_p,\\n temperature=temperature\\n )\\n\\n # Decode the generated output\\n generated_text = tokenizer.decode(output[0], skip_special_tokens=Tr...\n\u001b[0m","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/google/colab/_shell.py\u001b[0m in \u001b[0;36mrun_cell_magic\u001b[0;34m(self, magic_name, line, cell)\u001b[0m\n\u001b[1;32m 332\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mline\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mcell\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 333\u001b[0m \u001b[0mcell\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m' '\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 334\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_cell_magic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmagic_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mline\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcell\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 335\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 336\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\u001b[0m in \u001b[0;36mrun_cell_magic\u001b[0;34m(self, magic_name, line, cell)\u001b[0m\n\u001b[1;32m 2471\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuiltin_trap\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2472\u001b[0m \u001b[0margs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mmagic_arg_s\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcell\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2473\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2474\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2475\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m\u001b[0m in \u001b[0;36mwritefile\u001b[0;34m(self, line, cell)\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/IPython/core/magic.py\u001b[0m in \u001b[0;36m\u001b[0;34m(f, *a, **k)\u001b[0m\n\u001b[1;32m 185\u001b[0m \u001b[0;31m# but it's overkill for just that one bit of state.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 186\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmagic_deco\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 187\u001b[0;31m \u001b[0mcall\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 188\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 189\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcallable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/IPython/core/magics/osm.py\u001b[0m in \u001b[0;36mwritefile\u001b[0;34m(self, line, cell)\u001b[0m\n\u001b[1;32m 854\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 855\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'a'\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34m'w'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 856\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'utf-8'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 857\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcell\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/GEM_Project/Testings/testing.py'"]}]},{"cell_type":"code","source":["!python testing.py"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"X_02SGwK_BY6","executionInfo":{"status":"ok","timestamp":1723254885616,"user_tz":-345,"elapsed":7324,"user":{"displayName":"Basab Jha","userId":"01698150105745770629"}},"outputId":"e30ed0b8-e9c5-486d-c912-1a58701c6ae4"},"execution_count":14,"outputs":[{"output_type":"stream","name":"stdout","text":["Traceback (most recent call last):\n"," File \"/usr/local/lib/python3.10/dist-packages/transformers/utils/hub.py\", line 402, in cached_file\n"," resolved_file = hf_hub_download(\n"," File \"/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_validators.py\", line 106, in _inner_fn\n"," validate_repo_id(arg_value)\n"," File \"/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_validators.py\", line 154, in validate_repo_id\n"," raise HFValidationError(\n","huggingface_hub.errors.HFValidationError: Repo id must be in the form 'repo_name' or 'namespace/repo_name': './tokenizer/gem_tokenizer'. Use `repo_type` argument if needed.\n","\n","The above exception was the direct cause of the following exception:\n","\n","Traceback (most recent call last):\n"," File \"/content/drive/MyDrive/GEM_Project/Testings/testing.py\", line 9, in \n"," tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)\n"," File \"/usr/local/lib/python3.10/dist-packages/transformers/models/auto/tokenization_auto.py\", line 826, in from_pretrained\n"," tokenizer_config = get_tokenizer_config(pretrained_model_name_or_path, **kwargs)\n"," File \"/usr/local/lib/python3.10/dist-packages/transformers/models/auto/tokenization_auto.py\", line 658, in get_tokenizer_config\n"," resolved_config_file = cached_file(\n"," File \"/usr/local/lib/python3.10/dist-packages/transformers/utils/hub.py\", line 466, in cached_file\n"," raise EnvironmentError(\n","OSError: Incorrect path_or_model_id: './tokenizer/gem_tokenizer'. Please provide either the path to a local folder or the repo_id of a model on the Hub.\n"]}]}]}