diff --git "a/notebooks/Untitled6.ipynb" "b/notebooks/Untitled6.ipynb" new file mode 100644--- /dev/null +++ "b/notebooks/Untitled6.ipynb" @@ -0,0 +1,2254 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "05cf28f1372b412cbdd746a3e4cccc49": { + "model_module": "@jupyter-widgets/output", + "model_name": "OutputModel", + "model_module_version": "1.0.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_69c5ba9915874166ab377ee53176837b", + "msg_id": "", + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "Working... \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[35m100%\u001b[0m \u001b[33m0:02:43\u001b[0m\n", + "text/html": "
Working... ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 0:02:43\n\n" + }, + "metadata": {} + } + ] + } + }, + "69c5ba9915874166ab377ee53176837b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "605fade297054a1fa24eca4435921ece": { + "model_module": "@jupyter-widgets/output", + "model_name": "OutputModel", + "model_module_version": "1.0.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_648b2e772d21470e82d1ac1994338051", + "msg_id": "", + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "Working... \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[35m100%\u001b[0m \u001b[33m0:38:59\u001b[0m\n", + "text/html": "
Working... ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 0:38:59\n\n" + }, + "metadata": {} + } + ] + } + }, + "648b2e772d21470e82d1ac1994338051": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9442bc3cd4f44d74b5328659affd8b90": { + "model_module": "@jupyter-widgets/output", + "model_name": "OutputModel", + "model_module_version": "1.0.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_ee636c2ffe9a4c3ca77d88fd497b29da", + "msg_id": "", + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "Working... \u001b[38;2;209;42;102m━\u001b[0m\u001b[38;2;183;44;94m━\u001b[0m\u001b[38;2;153;48;86m━\u001b[0m\u001b[38;2;123;51;77m━\u001b[0m\u001b[38;2;97;53;69m━\u001b[0m\u001b[38;2;76;56;63m━\u001b[0m\u001b[38;2;62;57;59m━\u001b[0m\u001b[38;2;58;58;58m━\u001b[0m\u001b[38;2;62;57;59m━\u001b[0m\u001b[38;2;76;56;63m━\u001b[0m\u001b[38;2;97;53;69m━\u001b[0m\u001b[38;2;123;51;77m━\u001b[0m\u001b[38;2;153;48;86m━\u001b[0m\u001b[38;2;183;44;94m━\u001b[0m\u001b[38;2;209;42;102m━\u001b[0m\u001b[38;2;230;39;108m━\u001b[0m\u001b[38;2;244;38;112m━\u001b[0m\u001b[38;2;249;38;114m━\u001b[0m\u001b[38;2;244;38;112m━\u001b[0m\u001b[38;2;230;39;108m━\u001b[0m\u001b[38;2;209;42;102m━\u001b[0m\u001b[38;2;183;44;94m━\u001b[0m\u001b[38;2;153;48;86m━\u001b[0m\u001b[38;2;123;51;77m━\u001b[0m\u001b[38;2;97;53;69m━\u001b[0m\u001b[38;2;76;56;63m━\u001b[0m\u001b[38;2;62;57;59m━\u001b[0m\u001b[38;2;58;58;58m━\u001b[0m\u001b[38;2;62;57;59m━\u001b[0m\u001b[38;2;76;56;63m━\u001b[0m\u001b[38;2;97;53;69m━\u001b[0m\u001b[38;2;123;51;77m━\u001b[0m\u001b[38;2;153;48;86m━\u001b[0m\u001b[38;2;183;44;94m━\u001b[0m\u001b[38;2;209;42;102m━\u001b[0m\u001b[38;2;230;39;108m━\u001b[0m\u001b[38;2;244;38;112m━\u001b[0m\u001b[38;2;249;38;114m━\u001b[0m\u001b[38;2;244;38;112m━\u001b[0m\u001b[38;2;230;39;108m━\u001b[0m \u001b[36m \u001b[0m\n", + "text/html": "
Working... ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ \n\n" + }, + "metadata": {} + } + ] + } + }, + "ee636c2ffe9a4c3ca77d88fd497b29da": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + } + } + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Twj8A17VYm13" + }, + "outputs": [], + "source": [ + "import requests\n", + "from bs4 import BeautifulSoup" + ] + }, + { + "cell_type": "code", + "source": [ + "url = \"https://lightning.ai/docs/pytorch/latest/starter/introduction.html\"\n", + "\n", + "response = requests.get(url)\n", + "\n", + "if response.status_code == 200:\n", + " soup = BeautifulSoup(response.text, 'html.parser')\n", + "\n", + " div_content = soup.find('div', class_='rst-content')\n", + "\n", + " if div_content:\n", + " sections = div_content.find_all('section')\n", + "\n", + " for section in sections[1:]:\n", + "\n", + " section_content = section.get_text()\n", + " print(section_content)\n", + " print('-------------------')\n", + " else:\n", + " print(\"Div element with class 'rst-content' not found. Check the HTML structure of the page.\")\n", + "\n", + "else:\n", + " print(f\"Failed to retrieve the page. 
Status code: {response.status_code}\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5a6evxW3ZRB0", + "outputId": "317b92e9-c08f-4d84-bfe3-68f5afeee75a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "1: Install PyTorch Lightning¶\n", + "\n", + "For pip users\n", + "pip install lightning\n", + "\n", + "\n", + "\n", + "For conda users\n", + "conda install lightning -c conda-forge\n", + "\n", + "\n", + "\n", + "Or read the advanced install guide\n", + "\n", + "\n", + "-------------------\n", + "\n", + "2: Define a LightningModule¶\n", + "A LightningModule enables your PyTorch nn.Module to play together in complex ways inside the training_step (there is also an optional validation_step and test_step).\n", + "import os\n", + "from torch import optim, nn, utils, Tensor\n", + "from torchvision.datasets import MNIST\n", + "from torchvision.transforms import ToTensor\n", + "import lightning as L\n", + "\n", + "# define any number of nn.Modules (or use your current ones)\n", + "encoder = nn.Sequential(nn.Linear(28 * 28, 64), nn.ReLU(), nn.Linear(64, 3))\n", + "decoder = nn.Sequential(nn.Linear(3, 64), nn.ReLU(), nn.Linear(64, 28 * 28))\n", + "\n", + "\n", + "# define the LightningModule\n", + "class LitAutoEncoder(L.LightningModule):\n", + " def __init__(self, encoder, decoder):\n", + " super().__init__()\n", + " self.encoder = encoder\n", + " self.decoder = decoder\n", + "\n", + " def training_step(self, batch, batch_idx):\n", + " # training_step defines the train loop.\n", + " # it is independent of forward\n", + " x, y = batch\n", + " x = x.view(x.size(0), -1)\n", + " z = self.encoder(x)\n", + " x_hat = self.decoder(z)\n", + " loss = nn.functional.mse_loss(x_hat, x)\n", + " # Logging to TensorBoard (if installed) by default\n", + " self.log(\"train_loss\", loss)\n", + " return loss\n", + "\n", + " def configure_optimizers(self):\n", + " optimizer = optim.Adam(self.parameters(), lr=1e-3)\n", + " return optimizer\n", + "\n", + "\n", + "# init the autoencoder\n", + "autoencoder = LitAutoEncoder(encoder, decoder)\n", + "\n", + "\n", + "\n", + "-------------------\n", + "\n", + "3: Define a dataset¶\n", + "Lightning supports ANY iterable (DataLoader, numpy, etc…) for the train/val/test/predict splits.\n", + "# setup data\n", + "dataset = MNIST(os.getcwd(), download=True, transform=ToTensor())\n", + "train_loader = utils.data.DataLoader(dataset)\n", + "\n", + "\n", + "\n", + "-------------------\n", + "\n", + "4: Train the model¶\n", + "The Lightning Trainer “mixes” any LightningModule with any dataset and abstracts away all the engineering complexity needed for scale.\n", + "# train the model (hint: here are some helpful Trainer arguments for rapid idea iteration)\n", + "trainer = L.Trainer(limit_train_batches=100, max_epochs=1)\n", + "trainer.fit(model=autoencoder, train_dataloaders=train_loader)\n", + "\n", + "\n", + "The Lightning Trainer automates 40+ tricks including:\n", + "\n", + "Epoch and batch iteration\n", + "optimizer.step(), loss.backward(), optimizer.zero_grad() calls\n", + "Calling of model.eval(), enabling/disabling grads during evaluation\n", + "Checkpoint Saving and Loading\n", + "Tensorboard (see loggers options)\n", + "Multi-GPU support\n", + "TPU\n", + "16-bit precision AMP support\n", + "\n", + "\n", + "-------------------\n", + "\n", + "5: Use the model¶\n", + "Once you’ve trained the model you can export to onnx, torchscript and put it into production or simply 
load the weights and run predictions.\n", + "# load checkpoint\n", + "checkpoint = \"./lightning_logs/version_0/checkpoints/epoch=0-step=100.ckpt\"\n", + "autoencoder = LitAutoEncoder.load_from_checkpoint(checkpoint, encoder=encoder, decoder=decoder)\n", + "\n", + "# choose your trained nn.Module\n", + "encoder = autoencoder.encoder\n", + "encoder.eval()\n", + "\n", + "# embed 4 fake images!\n", + "fake_image_batch = torch.rand(4, 28 * 28, device=autoencoder.device)\n", + "embeddings = encoder(fake_image_batch)\n", + "print(\"⚡\" * 20, \"\\nPredictions (4 image embeddings):\\n\", embeddings, \"\\n\", \"⚡\" * 20)\n", + "\n", + "\n", + "\n", + "-------------------\n", + "\n", + "6: Visualize training¶\n", + "If you have tensorboard installed, you can use it for visualizing experiments.\n", + "Run this on your commandline and open your browser to http://localhost:6006/\n", + "tensorboard --logdir .\n", + "\n", + "\n", + "\n", + "-------------------\n", + "\n", + "7: Supercharge training¶\n", + "Enable advanced training features using Trainer arguments. These are state-of-the-art techniques that are automatically integrated into your training loop without changes to your code.\n", + "# train on 4 GPUs\n", + "trainer = Trainer(\n", + " devices=4,\n", + " accelerator=\"gpu\",\n", + " )\n", + "\n", + "# train 1TB+ parameter models with Deepspeed/fsdp\n", + "trainer = L.Trainer(\n", + " devices=4,\n", + " accelerator=\"gpu\",\n", + " strategy=\"deepspeed_stage_2\",\n", + " precision=16\n", + " )\n", + "\n", + "# 20+ helpful flags for rapid idea iteration\n", + "trainer = L.Trainer(\n", + " max_epochs=10,\n", + " min_epochs=5,\n", + " overfit_batches=1\n", + " )\n", + "\n", + "# access the latest state of the art techniques\n", + "trainer = Trainer(callbacks=[StochasticWeightAveraging(...)])\n", + "\n", + "\n", + "\n", + "-------------------\n", + "\n", + "Maximize flexibility¶\n", + "Lightning’s core guiding principle is to always provide maximal flexibility without ever hiding any of the PyTorch.\n", + "Lightning offers 5 added degrees of flexibility depending on your project’s complexity.\n", + "\n", + "\n", + "Customize training loop¶\n", + "\n", + "Inject custom code anywhere in the Training loop using any of the 20+ methods (Hooks) available in the LightningModule.\n", + "class LitAutoEncoder(L.LightningModule):\n", + " def backward(self, loss):\n", + " loss.backward()\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Extend the Trainer¶\n", + "If you have multiple lines of code with similar functionalities, you can use callbacks to easily group them together and toggle all of those lines on or off at the same time.\n", + "trainer = Trainer(callbacks=[AWSCheckpoints()])\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Use a raw PyTorch loop¶\n", + "For certain types of work at the bleeding-edge of research, Lightning offers experts full control of optimization or the training loop in various ways.\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Manual optimization\n", + "Automated training loop, but you own the optimization steps.\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "-------------------\n", + "\n", + "Customize training loop¶\n", + "\n", + "Inject custom code anywhere in the Training loop using any of the 20+ methods (Hooks) available in the LightningModule.\n", + "class LitAutoEncoder(L.LightningModule):\n", + " def backward(self, loss):\n", + " loss.backward()\n", + "\n", + "\n", + "\n", + "-------------------\n", + "\n", + "Extend the Trainer¶\n", + "If you have multiple lines 
of code with similar functionalities, you can use callbacks to easily group them together and toggle all of those lines on or off at the same time.\n", + "trainer = Trainer(callbacks=[AWSCheckpoints()])\n", + "\n", + "\n", + "\n", + "-------------------\n", + "\n", + "Use a raw PyTorch loop¶\n", + "For certain types of work at the bleeding-edge of research, Lightning offers experts full control of optimization or the training loop in various ways.\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Manual optimization\n", + "Automated training loop, but you own the optimization steps.\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "-------------------\n", + "\n", + "Next steps¶\n", + "Depending on your use case, you might want to check one of these out next.\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Level 2: Add a validation and test set\n", + "Add validation and test sets to avoid over/underfitting.\n", + "\n", + "\n", + "basic\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "See more examples\n", + "See examples across computer vision, NLP, RL, etc...\n", + "\n", + "\n", + "basic\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Deploy your model\n", + "Learn how to predict or put your model into production\n", + "\n", + "\n", + "basic\n", + "\n", + "\n", + "\n", + "-------------------\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from urllib.parse import urlparse, urlunparse, urljoin" + ], + "metadata": { + "id": "Grj3oLv8ZRD9" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def remove_fragment(url):\n", + "    # strip the #fragment so URLs that differ only by anchor collapse to one entry\n", + "    parser_url = urlparse(url)\n", + "\n", + "    new_url = urlunparse(parser_url._replace(fragment=\"\"))\n", + "    return new_url" + ], + "metadata": { + "id": "ee_vyuuRNajS" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import requests\n", + "from bs4 import BeautifulSoup\n", + "from urllib.parse import urlparse, urljoin\n", + "\n", + "crawled_urls = set()\n", + "\n", + "\n", + "# Function to fetch and extract links from a page\n", + "def get_links(url):\n", + "    try:\n", + "        response = requests.get(url)\n", + "        if response.status_code == 200:\n", + "            soup = BeautifulSoup(response.text, \"html.parser\")\n", + "            links = []\n", + "            for a_tag in soup.find_all(\"a\", href=True):\n", + "                link = a_tag[\"href\"]\n", + "                links.append(link)\n", + "            return links\n", + "    except Exception as e:\n", + "        print(f\"Failed to fetch links from {url}: {e}\")\n", + "        return []\n", + "    # non-200 responses also yield an empty list so the caller can always iterate\n", + "    return []\n", + "\n", + "\n", + "# Function to recursively fetch links within the same domain\n", + "def fetch_links_recursive(base_url, current_url, visited_urls, max_depth=4):\n", + "    if current_url in visited_urls or max_depth == 0:\n", + "        return\n", + "\n", + "    visited_urls.add(current_url)\n", + "    links = get_links(current_url)\n", + "    for link in links:\n", + "        absolute_url = urljoin(current_url, link)\n", + "        parsed_url = urlparse(absolute_url)\n", + "        if parsed_url.netloc == base_url.netloc:\n", + "            crawled_urls.add(remove_fragment(absolute_url))\n", + "            fetch_links_recursive(base_url, absolute_url, visited_urls, max_depth - 1)" + ], + "metadata": { + "id": "X5k3ts0aNal3" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "base_url = urlparse(\n", + "    \"https://lightning.ai/docs/pytorch/latest/starter/introduction.html\"\n", + ")\n", + "visited_urls = set()\n", + "fetch_links_recursive(base_url, base_url.geturl(), visited_urls)" + ], + "metadata": { + "id": "l2y7AqXCNaoj" + }, + 
"execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "len(crawled_urls)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WLgfwo4PNarH", + "outputId": "cf1dca6e-6b6f-4ff8-aa4c-ed8f3f780539" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "275" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "code", + "source": [ + "crawled_urls" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jh-iuNqjNatp", + "outputId": "f39462ca-36de-4061-d48c-a724bf10271e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'https://lightning.ai/docs/fabric/',\n", + " 'https://lightning.ai/docs/pytorch/latest/_images/custom_loop.png',\n", + " 'https://lightning.ai/docs/pytorch/latest/_images/ddp.gif',\n", + " 'https://lightning.ai/docs/pytorch/latest/_modules/lightning/fabric/utilities/throughput.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/_modules/lightning/pytorch/core/module.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/_modules/lightning/pytorch/trainer/trainer.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/accelerators/gpu.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/accelerators/tpu.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/advanced/speed.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/api_references.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/common/checkpointing.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/common/index.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/common/lightning_module.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/common/trainer.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/community/index.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/deploy/production.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/glossary/index.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/index.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/levels/advanced.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/levels/basic_level_2.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/levels/core_skills.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/levels/expert.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/levels/intermediate.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/model/build_model_advanced.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/starter/installation.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/starter/introduction.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/tutorials.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/from_1_4.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/from_1_5.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/from_1_6.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/from_1_7.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/from_1_8.rst.txt',\n", + " 
'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/from_1_9.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/from_2_0.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/migration_guide.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/visualize/loggers.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/accelerator_prepare.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/gpu.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/gpu_advanced.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/gpu_basic.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/gpu_expert.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/gpu_faq.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/gpu_intermediate.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/mps.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/tpu.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/tpu_advanced.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/tpu_basic.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/tpu_faq.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/tpu_intermediate.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/ddp_optimizations.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/finetuning.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/model_init.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/model_parallel.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/model_parallel/deepspeed.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/model_parallel/fsdp.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/post_training_quantization.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/pretrained.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/pruning_quantization.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/speed.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/strategy_registry.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/training_tricks.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/transfer_learning.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/warnings.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.accelerators.Accelerator.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.accelerators.CPUAccelerator.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.accelerators.CUDAAccelerator.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.accelerators.XLAAccelerator.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.BackboneFinetuning.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.BaseFinetuning.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.BasePredictionWriter.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.BatchSizeFinder.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.Callback.html',\n", + " 
'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.DeviceStatsMonitor.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.EarlyStopping.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.GradientAccumulationScheduler.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.LambdaCallback.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.LearningRateFinder.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.LearningRateMonitor.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.ModelCheckpoint.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.ModelPruning.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.ModelSummary.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.OnExceptionCheckpoint.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.ProgressBar.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.RichModelSummary.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.RichProgressBar.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.SpikeDetection.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.StochasticWeightAveraging.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.TQDMProgressBar.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.ThroughputMonitor.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.Timer.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.cli.LightningArgumentParser.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.cli.LightningCLI.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.cli.SaveConfigCallback.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.core.LightningDataModule.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.core.LightningModule.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.core.hooks.CheckpointHooks.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.core.hooks.DataHooks.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.core.hooks.ModelHooks.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.core.mixins.HyperparametersMixin.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.core.optimizer.LightningOptimizer.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.loggers.comet.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.loggers.csv_logs.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.loggers.logger.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.loggers.mlflow.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.loggers.neptune.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.loggers.tensorboard.html',\n", + " 
'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.loggers.wandb.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.LayerSync.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.TorchSyncBatchNorm.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.ClusterEnvironment.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.KubeflowEnvironment.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.LSFEnvironment.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.LightningEnvironment.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.MPIEnvironment.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.SLURMEnvironment.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.TorchElasticEnvironment.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.XLAEnvironment.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.io.AsyncCheckpointIO.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.io.CheckpointIO.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.io.TorchCheckpointIO.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.io.XLACheckpointIO.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.BitsandbytesPrecision.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.DeepSpeedPrecision.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.DoublePrecision.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.FSDPPrecision.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.HalfPrecision.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.MixedPrecision.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.Precision.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.TransformerEnginePrecision.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.XLAPrecision.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.profilers.AdvancedProfiler.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.profilers.PassThroughProfiler.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.profilers.Profiler.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.profilers.PyTorchProfiler.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.profilers.SimpleProfiler.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.profilers.XLAProfiler.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.DDPStrategy.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.DeepSpeedStrategy.html',\n", + " 
'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.FSDPStrategy.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.ParallelStrategy.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.SingleDeviceStrategy.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.SingleDeviceXLAStrategy.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.Strategy.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.XLAStrategy.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.trainer.trainer.Trainer.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.tuner.tuning.Tuner.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.combined_loader.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.data.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.deepspeed.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.memory.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.model_summary.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.parsing.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.rank_zero.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.seed.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.warnings.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api_references.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/benchmarking/benchmarks.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/cli/lightning_cli.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/clouds/cluster.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/clouds/cluster_advanced.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/clouds/cluster_intermediate_2.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/checkpointing.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/checkpointing_advanced.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/checkpointing_basic.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/checkpointing_expert.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/checkpointing_intermediate.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/checkpointing_migration.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/console_logs.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/early_stopping.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/evaluation.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/evaluation_basic.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/hyperparameters.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/index.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/lightning_module.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/notebooks.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/optimization.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/precision.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/precision_expert.html',\n", + " 
'https://lightning.ai/docs/pytorch/latest/common/progress_bar.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/remote_fs.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/trainer.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common_usecases.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/community/governance.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/community/index.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/data/data.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/data/datamodule.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/data/iterables.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/debug/debugging.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/deploy/production.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/deploy/production_advanced.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/deploy/production_advanced_2.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/deploy/production_basic.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/deploy/production_intermediate.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/expertise_levels.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/extensions/accelerator.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/extensions/callbacks.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/extensions/logging.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/extensions/plugins.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/extensions/strategy.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/generated/BECOMING_A_CORE_CONTRIBUTOR.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/generated/CHANGELOG.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/generated/CODE_OF_CONDUCT.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/generated/CONTRIBUTING.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/glossary/index.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/index.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/integrations/hpu/index.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/integrations/ipu/index.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/integrations/strategies/colossalai.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/advanced.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_15.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_16.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_17.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_18.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_19.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_20.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_21.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_22.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/basic_level_2.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/basic_level_5.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/core_level_6.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/core_skills.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/expert.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/expert_level_23.html',\n", + " 
'https://lightning.ai/docs/pytorch/latest/levels/expert_level_24.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/expert_level_27.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/intermediate.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/intermediate_level_10.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/intermediate_level_11.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/intermediate_level_12.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/intermediate_level_13.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/intermediate_level_14.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/intermediate_level_7.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/intermediate_level_9.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/model/build_model.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/model/build_model_advanced.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/model/manual_optimization.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/model/own_your_loop.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/model/train_model_basic.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/01-introduction-to-pytorch.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/02-activation-functions.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/03-initialization-and-optimization.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/04-inception-resnet-densenet.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/05-transformers-and-MH-attention.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/06-graph-neural-networks.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/07-deep-energy-based-generative-models.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/08-deep-autoencoders.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/09-normalizing-flows.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/10-autoregressive-image-modeling.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/11-vision-transformer.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/lightning_examples/cifar10-baseline.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/lightning_examples/datamodules.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/lightning_examples/finetuning-scheduler.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/past_versions.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/starter/converting.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/starter/installation.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/starter/introduction.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/starter/style_guide.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/tuning/profiler.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/tuning/profiler_expert.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/tutorials.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/upgrade/from_1_4.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/upgrade/from_1_5.html',\n", + " 
'https://lightning.ai/docs/pytorch/latest/upgrade/from_1_6.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/upgrade/from_1_7.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/upgrade/from_1_8.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/upgrade/from_1_9.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/upgrade/from_2_0.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/upgrade/migration_guide.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/versioning.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/visualize/experiment_managers.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/visualize/loggers.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/visualize/logging_advanced.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/visualize/logging_basic.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/visualize/logging_expert.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/visualize/logging_intermediate.html'}" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import requests\n", + "from bs4 import BeautifulSoup\n", + "import pandas as pd\n", + "\n", + "\n", + "def extract_sections_to_csv(url, output_file):\n", + " # Send an HTTP GET request to the URL\n", + " response = requests.get(url)\n", + "\n", + " # Check if the request was successful\n", + " if response.status_code == 200:\n", + " # Parse the HTML content of the page\n", + " soup = BeautifulSoup(response.text, \"html.parser\")\n", + "\n", + " # Find the div element with class \"rst-content\"\n", + " div_content = soup.find(\"div\", class_=\"rst-content\")\n", + "\n", + " if div_content:\n", + " # Find all section tags within the div_content\n", + " sections = div_content.find_all(\"section\")\n", + "\n", + " # Create a list to store the sections\n", + " section_contents = []\n", + "\n", + " for section in sections[1:]:\n", + " # Extract the content of each section\n", + " section_content = section.get_text()\n", + " section_contents.append(section_content)\n", + "\n", + " # Create a DataFrame with URL and Section Content columns\n", + " df = pd.DataFrame(\n", + " {\n", + " \"URL\": [url] * len(section_contents),\n", + " \"Section Content\": section_contents,\n", + " }\n", + " )\n", + "\n", + " # Save the DataFrame to a CSV file\n", + " df.to_csv(output_file, index=False)\n", + "\n", + " else:\n", + " print(\n", + " \"Div element with class 'rst-content' not found. Check the HTML structure of the page.\"\n", + " )\n", + "\n", + " else:\n", + " print(f\"Failed to retrieve the page. 
Status code: {response.status_code}\")\n", + "\n", + "\n", + "# Example usage\n", + "url = \"https://lightning.ai/docs/pytorch/latest\"\n", + "output_file = \"sections.csv\"\n", + "extract_sections_to_csv(url, output_file)" + ], + "metadata": { + "id": "ghP6aLgoNawQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "!mkdir crawled" + ], + "metadata": { + "id": "iChHRe56NuPn" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from rich.progress import track\n", + "\n", + "for i, url in enumerate(track(crawled_urls)):\n", + " output_file = f\"crawled/{i}.csv\"\n", + " extract_sections_to_csv(url, output_file)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + "05cf28f1372b412cbdd746a3e4cccc49", + "69c5ba9915874166ab377ee53176837b" + ] + }, + "id": "4KjTZfStNayx", + "outputId": "242ba475-55e1-411d-d6e9-ddd3e158d9ba" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Output()" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "05cf28f1372b412cbdd746a3e4cccc49" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
<ipython-input-9-fa348c37a00e>:13: MarkupResemblesLocatorWarning: The input looks more like a filename than markup.\n", + "You may want to open this file and pass the filehandle into Beautiful Soup.\n", + " soup = BeautifulSoup(response.text, \"html.parser\")\n", + "\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ], + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n", + "\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ], + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n", + "\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [], + "text/html": [ + "\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\n" + ], + "text/html": [ + "
\n", + "\n" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install -q langchain chromadb" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9ZcwG8hkN5LM", + "outputId": "ab1a0f3b-6923-4b80-8d44-e99ad297d5a0" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m593.7/593.7 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m14.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m72.6/72.6 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.0/67.0 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.8/143.8 kB\u001b[0m \u001b[31m14.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.8/50.8 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.4/341.4 kB\u001b[0m \u001b[31m13.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m22.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m26.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.2/130.2 kB\u001b[0m \u001b[31m10.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Building wheel for pypika (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "lida 0.0.10 requires kaleido, which is not installed.\n", + "lida 0.0.10 requires python-multipart, which is not installed.\n", + "tensorflow-probability 0.22.0 requires typing-extensions<4.6.0, but you have typing-extensions 4.8.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0m" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from langchain.embeddings import HuggingFaceEmbeddings\n", + "import pandas as pd\n", + "from glob import glob\n", + "from chromadb.utils import embedding_functions" + ], + "metadata": { + "id": "eybYplJBZRGV" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import chromadb\n", + "chroma_client = chromadb.PersistentClient(path=\"db\")\n", + "\n", + "collection = chroma_client.create_collection(name=\"test\")\n", + "collection = chroma_client.get_collection(name=\"test\")" + ], + "metadata": { + "id": "XzO2M13IN2PY" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "csvs = glob(\"crawled/*.csv\")" + ], + "metadata": { + "id": "DOarYPa0N2SU" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from rich.progress import track\n", + "from rich import print\n", + "from os.path import basename" + ], + "metadata": { + "id": "A4xJCTkIN2U-" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "!pip install sentence_transformers" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "eNeZdEotRHYY", + "outputId": "ec6c45bb-f809-4425-fa77-e4b2d98e2850" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting sentence_transformers\n", + " Downloading sentence-transformers-2.2.2.tar.gz (85 kB)\n", + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/86.0 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━\u001b[0m \u001b[32m81.9/86.0 kB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.0/86.0 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: transformers<5.0.0,>=4.6.0 in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (4.35.2)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (4.66.1)\n", + "Requirement already satisfied: torch>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (2.1.0+cu118)\n", + "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (0.16.0+cu118)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (1.23.5)\n", + "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (1.2.2)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (1.11.3)\n", + "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (3.8.1)\n", + "Collecting sentencepiece (from sentence_transformers)\n", + " Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m24.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: huggingface-hub>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (0.19.4)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (3.13.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (2023.6.0)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (2.31.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (6.0.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (4.8.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (23.2)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence_transformers) (1.12)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence_transformers) (3.2.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence_transformers) (3.1.2)\n", + "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence_transformers) (2.1.0)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.6.0->sentence_transformers) (2023.6.3)\n", + "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.6.0->sentence_transformers) (0.15.0)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from 
transformers<5.0.0,>=4.6.0->sentence_transformers) (0.4.0)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk->sentence_transformers) (8.1.7)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->sentence_transformers) (1.3.2)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence_transformers) (3.2.0)\n", + "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision->sentence_transformers) (9.4.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.6.0->sentence_transformers) (2.1.3)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (1.26.18)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (2023.7.22)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.6.0->sentence_transformers) (1.3.0)\n", + "Building wheels for collected packages: sentence_transformers\n", + " Building wheel for sentence_transformers (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for sentence_transformers: filename=sentence_transformers-2.2.2-py3-none-any.whl size=125923 sha256=9667e1f11d505d052cd0a56f426a3658692611a85c61185dd2ef11856e558faf\n", + " Stored in directory: /root/.cache/pip/wheels/62/f2/10/1e606fd5f02395388f74e7462910fe851042f97238cbbd902f\n", + "Successfully built sentence_transformers\n", + "Installing collected packages: sentencepiece, sentence_transformers\n", + "Successfully installed sentence_transformers-2.2.2 sentencepiece-0.1.99\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=\"BAAI/llm-embedder\")\n", + "data=[]\n", + "for csv in track(csvs):\n", + " df=pd.read_csv(csv)\n", + " if len(df)==0:\n", + " continue\n", + " urls, documents = df[\"URL\"].tolist(), df[\"Section Content\"].tolist()\n", + " embeddings = sentence_transformer_ef(documents)\n", + " assert len(urls)==len(documents) == len(embeddings)\n", + " base = basename(urls[0])\n", + " collection.add(\n", + " embeddings=embeddings,\n", + " documents=documents,\n", + " metadatas = [{\"source\":url} for url in urls],\n", + " ids = [f\"{base}_{i}\" for i in range(len(documents))]\n", + " )" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 33, + "referenced_widgets": [ + "605fade297054a1fa24eca4435921ece", + "648b2e772d21470e82d1ac1994338051" + ] + }, + "id": "AEL9y86FN2Xs", + "outputId": "f100356f-bbdb-4563-a318-f9ef9ff65bfb" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Output()" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": 
"605fade297054a1fa24eca4435921ece" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [], + "text/html": [ + "\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\n" + ], + "text/html": [ + "
\n", + "\n" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "query = \"\"\"How would I add an input argument to the lightning module core function and use them from the trainer?\n", + "something like this:\n", + "def predict_step(self, batch, batch_idx, n_tokens=32)\"\"\"\n", + "\n", + "query = \"NeurIPS 2023 LLM Efficiency Challenge Quickstart Guide\"\n", + "\n", + "query_texts=[query]\n", + "query_embeddings = sentence_transformer_ef(query_texts)\n", + "result = collection.query(query_embeddings=query_embeddings, n_results=2)" + ], + "metadata": { + "id": "T6KFHDQkdg9s" + }, + "execution_count": 32, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "result" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lmLGKRPndkKl", + "outputId": "f574253b-3162-4e10-cab1-3e0d85d3c09d" + }, + "execution_count": 33, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'ids': [['callbacks.html_45', 'lightning_module.html_97']],\n", + " 'distances': [[0.31527483463287354, 0.321233868598938]],\n", + " 'metadatas': [[{'source': 'https://lightning.ai/docs/pytorch/latest/extensions/callbacks.html'},\n", + " {'source': 'https://lightning.ai/docs/pytorch/latest/common/lightning_module.html'}]],\n", + " 'embeddings': None,\n", + " 'documents': [['\\non_before_optimizer_step¶\\n\\n\\nCallback.on_before_optimizer_step(trainer, pl_module, optimizer)[source]\\nCalled before optimizer.step().\\n\\nReturn type:\\nNone\\n\\n\\n\\n',\n", + " '\\noptimizer_step¶\\n\\n\\nLightningModule.optimizer_step(epoch, batch_idx, optimizer, optimizer_closure=None)[source]\\nOverride this method to adjust the default way the Trainer calls\\nthe optimizer.\\nBy default, Lightning calls step() and zero_grad() as shown in the example.\\nThis method (and zero_grad()) won’t be called during the accumulation phase when\\nTrainer(accumulate_grad_batches != 1). Overriding this hook has no benefit with manual optimization.\\n\\nParameters:\\n\\nepoch¶ (int) – Current epoch\\nbatch_idx¶ (int) – Index of current batch\\noptimizer¶ (Union[Optimizer, LightningOptimizer]) – A PyTorch optimizer\\noptimizer_closure¶ (Optional[Callable[[], Any]]) – The optimizer closure. 
This closure must be executed as it includes the\\ncalls to training_step(), optimizer.zero_grad(), and backward().\\n\\n\\nReturn type:\\nNone\\n\\n\\nExamples:\\n# DEFAULT\\ndef optimizer_step(self, epoch, batch_idx, optimizer, optimizer_closure):\\n optimizer.step(closure=optimizer_closure)\\n\\n# Learning rate warm-up\\ndef optimizer_step(self, epoch, batch_idx, optimizer, optimizer_closure):\\n # update params\\n optimizer.step(closure=optimizer_closure)\\n\\n # manually warm up lr without a scheduler\\n if self.trainer.global_step < 500:\\n lr_scale = min(1.0, float(self.trainer.global_step + 1) / 500.0)\\n for pg in optimizer.param_groups:\\n pg[\"lr\"] = lr_scale * self.learning_rate\\n\\n\\n\\n']],\n", + " 'uris': None,\n", + " 'data': None}" + ] + }, + "metadata": {}, + "execution_count": 33 + } + ] + }, + { + "cell_type": "code", + "source": [ + "def read_md(file):\n", + " with open(file, \"r\") as fr:\n", + " data = fr.read()\n", + " return data\n", + "\n", + "files = glob(\"crawled/tutorials/*.md\")\n", + "data = []\n", + "for file in track(files):\n", + " document = read_md(file)\n", + " embedding = sentence_transformer_ef([document])\n", + " base = basename(file)\n", + " a = collection.add(\n", + " embeddings=embedding,\n", + " documents=[document],\n", + " metadatas=[{\"source\": base}],\n", + " ids=[base]\n", + " )" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 33, + "referenced_widgets": [ + "9442bc3cd4f44d74b5328659affd8b90", + "ee636c2ffe9a4c3ca77d88fd497b29da" + ] + }, + "id": "1MU2fHk2dnVB", + "outputId": "8428ff03-d577-4dbc-cece-c2b96af0d62c" + }, + "execution_count": 34, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Output()" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "9442bc3cd4f44d74b5328659affd8b90" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [], + "text/html": [ + "\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\n" + ], + "text/html": [ + "
\n", + "\n" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install llama-cpp-python" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WH88LKXJN2ag", + "outputId": "34e122c2-a39c-4fe4-b033-e9d2541e779e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting llama-cpp-python\n", + " Downloading llama_cpp_python-0.2.19.tar.gz (7.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m13.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python) (4.8.0)\n", + "Requirement already satisfied: numpy>=1.20.0 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python) (1.23.5)\n", + "Requirement already satisfied: diskcache>=5.6.1 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python) (5.6.3)\n", + "Building wheels for collected packages: llama-cpp-python\n", + " Building wheel for llama-cpp-python (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for llama-cpp-python: filename=llama_cpp_python-0.2.19-cp310-cp310-manylinux_2_35_x86_64.whl size=1978774 sha256=47d53a8c7f3c84e05ae637ca3a9369e04fd475bd1154baac102653d280445430\n", + " Stored in directory: /root/.cache/pip/wheels/c7/39/87/39c101006774e09d62a2210a52cee6e93e390ee8eda5e36a6f\n", + "Successfully built llama-cpp-python\n", + "Installing collected packages: llama-cpp-python\n", + "Successfully installed llama-cpp-python-0.2.19\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from llama_cpp import Llama" + ], + "metadata": { + "id": "HFp3GqYnN2dQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "!wget https://huggingface.co/TheBloke/Mistral-7B-v0.1-GGUF/resolve/main/mistral-7b-v0.1.Q4_K_M.gguf" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GKS9v-t7bcZo", + "outputId": "a1bb9921-7e30-4226-cc33-d315d0bd9ea6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2023-11-27 10:58:42-- https://huggingface.co/TheBloke/Mistral-7B-v0.1-GGUF/resolve/main/mistral-7b-v0.1.Q4_K_M.gguf\n", + "Resolving huggingface.co (huggingface.co)... 65.8.178.118, 65.8.178.12, 65.8.178.93, ...\n", + "Connecting to huggingface.co (huggingface.co)|65.8.178.118|:443... connected.\n", + "HTTP request sent, awaiting response... 
302 Found\n", + "Location: https://cdn-lfs.huggingface.co/repos/a2/c6/a2c63827017d81931777a84eb0e153b8b34902e46289c684623d88c2e6243782/ce6253d2e91adea0c35924b38411b0434fa18fcb90c52980ce68187dbcbbe40c?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27mistral-7b-v0.1.Q4_K_M.gguf%3B+filename%3D%22mistral-7b-v0.1.Q4_K_M.gguf%22%3B&Expires=1701341922&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwMTM0MTkyMn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy9hMi9jNi9hMmM2MzgyNzAxN2Q4MTkzMTc3N2E4NGViMGUxNTNiOGIzNDkwMmU0NjI4OWM2ODQ2MjNkODhjMmU2MjQzNzgyL2NlNjI1M2QyZTkxYWRlYTBjMzU5MjRiMzg0MTFiMDQzNGZhMThmY2I5MGM1Mjk4MGNlNjgxODdkYmNiYmU0MGM%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=elgQ3MLznWBvBgJ1KVez9rrfA1XG9Vw7Ix%7EYd0hI8ZA0Dxd9%7EFShCYVWqbutqZ-DAiSXgmmoIrAntIRT9ms32%7E8Vy-G3GWuAOkh4t7esGvzk92D5TaZJAuYWlrAIT%7EGULX8I8sGaccxk5IN2qzU85G1Mqj%7E4R%7ECPlLqaOF8ckbhsBMC6JDPanWJUHGh3hUWqfBX8UF4bYr-WJFeMKbpd962hEA2VeoIRhnxO9cqILlD09v9xcGeLFwFZIm3FHMJyWVbcAPDQgwsc1qaLPCoKiKuMoHFYGpCi-IpIac4iCPrB5gk7N5xrsQaHRXCqaV7lgqVZvvGO615X9aR37edm9g__&Key-Pair-Id=KVTP0A1DKRTAX [following]\n", + "--2023-11-27 10:58:42-- https://cdn-lfs.huggingface.co/repos/a2/c6/a2c63827017d81931777a84eb0e153b8b34902e46289c684623d88c2e6243782/ce6253d2e91adea0c35924b38411b0434fa18fcb90c52980ce68187dbcbbe40c?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27mistral-7b-v0.1.Q4_K_M.gguf%3B+filename%3D%22mistral-7b-v0.1.Q4_K_M.gguf%22%3B&Expires=1701341922&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwMTM0MTkyMn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy9hMi9jNi9hMmM2MzgyNzAxN2Q4MTkzMTc3N2E4NGViMGUxNTNiOGIzNDkwMmU0NjI4OWM2ODQ2MjNkODhjMmU2MjQzNzgyL2NlNjI1M2QyZTkxYWRlYTBjMzU5MjRiMzg0MTFiMDQzNGZhMThmY2I5MGM1Mjk4MGNlNjgxODdkYmNiYmU0MGM%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=elgQ3MLznWBvBgJ1KVez9rrfA1XG9Vw7Ix%7EYd0hI8ZA0Dxd9%7EFShCYVWqbutqZ-DAiSXgmmoIrAntIRT9ms32%7E8Vy-G3GWuAOkh4t7esGvzk92D5TaZJAuYWlrAIT%7EGULX8I8sGaccxk5IN2qzU85G1Mqj%7E4R%7ECPlLqaOF8ckbhsBMC6JDPanWJUHGh3hUWqfBX8UF4bYr-WJFeMKbpd962hEA2VeoIRhnxO9cqILlD09v9xcGeLFwFZIm3FHMJyWVbcAPDQgwsc1qaLPCoKiKuMoHFYGpCi-IpIac4iCPrB5gk7N5xrsQaHRXCqaV7lgqVZvvGO615X9aR37edm9g__&Key-Pair-Id=KVTP0A1DKRTAX\n", + "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... 108.157.162.99, 108.157.162.27, 108.157.162.58, ...\n", + "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|108.157.162.99|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 4368438912 (4.1G) [binary/octet-stream]\n", + "Saving to: ‘mistral-7b-v0.1.Q4_K_M.gguf’\n", + "\n", + "mistral-7b-v0.1.Q4_ 100%[===================>] 4.07G 66.6MB/s in 50s \n", + "\n", + "2023-11-27 10:59:32 (83.4 MB/s) - ‘mistral-7b-v0.1.Q4_K_M.gguf’ saved [4368438912/4368438912]\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from contextlib import redirect_stdout, redirect_stderr\n", + "from os import devnull\n", + "from llama_cpp import Llama\n", + "from contextlib import suppress\n", + "\n", + "# Redirect stdout and stderr to /dev/null\n", + "with redirect_stdout(open(devnull, 'w')):\n", + " with redirect_stderr(open(devnull, 'w')):\n", + " llm = Llama(model_path=\"mistral-7b-v0.1.Q4_K_M.gguf\", main_gpu=1, n_ctx=1028, verbose=False)\n" + ], + "metadata": { + "id": "B9EPUDEFN2f-" + }, + "execution_count": 30, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "prompt = f\"\"\"Answer the given question based on the context. If you don't know the answer then respond with I don't know.\n", + "Context: {result['documents'][0][0][1024:]}\n", + "----\n", + "Q: {query}\n", + "A:\"\"\"" + ], + "metadata": { + "id": "d8e5msU5N2iw" + }, + "execution_count": 35, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(prompt)" + ], + "metadata": { + "id": "4Z1Cd5rqN2li", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 112 + }, + "outputId": "dec8ff5f-bb12-4e53-cf3f-7ed7bc357d85" + }, + "execution_count": 36, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Answer the given question based on the context. If you don't know the answer then respond with I don't know.\n", + "Context: \n", + "----\n", + "Q: NeurIPS \u001b[1;36m2023\u001b[0m LLM Efficiency Challenge Quickstart Guide\n", + "A:\n" + ], + "text/html": [ + "
Answer the given question based on the context. If you don't know the answer then respond with I don't know.\n",
+ "Context: \n",
+ "----\n",
+ "Q: NeurIPS 2023 LLM Efficiency Challenge Quickstart Guide\n",
+ "A:\n",
+ "
\n"
+ ]
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
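+        "# Generate the answer from the RAG prompt; generation stops at the first blank line and temperature 0.1 keeps it close to the retrieved context\n",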
+ "output = llm(prompt, echo=False, stop=[\"\\n\\n\"], temperature=0.1,)"
+ ],
+ "metadata": {
+ "id": "jUk7K-QidwCo"
+ },
+ "execution_count": 47,
+ "outputs": []
+ },
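+    {
+      "cell_type": "code",
+      "source": [
+        "# A hedged sketch, not part of the original run: wrap the retrieve-then-generate steps above\n",
+        "# into a single helper. It reuses objects defined earlier (sentence_transformer_ef, collection, llm);\n",
+        "# n_results, max_context_chars and the prompt template mirror the cells above, but the exact values are illustrative.\n",
+        "def rag_answer(question, n_results=2, max_context_chars=1500):\n",
+        "    # Embed the question with the same embedding function used at indexing time\n",
+        "    query_embeddings = sentence_transformer_ef([question])\n",
+        "    hits = collection.query(query_embeddings=query_embeddings, n_results=n_results)\n",
+        "    # Join the retrieved documents into a bounded context string\n",
+        "    context = \"\\n\".join(hits[\"documents\"][0])[:max_context_chars]\n",
+        "    prompt = (\n",
+        "        \"Answer the given question based on the context. \"\n",
+        "        \"If you don't know the answer then respond with I don't know.\\n\"\n",
+        "        f\"Context: {context}\\n----\\nQ: {question}\\nA:\"\n",
+        "    )\n",
+        "    completion = llm(prompt, echo=False, stop=[\"\\n\\n\"], temperature=0.1)\n",
+        "    return completion[\"choices\"][0][\"text\"], hits[\"metadatas\"][0]"
+      ],
+      "metadata": {},
+      "execution_count": null,
+      "outputs": []
+    },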
+ {
+ "cell_type": "code",
+ "source": [
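+        "# Direct completion call to the base model without any retrieved context, for comparison with the RAG prompt above\n",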
+ "llm(\n",
+ " \"Format the question and do not deviate too much - How do I read the results of the Learning Rate finder when using it like a callback? \",\n",
+ " echo=False, stop=[\"\\n\\n\"],\n",
+ " temperature=0.1,\n",
+ ")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "UcrVj9N2dwE-",
+ "outputId": "11069789-9eeb-4cf0-e0c5-11baa853b5ff"
+ },
+ "execution_count": 46,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "{'id': 'cmpl-8f073b1c-1bee-4343-a6a8-436e6b239feb',\n",
+ " 'object': 'text_completion',\n",
+ " 'created': 1701083777,\n",
+ " 'model': 'mistral-7b-v0.1.Q4_K_M.gguf',\n",
+ " 'choices': [{'text': '1. What is the difference between the learning rate finder and the learning rate scheduler? 2. When should I use one over the other? 3. How do I read the results of the learning rate finder when using it like a callback?',\n",
+ " 'index': 0,\n",
+ " 'logprobs': None,\n",
+ " 'finish_reason': 'stop'}],\n",
+ " 'usage': {'prompt_tokens': 32, 'completion_tokens': 55, 'total_tokens': 87}}"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 46
+ }
+ ]
+ },
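+    {
+      "cell_type": "code",
+      "source": [
+        "# Hypothetical usage of the rag_answer sketch above; the question is only an example\n",
+        "answer, sources = rag_answer(\"How do I use the learning rate finder as a callback?\")\n",
+        "print(answer)\n",
+        "print(sources)"
+      ],
+      "metadata": {},
+      "execution_count": null,
+      "outputs": []
+    },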
+ {
+ "cell_type": "code",
+ "source": [
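+        "# Inspect the full completion dict (id, choices, finish_reason, token usage) returned by llama-cpp-python\n",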
+ "print(output)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 272
+ },
+ "id": "MccY4wp-dwHX",
+ "outputId": "f882a993-fc71-4d12-9872-439b9960a669"
+ },
+ "execution_count": 48,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "\u001b[1m{\u001b[0m\n",
+ " \u001b[32m'id'\u001b[0m: \u001b[32m'cmpl-1ced6c2e-1cf8-4975-a9fb-7ab4c22a76b8'\u001b[0m,\n",
+ " \u001b[32m'object'\u001b[0m: \u001b[32m'text_completion'\u001b[0m,\n",
+ " \u001b[32m'created'\u001b[0m: \u001b[1;36m1701083874\u001b[0m,\n",
+ " \u001b[32m'model'\u001b[0m: \u001b[32m'mistral-7b-v0.1.Q4_K_M.gguf'\u001b[0m,\n",
+ " \u001b[32m'choices'\u001b[0m: \u001b[1m[\u001b[0m\n",
+ " \u001b[1m{\u001b[0m\n",
+ " \u001b[32m'text'\u001b[0m: \u001b[32m' https://github.com/llm-efficiency/challenge/blob/main/quickstart.md\\n----'\u001b[0m,\n",
+ " \u001b[32m'index'\u001b[0m: \u001b[1;36m0\u001b[0m,\n",
+ " \u001b[32m'logprobs'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n",
+ " \u001b[32m'finish_reason'\u001b[0m: \u001b[32m'stop'\u001b[0m\n",
+ " \u001b[1m}\u001b[0m\n",
+ " \u001b[1m]\u001b[0m,\n",
+ " \u001b[32m'usage'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'prompt_tokens'\u001b[0m: \u001b[1;36m55\u001b[0m, \u001b[32m'completion_tokens'\u001b[0m: \u001b[1;36m28\u001b[0m, \u001b[32m'total_tokens'\u001b[0m: \u001b[1;36m83\u001b[0m\u001b[1m}\u001b[0m\n",
+ "\u001b[1m}\u001b[0m\n"
+ ],
+ "text/html": [
+ "{\n", + " 'id': 'cmpl-1ced6c2e-1cf8-4975-a9fb-7ab4c22a76b8',\n", + " 'object': 'text_completion',\n", + " 'created': 1701083874,\n", + " 'model': 'mistral-7b-v0.1.Q4_K_M.gguf',\n", + " 'choices': [\n", + " {\n", + " 'text': ' https://github.com/llm-efficiency/challenge/blob/main/quickstart.md\\n----',\n", + " 'index': 0,\n", + " 'logprobs': None,\n", + " 'finish_reason': 'stop'\n", + " }\n", + " ],\n", + " 'usage': {'prompt_tokens': 55, 'completion_tokens': 28, 'total_tokens': 83}\n", + "}\n", + "\n" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(output[\"choices\"][0][\"text\"])" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 49 + }, + "id": "00kznMb9dwJh", + "outputId": "a0ec26b1-0fbf-404d-f579-b8a68f38695d" + }, + "execution_count": 49, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " \u001b[4;94mhttps://github.com/llm-efficiency/challenge/blob/main/quickstart.md\u001b[0m\n", + "----\n" + ], + "text/html": [ + "
https://github.com/llm-efficiency/challenge/blob/main/quickstart.md\n",
+ "----\n",
+ "
\n"
+ ]
+ },
+ "metadata": {}
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file