{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from bs4 import BeautifulSoup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "1: Install PyTorch Lightning¶\n",
      "\n",
      "For pip users\n",
      "pip install lightning\n",
      "\n",
      "\n",
      "\n",
      "For conda users\n",
      "conda install lightning -c conda-forge\n",
      "\n",
      "\n",
      "\n",
      "Or read the advanced install guide\n",
      "\n",
      "\n",
      "-------------------\n",
      "\n",
      "2: Define a LightningModule¶\n",
      "A LightningModule enables your PyTorch nn.Module to play together in complex ways inside the training_step (there is also an optional validation_step and test_step).\n",
      "import os\n",
      "from torch import optim, nn, utils, Tensor\n",
      "from torchvision.datasets import MNIST\n",
      "from torchvision.transforms import ToTensor\n",
      "import lightning as L\n",
      "\n",
      "# define any number of nn.Modules (or use your current ones)\n",
      "encoder = nn.Sequential(nn.Linear(28 * 28, 64), nn.ReLU(), nn.Linear(64, 3))\n",
      "decoder = nn.Sequential(nn.Linear(3, 64), nn.ReLU(), nn.Linear(64, 28 * 28))\n",
      "\n",
      "\n",
      "# define the LightningModule\n",
      "class LitAutoEncoder(L.LightningModule):\n",
      "    def __init__(self, encoder, decoder):\n",
      "        super().__init__()\n",
      "        self.encoder = encoder\n",
      "        self.decoder = decoder\n",
      "\n",
      "    def training_step(self, batch, batch_idx):\n",
      "        # training_step defines the train loop.\n",
      "        # it is independent of forward\n",
      "        x, y = batch\n",
      "        x = x.view(x.size(0), -1)\n",
      "        z = self.encoder(x)\n",
      "        x_hat = self.decoder(z)\n",
      "        loss = nn.functional.mse_loss(x_hat, x)\n",
      "        # Logging to TensorBoard (if installed) by default\n",
      "        self.log(\"train_loss\", loss)\n",
      "        return loss\n",
      "\n",
      "    def configure_optimizers(self):\n",
      "        optimizer = optim.Adam(self.parameters(), lr=1e-3)\n",
      "        return optimizer\n",
      "\n",
      "\n",
      "# init the autoencoder\n",
      "autoencoder = LitAutoEncoder(encoder, decoder)\n",
      "\n",
      "\n",
      "\n",
      "-------------------\n",
      "\n",
      "3: Define a dataset¶\n",
      "Lightning supports ANY iterable (DataLoader, numpy, etc…) for the train/val/test/predict splits.\n",
      "# setup data\n",
      "dataset = MNIST(os.getcwd(), download=True, transform=ToTensor())\n",
      "train_loader = utils.data.DataLoader(dataset)\n",
      "\n",
      "\n",
      "\n",
      "-------------------\n",
      "\n",
      "4: Train the model¶\n",
      "The Lightning Trainer “mixes” any LightningModule with any dataset and abstracts away all the engineering complexity needed for scale.\n",
      "# train the model (hint: here are some helpful Trainer arguments for rapid idea iteration)\n",
      "trainer = L.Trainer(limit_train_batches=100, max_epochs=1)\n",
      "trainer.fit(model=autoencoder, train_dataloaders=train_loader)\n",
      "\n",
      "\n",
      "The Lightning Trainer automates 40+ tricks including:\n",
      "\n",
      "Epoch and batch iteration\n",
      "optimizer.step(), loss.backward(), optimizer.zero_grad() calls\n",
      "Calling of model.eval(), enabling/disabling grads during evaluation\n",
      "Checkpoint Saving and Loading\n",
      "Tensorboard (see loggers options)\n",
      "Multi-GPU support\n",
      "TPU\n",
      "16-bit precision AMP support\n",
      "\n",
      "\n",
      "-------------------\n",
      "\n",
      "5: Use the model¶\n",
      "Once you’ve trained the model you can export to onnx, torchscript and put it into production or simply load the weights and run predictions.\n",
      "# load checkpoint\n",
      "checkpoint = \"./lightning_logs/version_0/checkpoints/epoch=0-step=100.ckpt\"\n",
      "autoencoder = LitAutoEncoder.load_from_checkpoint(checkpoint, encoder=encoder, decoder=decoder)\n",
      "\n",
      "# choose your trained nn.Module\n",
      "encoder = autoencoder.encoder\n",
      "encoder.eval()\n",
      "\n",
      "# embed 4 fake images!\n",
      "fake_image_batch = torch.rand(4, 28 * 28, device=autoencoder.device)\n",
      "embeddings = encoder(fake_image_batch)\n",
      "print(\"⚡\" * 20, \"\\nPredictions (4 image embeddings):\\n\", embeddings, \"\\n\", \"⚡\" * 20)\n",
      "\n",
      "\n",
      "\n",
      "-------------------\n",
      "\n",
      "6: Visualize training¶\n",
      "If you have tensorboard installed, you can use it for visualizing experiments.\n",
      "Run this on your commandline and open your browser to http://localhost:6006/\n",
      "tensorboard --logdir .\n",
      "\n",
      "\n",
      "\n",
      "-------------------\n",
      "\n",
      "7: Supercharge training¶\n",
      "Enable advanced training features using Trainer arguments. These are state-of-the-art techniques that are automatically integrated into your training loop without changes to your code.\n",
      "# train on 4 GPUs\n",
      "trainer = Trainer(\n",
      "    devices=4,\n",
      "    accelerator=\"gpu\",\n",
      " )\n",
      "\n",
      "# train 1TB+ parameter models with Deepspeed/fsdp\n",
      "trainer = L.Trainer(\n",
      "    devices=4,\n",
      "    accelerator=\"gpu\",\n",
      "    strategy=\"deepspeed_stage_2\",\n",
      "    precision=16\n",
      " )\n",
      "\n",
      "# 20+ helpful flags for rapid idea iteration\n",
      "trainer = L.Trainer(\n",
      "    max_epochs=10,\n",
      "    min_epochs=5,\n",
      "    overfit_batches=1\n",
      " )\n",
      "\n",
      "# access the latest state of the art techniques\n",
      "trainer = Trainer(callbacks=[StochasticWeightAveraging(...)])\n",
      "\n",
      "\n",
      "\n",
      "-------------------\n",
      "\n",
      "Maximize flexibility¶\n",
      "Lightning’s core guiding principle is to always provide maximal flexibility without ever hiding any of the PyTorch.\n",
      "Lightning offers 5 added degrees of flexibility depending on your project’s complexity.\n",
      "\n",
      "\n",
      "Customize training loop¶\n",
      "\n",
      "Inject custom code anywhere in the Training loop using any of the 20+ methods (Hooks) available in the LightningModule.\n",
      "class LitAutoEncoder(L.LightningModule):\n",
      "    def backward(self, loss):\n",
      "        loss.backward()\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Extend the Trainer¶\n",
      "If you have multiple lines of code with similar functionalities, you can use callbacks to easily group them together and toggle all of those lines on or off at the same time.\n",
      "trainer = Trainer(callbacks=[AWSCheckpoints()])\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Use a raw PyTorch loop¶\n",
      "For certain types of work at the bleeding-edge of research, Lightning offers experts full control of optimization or the training loop in various ways.\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Manual optimization\n",
      "Automated training loop, but you own the optimization steps.\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "-------------------\n",
      "\n",
      "Customize training loop¶\n",
      "\n",
      "Inject custom code anywhere in the Training loop using any of the 20+ methods (Hooks) available in the LightningModule.\n",
      "class LitAutoEncoder(L.LightningModule):\n",
      "    def backward(self, loss):\n",
      "        loss.backward()\n",
      "\n",
      "\n",
      "\n",
      "-------------------\n",
      "\n",
      "Extend the Trainer¶\n",
      "If you have multiple lines of code with similar functionalities, you can use callbacks to easily group them together and toggle all of those lines on or off at the same time.\n",
      "trainer = Trainer(callbacks=[AWSCheckpoints()])\n",
      "\n",
      "\n",
      "\n",
      "-------------------\n",
      "\n",
      "Use a raw PyTorch loop¶\n",
      "For certain types of work at the bleeding-edge of research, Lightning offers experts full control of optimization or the training loop in various ways.\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Manual optimization\n",
      "Automated training loop, but you own the optimization steps.\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "-------------------\n",
      "\n",
      "Next steps¶\n",
      "Depending on your use case, you might want to check one of these out next.\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Level 2: Add a validation and test set\n",
      "Add validation and test sets to avoid over/underfitting.\n",
      "\n",
      "\n",
      "basic\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "See more examples\n",
      "See examples across computer vision, NLP, RL, etc...\n",
      "\n",
      "\n",
      "basic\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Deploy your model\n",
      "Learn how to predict or put your model into production\n",
      "\n",
      "\n",
      "basic\n",
      "\n",
      "\n",
      "\n",
      "-------------------\n"
     ]
    }
   ],
   "source": [
    "url = \"https://lightning.ai/docs/pytorch/latest/starter/introduction.html\"\n",
    "\n",
    "response = requests.get(url)\n",
    "\n",
    "if response.status_code == 200:\n",
    "    soup = BeautifulSoup(response.text, 'html.parser')\n",
    "\n",
    "    div_content = soup.find('div', class_='rst-content')\n",
    "\n",
    "    if div_content:\n",
    "        sections = div_content.find_all('section')\n",
    "\n",
    "        for section in sections[1:]:\n",
    "\n",
    "            section_content = section.get_text()\n",
    "            print(section_content)\n",
    "            print('-------------------')\n",
    "    else:\n",
    "        print(\"Div element with class 'rst-content' not found. Check the HTML structure of the page.\")\n",
    "\n",
    "else:\n",
    "    print(f\"Failed to retrieve the page. Status code: {response.status_code}\")\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from urllib.parse import urlparse, urlunparse,urljoin"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def remove_fragment(url):\n",
    "    parser_url = urlparse(url)\n",
    "\n",
    "    new_url = urlunparse(parser_url._replace(fragment=\"\"))\n",
    "    return new_url"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "from urllib.parse import urlparse, urljoin\n",
    "\n",
    "crawled_urls = set()\n",
    "\n",
    "\n",
    "# Function to fetch and extract links from a page\n",
    "def get_links(url):\n",
    "    try:\n",
    "        response = requests.get(url)\n",
    "        if response.status_code == 200:\n",
    "            soup = BeautifulSoup(response.text, \"html.parser\")\n",
    "            links = []\n",
    "            for a_tag in soup.find_all(\"a\", href=True):\n",
    "                link = a_tag[\"href\"]\n",
    "                links.append(link)\n",
    "            return links\n",
    "    except Exception as e:\n",
    "        print(f\"Failed to fetch links from {url}: {e}\")\n",
    "    return []\n",
    "\n",
    "\n",
    "# Function to recursively fetch links within the same domain\n",
    "def fetch_links_recursive(base_url, current_url, visited_urls, max_depth=4):\n",
    "    if current_url in visited_urls or max_depth == 0:\n",
    "        return\n",
    "\n",
    "    visited_urls.add(current_url)\n",
    "    links = get_links(current_url)\n",
    "    for link in links:\n",
    "        absolute_url = urljoin(current_url, link)\n",
    "        parsed_url = urlparse(absolute_url)\n",
    "        if parsed_url.netloc == base_url.netloc:\n",
    "            crawled_urls.add(remove_fragment(absolute_url))\n",
    "            fetch_links_recursive(base_url, absolute_url, visited_urls, max_depth - 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "base_url = urlparse(\n",
    "    \"https://lightning.ai/docs/pytorch/latest/starter/introduction.html\"\n",
    ")\n",
    "visited_urls = set()\n",
    "fetch_links_recursive(base_url, base_url.geturl(), visited_urls)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "275"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(crawled_urls)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'https://lightning.ai/docs/fabric/',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_images/custom_loop.png',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_images/ddp.gif',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_modules/lightning/fabric/utilities/throughput.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_modules/lightning/pytorch/core/module.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_modules/lightning/pytorch/trainer/trainer.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/accelerators/gpu.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/accelerators/tpu.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/advanced/speed.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/api_references.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/common/checkpointing.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/common/index.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/common/lightning_module.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/common/trainer.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/community/index.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/deploy/production.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/glossary/index.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/index.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/levels/advanced.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/levels/basic_level_2.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/levels/core_skills.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/levels/expert.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/levels/intermediate.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/model/build_model_advanced.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/starter/installation.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/starter/introduction.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/tutorials.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/from_1_4.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/from_1_5.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/from_1_6.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/from_1_7.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/from_1_8.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/from_1_9.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/from_2_0.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/migration_guide.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/_sources/visualize/loggers.rst.txt',\n",
       " 'https://lightning.ai/docs/pytorch/latest/accelerators/accelerator_prepare.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/accelerators/gpu.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/accelerators/gpu_advanced.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/accelerators/gpu_basic.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/accelerators/gpu_expert.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/accelerators/gpu_faq.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/accelerators/gpu_intermediate.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/accelerators/mps.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/accelerators/tpu.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/accelerators/tpu_advanced.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/accelerators/tpu_basic.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/accelerators/tpu_faq.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/accelerators/tpu_intermediate.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/advanced/ddp_optimizations.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/advanced/finetuning.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/advanced/model_init.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/advanced/model_parallel.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/advanced/model_parallel/deepspeed.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/advanced/model_parallel/fsdp.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/advanced/post_training_quantization.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/advanced/pretrained.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/advanced/pruning_quantization.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/advanced/speed.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/advanced/strategy_registry.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/advanced/training_tricks.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/advanced/transfer_learning.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/advanced/warnings.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.accelerators.Accelerator.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.accelerators.CPUAccelerator.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.accelerators.CUDAAccelerator.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.accelerators.XLAAccelerator.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.BackboneFinetuning.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.BaseFinetuning.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.BasePredictionWriter.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.BatchSizeFinder.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.Callback.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.DeviceStatsMonitor.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.EarlyStopping.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.GradientAccumulationScheduler.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.LambdaCallback.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.LearningRateFinder.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.LearningRateMonitor.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.ModelCheckpoint.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.ModelPruning.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.ModelSummary.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.OnExceptionCheckpoint.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.ProgressBar.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.RichModelSummary.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.RichProgressBar.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.SpikeDetection.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.StochasticWeightAveraging.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.TQDMProgressBar.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.ThroughputMonitor.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.Timer.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.cli.LightningArgumentParser.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.cli.LightningCLI.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.cli.SaveConfigCallback.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.core.LightningDataModule.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.core.LightningModule.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.core.hooks.CheckpointHooks.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.core.hooks.DataHooks.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.core.hooks.ModelHooks.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.core.mixins.HyperparametersMixin.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.core.optimizer.LightningOptimizer.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.loggers.comet.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.loggers.csv_logs.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.loggers.logger.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.loggers.mlflow.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.loggers.neptune.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.loggers.tensorboard.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.loggers.wandb.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.LayerSync.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.TorchSyncBatchNorm.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.ClusterEnvironment.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.KubeflowEnvironment.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.LSFEnvironment.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.LightningEnvironment.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.MPIEnvironment.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.SLURMEnvironment.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.TorchElasticEnvironment.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.XLAEnvironment.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.io.AsyncCheckpointIO.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.io.CheckpointIO.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.io.TorchCheckpointIO.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.io.XLACheckpointIO.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.BitsandbytesPrecision.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.DeepSpeedPrecision.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.DoublePrecision.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.FSDPPrecision.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.HalfPrecision.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.MixedPrecision.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.Precision.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.TransformerEnginePrecision.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.XLAPrecision.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.profilers.AdvancedProfiler.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.profilers.PassThroughProfiler.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.profilers.Profiler.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.profilers.PyTorchProfiler.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.profilers.SimpleProfiler.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.profilers.XLAProfiler.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.DDPStrategy.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.DeepSpeedStrategy.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.FSDPStrategy.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.ParallelStrategy.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.SingleDeviceStrategy.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.SingleDeviceXLAStrategy.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.Strategy.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.XLAStrategy.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.trainer.trainer.Trainer.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.tuner.tuning.Tuner.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.combined_loader.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.data.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.deepspeed.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.memory.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.model_summary.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.parsing.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.rank_zero.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.seed.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.warnings.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/api_references.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/benchmarking/benchmarks.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/cli/lightning_cli.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/clouds/cluster.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/clouds/cluster_advanced.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/clouds/cluster_intermediate_2.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/common/checkpointing.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/common/checkpointing_advanced.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/common/checkpointing_basic.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/common/checkpointing_expert.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/common/checkpointing_intermediate.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/common/checkpointing_migration.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/common/console_logs.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/common/early_stopping.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/common/evaluation.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/common/evaluation_basic.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/common/hyperparameters.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/common/index.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/common/lightning_module.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/common/notebooks.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/common/optimization.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/common/precision.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/common/precision_expert.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/common/progress_bar.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/common/remote_fs.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/common/trainer.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/common_usecases.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/community/governance.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/community/index.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/data/data.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/data/datamodule.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/data/iterables.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/debug/debugging.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/deploy/production.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/deploy/production_advanced.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/deploy/production_advanced_2.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/deploy/production_basic.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/deploy/production_intermediate.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/expertise_levels.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/extensions/accelerator.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/extensions/callbacks.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/extensions/logging.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/extensions/plugins.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/extensions/strategy.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/generated/BECOMING_A_CORE_CONTRIBUTOR.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/generated/CHANGELOG.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/generated/CODE_OF_CONDUCT.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/generated/CONTRIBUTING.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/glossary/index.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/index.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/integrations/hpu/index.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/integrations/ipu/index.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/integrations/strategies/colossalai.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/advanced.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_15.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_16.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_17.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_18.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_19.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_20.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_21.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_22.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/basic_level_2.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/basic_level_5.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/core_level_6.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/core_skills.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/expert.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/expert_level_23.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/expert_level_24.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/expert_level_27.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/intermediate.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/intermediate_level_10.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/intermediate_level_11.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/intermediate_level_12.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/intermediate_level_13.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/intermediate_level_14.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/intermediate_level_7.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/levels/intermediate_level_9.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/model/build_model.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/model/build_model_advanced.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/model/manual_optimization.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/model/own_your_loop.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/model/train_model_basic.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/01-introduction-to-pytorch.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/02-activation-functions.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/03-initialization-and-optimization.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/04-inception-resnet-densenet.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/05-transformers-and-MH-attention.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/06-graph-neural-networks.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/07-deep-energy-based-generative-models.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/08-deep-autoencoders.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/09-normalizing-flows.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/10-autoregressive-image-modeling.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/11-vision-transformer.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/notebooks/lightning_examples/cifar10-baseline.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/notebooks/lightning_examples/datamodules.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/notebooks/lightning_examples/finetuning-scheduler.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/past_versions.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/starter/converting.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/starter/installation.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/starter/introduction.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/starter/style_guide.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/tuning/profiler.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/tuning/profiler_expert.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/tutorials.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/upgrade/from_1_4.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/upgrade/from_1_5.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/upgrade/from_1_6.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/upgrade/from_1_7.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/upgrade/from_1_8.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/upgrade/from_1_9.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/upgrade/from_2_0.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/upgrade/migration_guide.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/versioning.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/visualize/experiment_managers.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/visualize/loggers.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/visualize/logging_advanced.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/visualize/logging_basic.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/visualize/logging_expert.html',\n",
       " 'https://lightning.ai/docs/pytorch/latest/visualize/logging_intermediate.html'}"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the URL collection held in `crawled_urls` (defined outside this cell).\n",
    "crawled_urls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "import pandas as pd\n",
    "\n",
    "\n",
    "def extract_sections_to_csv(url, output_file, timeout=30):\n",
    "    \"\"\"Scrape the sections of one documentation page into a CSV file.\n",
    "\n",
    "    Downloads ``url``, locates the ``div`` with class ``rst-content`` and\n",
    "    writes the text of every ``section`` tag found inside it, except the\n",
    "    first one, to ``output_file``. The CSV has the columns ``URL`` and\n",
    "    ``Section Content`` and one row per section.\n",
    "\n",
    "    Failures (network error, non-200 status, missing ``rst-content`` div)\n",
    "    are reported with ``print`` and leave ``output_file`` untouched.\n",
    "\n",
    "    Args:\n",
    "        url: Address of the page to download.\n",
    "        output_file: Path of the CSV file to write.\n",
    "        timeout: Seconds to wait for the server before giving up.\n",
    "\n",
    "    Returns:\n",
    "        None.\n",
    "    \"\"\"\n",
    "    # Without a timeout a single stalled server would block the whole crawl,\n",
    "    # and an unhandled connection error would abort it.\n",
    "    try:\n",
    "        response = requests.get(url, timeout=timeout)\n",
    "    except requests.RequestException as error:\n",
    "        print(f\"Failed to retrieve the page. Error: {error}\")\n",
    "        return\n",
    "\n",
    "    if response.status_code != 200:\n",
    "        print(f\"Failed to retrieve the page. Status code: {response.status_code}\")\n",
    "        return\n",
    "\n",
    "    soup = BeautifulSoup(response.text, \"html.parser\")\n",
    "\n",
    "    div_content = soup.find(\"div\", class_=\"rst-content\")\n",
    "    if div_content is None:\n",
    "        print(\n",
    "            \"Div element with class 'rst-content' not found. Check the HTML structure of the page.\"\n",
    "        )\n",
    "        return\n",
    "\n",
    "    # The first section is skipped on purpose, as in the original code;\n",
    "    # presumably it is the page-level wrapper whose text repeats the nested\n",
    "    # sections -- TODO confirm against the page markup.\n",
    "    section_contents = [\n",
    "        section.get_text() for section in div_content.find_all(\"section\")[1:]\n",
    "    ]\n",
    "\n",
    "    df = pd.DataFrame(\n",
    "        {\n",
    "            \"URL\": [url] * len(section_contents),\n",
    "            \"Section Content\": section_contents,\n",
    "        }\n",
    "    )\n",
    "    df.to_csv(output_file, index=False)\n",
    "\n",
    "\n",
    "# Example usage: scrape the page at the documentation root URL and write its\n",
    "# sections to sections.csv (a relative path, so it is resolved against the\n",
    "# current working directory).\n",
    "url = \"https://lightning.ai/docs/pytorch/latest\"\n",
    "output_file = \"sections.csv\"\n",
    "extract_sections_to_csv(url, output_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "212333e70eb6447e84384a2364a8f179",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Output()"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">C:\\Users\\alfat\\AppData\\Local\\Temp\\ipykernel_7584\\215768005.py:13: MarkupResemblesLocatorWarning: The input looks \n",
       "more like a filename than markup. You may want to open this file and pass the filehandle into Beautiful Soup.\n",
       "  soup = BeautifulSoup(response.text, \"html.parser\")\n",
       "</pre>\n"
      ],
      "text/plain": [
       "C:\\Users\\alfat\\AppData\\Local\\Temp\\ipykernel_7584\\215768005.py:13: MarkupResemblesLocatorWarning: The input looks \n",
       "more like a filename than markup. You may want to open this file and pass the filehandle into Beautiful Soup.\n",
       "  soup = BeautifulSoup(response.text, \"html.parser\")\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from rich.progress import track\n",
    "\n",
    "from pathlib import Path\n",
    "\n",
    "# Write into a `crawled` folder relative to the working directory instead of a\n",
    "# machine-specific absolute path; this is the same location that the later\n",
    "# glob(\"crawled/*.csv\") cell reads from.\n",
    "output_dir = Path(\"crawled\")\n",
    "output_dir.mkdir(parents=True, exist_ok=True)\n",
    "\n",
    "# Sorting gives every URL a stable index, so the numbered file names do not\n",
    "# depend on the iteration order of the `crawled_urls` collection.\n",
    "for i, url in enumerate(track(sorted(crawled_urls))):\n",
    "    output_file = str(output_dir / f\"{i}.csv\")\n",
    "    extract_sections_to_csv(url, output_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "d:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\pinecone\\index.py:4: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n",
      "  from tqdm.autonotebook import tqdm\n"
     ]
    }
   ],
   "source": [
    "from langchain.embeddings import HuggingFaceEmbeddings\n",
    "import pandas as pd\n",
    "from glob import glob\n",
    "import pinecone"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Credentials are read from the environment so that no secret is stored in the\n",
    "# notebook file. A missing PINECONE_API_KEY raises KeyError right here.\n",
    "# NOTE(review): the key that used to be hardcoded in this cell has been\n",
    "# committed and should be revoked and replaced.\n",
    "pinecone.init(\n",
    "    api_key=os.environ[\"PINECONE_API_KEY\"],\n",
    "    environment=os.environ.get(\"PINECONE_ENVIRONMENT\", \"gcp-starter\"),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "from chromadb.utils import embedding_functions\n",
    "\n",
    "\n",
    "import pandas as pd\n",
    "from glob import glob"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "import chromadb\n",
    "\n",
    "# Open the Chroma client backed by the local `db` path.\n",
    "chroma_client = chromadb.PersistentClient(path=\"db\")\n",
    "\n",
    "# get_or_create_collection works on a fresh database as well as on an existing\n",
    "# one, so no line has to be commented in or out by hand between runs.\n",
    "collection = chroma_client.get_or_create_collection(name=\"test\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# glob returns matches in arbitrary order; sort them so the crawled CSV files\n",
    "# are processed in the same order on every run.\n",
    "csvs = sorted(glob(\"crawled/*.csv\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "from rich.progress import track\n",
    "from rich import print\n",
    "from os.path import basename"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "False\n",
      "\n",
      "===================================BUG REPORT===================================\n",
      "================================================================================\n",
      "The following directories listed in your path were found to be non-existent: {WindowsPath('/Anaconda3/envs/pytorchML/lib'), WindowsPath('D')}\n",
      "The following directories listed in your path were found to be non-existent: {WindowsPath('vs/workbench/api/node/extensionHostProcess')}\n",
      "The following directories listed in your path were found to be non-existent: {WindowsPath('module'), WindowsPath('/matplotlib_inline.backend_inline')}\n",
      "CUDA_SETUP: WARNING! libcudart.so not found in any environmental path. Searching in backup paths...\n",
      "The following directories listed in your path were found to be non-existent: {WindowsPath('/usr/local/cuda/lib64')}\n",
      "DEBUG: Possible options found for libcudart.so: set()\n",
      "CUDA SETUP: PyTorch settings found: CUDA_VERSION=117, Highest Compute Capability: 7.5.\n",
      "CUDA SETUP: To manually override the PyTorch CUDA version please see:https://github.com/TimDettmers/bitsandbytes/blob/main/how_to_use_nonpytorch_cuda.md\n",
      "CUDA SETUP: Loading binary d:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\bitsandbytes\\libbitsandbytes_cuda117.so...\n",
      "argument of type 'WindowsPath' is not iterable\n",
      "CUDA SETUP: Problem: The main issue seems to be that the main CUDA runtime library was not detected.\n",
      "CUDA SETUP: Solution 1: To solve the issue the libcudart.so location needs to be added to the LD_LIBRARY_PATH variable\n",
      "CUDA SETUP: Solution 1a): Find the cuda runtime library via: find / -name libcudart.so 2>/dev/null\n",
      "CUDA SETUP: Solution 1b): Once the library is found add it to the LD_LIBRARY_PATH: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:FOUND_PATH_FROM_1a\n",
      "CUDA SETUP: Solution 1c): For a permanent solution add the export from 1b into your .bashrc file, located at ~/.bashrc\n",
      "CUDA SETUP: Solution 2: If no library was found in step 1a) you need to install CUDA.\n",
      "CUDA SETUP: Solution 2a): Download CUDA install script: wget https://github.com/TimDettmers/bitsandbytes/blob/main/cuda_install.sh\n",
      "CUDA SETUP: Solution 2b): Install desired CUDA version to desired location. The syntax is bash cuda_install.sh CUDA_VERSION PATH_TO_INSTALL_INTO.\n",
      "CUDA SETUP: Solution 2b): For example, \"bash cuda_install.sh 113 ~/local/\" will download CUDA 11.3 and install into the folder ~/local\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "d:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\bitsandbytes\\cuda_setup\\main.py:166: UserWarning: Welcome to bitsandbytes. For bug reports, please run\n",
      "\n",
      "python -m bitsandbytes\n",
      "\n",
      "\n",
      "  warn(msg)\n",
      "d:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\bitsandbytes\\cuda_setup\\main.py:166: UserWarning: D:\\Anaconda3\\envs\\pytorchML did not contain ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] as expected! Searching further paths...\n",
      "  warn(msg)\n"
     ]
    },
    {
     "ename": "RuntimeError",
     "evalue": "Failed to import transformers.models.bert.modeling_bert because of the following error (look up to see its traceback):\n\n        CUDA Setup failed despite GPU being available. Please run the following command to get more information:\n\n        python -m bitsandbytes\n\n        Inspect the output of the command and see if you can locate CUDA libraries. You might need to add them\n        to your LD_LIBRARY_PATH. If you suspect a bug, please take the information from python -m bitsandbytes\n        and open an issue at: https://github.com/TimDettmers/bitsandbytes/issues",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\transformers\\utils\\import_utils.py:1130\u001b[0m, in \u001b[0;36m_LazyModule._get_module\u001b[1;34m(self, module_name)\u001b[0m\n\u001b[0;32m   1129\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m-> 1130\u001b[0m     \u001b[39mreturn\u001b[39;00m importlib\u001b[39m.\u001b[39;49mimport_module(\u001b[39m\"\u001b[39;49m\u001b[39m.\u001b[39;49m\u001b[39m\"\u001b[39;49m \u001b[39m+\u001b[39;49m module_name, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m\u001b[39m__name__\u001b[39;49m)\n\u001b[0;32m   1131\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\importlib\\__init__.py:126\u001b[0m, in \u001b[0;36mimport_module\u001b[1;34m(name, package)\u001b[0m\n\u001b[0;32m    125\u001b[0m         level \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n\u001b[1;32m--> 126\u001b[0m \u001b[39mreturn\u001b[39;00m _bootstrap\u001b[39m.\u001b[39;49m_gcd_import(name[level:], package, level)\n",
      "File \u001b[1;32m<frozen importlib._bootstrap>:1050\u001b[0m, in \u001b[0;36m_gcd_import\u001b[1;34m(name, package, level)\u001b[0m\n",
      "File \u001b[1;32m<frozen importlib._bootstrap>:1027\u001b[0m, in \u001b[0;36m_find_and_load\u001b[1;34m(name, import_)\u001b[0m\n",
      "File \u001b[1;32m<frozen importlib._bootstrap>:1006\u001b[0m, in \u001b[0;36m_find_and_load_unlocked\u001b[1;34m(name, import_)\u001b[0m\n",
      "File \u001b[1;32m<frozen importlib._bootstrap>:688\u001b[0m, in \u001b[0;36m_load_unlocked\u001b[1;34m(spec)\u001b[0m\n",
      "File \u001b[1;32m<frozen importlib._bootstrap_external>:883\u001b[0m, in \u001b[0;36mexec_module\u001b[1;34m(self, module)\u001b[0m\n",
      "File \u001b[1;32m<frozen importlib._bootstrap>:241\u001b[0m, in \u001b[0;36m_call_with_frames_removed\u001b[1;34m(f, *args, **kwds)\u001b[0m\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\transformers\\models\\bert\\modeling_bert.py:42\u001b[0m\n\u001b[0;32m     31\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mmodeling_outputs\u001b[39;00m \u001b[39mimport\u001b[39;00m (\n\u001b[0;32m     32\u001b[0m     BaseModelOutputWithPastAndCrossAttentions,\n\u001b[0;32m     33\u001b[0m     BaseModelOutputWithPoolingAndCrossAttentions,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m     40\u001b[0m     TokenClassifierOutput,\n\u001b[0;32m     41\u001b[0m )\n\u001b[1;32m---> 42\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mmodeling_utils\u001b[39;00m \u001b[39mimport\u001b[39;00m PreTrainedModel\n\u001b[0;32m     43\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mpytorch_utils\u001b[39;00m \u001b[39mimport\u001b[39;00m apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\transformers\\modeling_utils.py:88\u001b[0m\n\u001b[0;32m     87\u001b[0m \u001b[39mif\u001b[39;00m is_accelerate_available():\n\u001b[1;32m---> 88\u001b[0m     \u001b[39mfrom\u001b[39;00m \u001b[39maccelerate\u001b[39;00m \u001b[39mimport\u001b[39;00m dispatch_model, infer_auto_device_map, init_empty_weights\n\u001b[0;32m     89\u001b[0m     \u001b[39mfrom\u001b[39;00m \u001b[39maccelerate\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mhooks\u001b[39;00m \u001b[39mimport\u001b[39;00m add_hook_to_module\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\accelerate\\__init__.py:3\u001b[0m\n\u001b[0;32m      1\u001b[0m __version__ \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m0.21.0\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m----> 3\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39maccelerator\u001b[39;00m \u001b[39mimport\u001b[39;00m Accelerator\n\u001b[0;32m      4\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mbig_modeling\u001b[39;00m \u001b[39mimport\u001b[39;00m (\n\u001b[0;32m      5\u001b[0m     cpu_offload,\n\u001b[0;32m      6\u001b[0m     cpu_offload_with_hook,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m     11\u001b[0m     load_checkpoint_and_dispatch,\n\u001b[0;32m     12\u001b[0m )\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\accelerate\\accelerator.py:35\u001b[0m\n\u001b[0;32m     33\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mtorch\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mutils\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mhooks\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mhooks\u001b[39;00m\n\u001b[1;32m---> 35\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mcheckpointing\u001b[39;00m \u001b[39mimport\u001b[39;00m load_accelerator_state, load_custom_state, save_accelerator_state, save_custom_state\n\u001b[0;32m     36\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mdata_loader\u001b[39;00m \u001b[39mimport\u001b[39;00m DataLoaderDispatcher, prepare_data_loader, skip_first_batches\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\accelerate\\checkpointing.py:24\u001b[0m\n\u001b[0;32m     22\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtorch\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mcuda\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mamp\u001b[39;00m \u001b[39mimport\u001b[39;00m GradScaler\n\u001b[1;32m---> 24\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mutils\u001b[39;00m \u001b[39mimport\u001b[39;00m (\n\u001b[0;32m     25\u001b[0m     MODEL_NAME,\n\u001b[0;32m     26\u001b[0m     OPTIMIZER_NAME,\n\u001b[0;32m     27\u001b[0m     RNG_STATE_NAME,\n\u001b[0;32m     28\u001b[0m     SCALER_NAME,\n\u001b[0;32m     29\u001b[0m     SCHEDULER_NAME,\n\u001b[0;32m     30\u001b[0m     get_pretty_name,\n\u001b[0;32m     31\u001b[0m     is_tpu_available,\n\u001b[0;32m     32\u001b[0m     is_xpu_available,\n\u001b[0;32m     33\u001b[0m     save,\n\u001b[0;32m     34\u001b[0m )\n\u001b[0;32m     37\u001b[0m \u001b[39mif\u001b[39;00m is_tpu_available(check_device\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m):\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\accelerate\\utils\\__init__.py:131\u001b[0m\n\u001b[0;32m    122\u001b[0m     \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mdeepspeed\u001b[39;00m \u001b[39mimport\u001b[39;00m (\n\u001b[0;32m    123\u001b[0m         DeepSpeedEngineWrapper,\n\u001b[0;32m    124\u001b[0m         DeepSpeedOptimizerWrapper,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    128\u001b[0m         HfDeepSpeedConfig,\n\u001b[0;32m    129\u001b[0m     )\n\u001b[1;32m--> 131\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mbnb\u001b[39;00m \u001b[39mimport\u001b[39;00m has_4bit_bnb_layers, load_and_quantize_model\n\u001b[0;32m    132\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mfsdp_utils\u001b[39;00m \u001b[39mimport\u001b[39;00m load_fsdp_model, load_fsdp_optimizer, save_fsdp_model, save_fsdp_optimizer\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\accelerate\\utils\\bnb.py:42\u001b[0m\n\u001b[0;32m     41\u001b[0m \u001b[39mif\u001b[39;00m is_bnb_available():\n\u001b[1;32m---> 42\u001b[0m     \u001b[39mimport\u001b[39;00m \u001b[39mbitsandbytes\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mbnb\u001b[39;00m\n\u001b[0;32m     44\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mcopy\u001b[39;00m \u001b[39mimport\u001b[39;00m deepcopy\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\bitsandbytes\\__init__.py:6\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[39m# Copyright (c) Facebook, Inc. and its affiliates.\u001b[39;00m\n\u001b[0;32m      2\u001b[0m \u001b[39m#\u001b[39;00m\n\u001b[0;32m      3\u001b[0m \u001b[39m# This source code is licensed under the MIT license found in the\u001b[39;00m\n\u001b[0;32m      4\u001b[0m \u001b[39m# LICENSE file in the root directory of this source tree.\u001b[39;00m\n\u001b[1;32m----> 6\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m \u001b[39mimport\u001b[39;00m cuda_setup, utils, research\n\u001b[0;32m      7\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mautograd\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39m_functions\u001b[39;00m \u001b[39mimport\u001b[39;00m (\n\u001b[0;32m      8\u001b[0m     MatmulLtState,\n\u001b[0;32m      9\u001b[0m     bmm_cublas,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m     13\u001b[0m     matmul_4bit\n\u001b[0;32m     14\u001b[0m )\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\bitsandbytes\\research\\__init__.py:1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m \u001b[39mimport\u001b[39;00m nn\n\u001b[0;32m      2\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mautograd\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39m_functions\u001b[39;00m \u001b[39mimport\u001b[39;00m (\n\u001b[0;32m      3\u001b[0m     switchback_bnb,\n\u001b[0;32m      4\u001b[0m     matmul_fp8_global,\n\u001b[0;32m      5\u001b[0m     matmul_fp8_mixed,\n\u001b[0;32m      6\u001b[0m )\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\bitsandbytes\\research\\nn\\__init__.py:1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mmodules\u001b[39;00m \u001b[39mimport\u001b[39;00m LinearFP8Mixed, LinearFP8Global\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\bitsandbytes\\research\\nn\\modules.py:8\u001b[0m\n\u001b[0;32m      7\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mbitsandbytes\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mbnb\u001b[39;00m\n\u001b[1;32m----> 8\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mbitsandbytes\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39moptim\u001b[39;00m \u001b[39mimport\u001b[39;00m GlobalOptimManager\n\u001b[0;32m      9\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mbitsandbytes\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mutils\u001b[39;00m \u001b[39mimport\u001b[39;00m OutlierTracer, find_outlier_dims\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\bitsandbytes\\optim\\__init__.py:6\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[39m# Copyright (c) Facebook, Inc. and its affiliates.\u001b[39;00m\n\u001b[0;32m      2\u001b[0m \u001b[39m#\u001b[39;00m\n\u001b[0;32m      3\u001b[0m \u001b[39m# This source code is licensed under the MIT license found in the\u001b[39;00m\n\u001b[0;32m      4\u001b[0m \u001b[39m# LICENSE file in the root directory of this source tree.\u001b[39;00m\n\u001b[1;32m----> 6\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mbitsandbytes\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mcextension\u001b[39;00m \u001b[39mimport\u001b[39;00m COMPILED_WITH_CUDA\n\u001b[0;32m      8\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39madagrad\u001b[39;00m \u001b[39mimport\u001b[39;00m Adagrad, Adagrad8bit, Adagrad32bit\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\bitsandbytes\\cextension.py:20\u001b[0m\n\u001b[0;32m     19\u001b[0m     CUDASetup\u001b[39m.\u001b[39mget_instance()\u001b[39m.\u001b[39mprint_log_stack()\n\u001b[1;32m---> 20\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39m'''\u001b[39m\n\u001b[0;32m     21\u001b[0m \u001b[39m    CUDA Setup failed despite GPU being available. Please run the following command to get more information:\u001b[39m\n\u001b[0;32m     22\u001b[0m \n\u001b[0;32m     23\u001b[0m \u001b[39m    python -m bitsandbytes\u001b[39m\n\u001b[0;32m     24\u001b[0m \n\u001b[0;32m     25\u001b[0m \u001b[39m    Inspect the output of the command and see if you can locate CUDA libraries. You might need to add them\u001b[39m\n\u001b[0;32m     26\u001b[0m \u001b[39m    to your LD_LIBRARY_PATH. If you suspect a bug, please take the information from python -m bitsandbytes\u001b[39m\n\u001b[0;32m     27\u001b[0m \u001b[39m    and open an issue at: https://github.com/TimDettmers/bitsandbytes/issues\u001b[39m\u001b[39m'''\u001b[39m)\n\u001b[0;32m     28\u001b[0m lib\u001b[39m.\u001b[39mcadam32bit_grad_fp32 \u001b[39m# runs on an error if the library could not be found -> COMPILED_WITH_CUDA=False\u001b[39;00m\n",
      "\u001b[1;31mRuntimeError\u001b[0m: \n        CUDA Setup failed despite GPU being available. Please run the following command to get more information:\n\n        python -m bitsandbytes\n\n        Inspect the output of the command and see if you can locate CUDA libraries. You might need to add them\n        to your LD_LIBRARY_PATH. If you suspect a bug, please take the information from python -m bitsandbytes\n        and open an issue at: https://github.com/TimDettmers/bitsandbytes/issues",
      "\nThe above exception was the direct cause of the following exception:\n",
      "\u001b[1;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
      "\u001b[1;32me:\\Projects\\Hackathons\\StudyBot\\notebooks\\embeddings.ipynb Cell 17\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> <a href='vscode-notebook-cell:/e%3A/Projects/Hackathons/StudyBot/notebooks/embeddings.ipynb#X22sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m sentence_transformer_ef \u001b[39m=\u001b[39m embedding_functions\u001b[39m.\u001b[39;49mSentenceTransformerEmbeddingFunction(\n\u001b[0;32m      <a href='vscode-notebook-cell:/e%3A/Projects/Hackathons/StudyBot/notebooks/embeddings.ipynb#X22sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m     model_name\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mall-MiniLM-L6-v2\u001b[39;49m\u001b[39m\"\u001b[39;49m\n\u001b[0;32m      <a href='vscode-notebook-cell:/e%3A/Projects/Hackathons/StudyBot/notebooks/embeddings.ipynb#X22sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m )\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\chromadb\\utils\\embedding_functions.py:53\u001b[0m, in \u001b[0;36mSentenceTransformerEmbeddingFunction.__init__\u001b[1;34m(self, model_name, device, normalize_embeddings)\u001b[0m\n\u001b[0;32m     49\u001b[0m     \u001b[39mexcept\u001b[39;00m \u001b[39mImportError\u001b[39;00m:\n\u001b[0;32m     50\u001b[0m         \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m     51\u001b[0m             \u001b[39m\"\u001b[39m\u001b[39mThe sentence_transformers python package is not installed. Please install it with `pip install sentence_transformers`\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m     52\u001b[0m         )\n\u001b[1;32m---> 53\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmodels[model_name] \u001b[39m=\u001b[39m SentenceTransformer(model_name, device\u001b[39m=\u001b[39;49mdevice)\n\u001b[0;32m     54\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_model \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmodels[model_name]\n\u001b[0;32m     55\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_normalize_embeddings \u001b[39m=\u001b[39m normalize_embeddings\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\sentence_transformers\\SentenceTransformer.py:95\u001b[0m, in \u001b[0;36mSentenceTransformer.__init__\u001b[1;34m(self, model_name_or_path, modules, device, cache_folder, use_auth_token)\u001b[0m\n\u001b[0;32m     87\u001b[0m         snapshot_download(model_name_or_path,\n\u001b[0;32m     88\u001b[0m                             cache_dir\u001b[39m=\u001b[39mcache_folder,\n\u001b[0;32m     89\u001b[0m                             library_name\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39msentence-transformers\u001b[39m\u001b[39m'\u001b[39m,\n\u001b[0;32m     90\u001b[0m                             library_version\u001b[39m=\u001b[39m__version__,\n\u001b[0;32m     91\u001b[0m                             ignore_files\u001b[39m=\u001b[39m[\u001b[39m'\u001b[39m\u001b[39mflax_model.msgpack\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mrust_model.ot\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mtf_model.h5\u001b[39m\u001b[39m'\u001b[39m],\n\u001b[0;32m     92\u001b[0m                             use_auth_token\u001b[39m=\u001b[39muse_auth_token)\n\u001b[0;32m     94\u001b[0m \u001b[39mif\u001b[39;00m os\u001b[39m.\u001b[39mpath\u001b[39m.\u001b[39mexists(os\u001b[39m.\u001b[39mpath\u001b[39m.\u001b[39mjoin(model_path, \u001b[39m'\u001b[39m\u001b[39mmodules.json\u001b[39m\u001b[39m'\u001b[39m)):    \u001b[39m#Load as SentenceTransformer model\u001b[39;00m\n\u001b[1;32m---> 95\u001b[0m     modules \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_load_sbert_model(model_path)\n\u001b[0;32m     96\u001b[0m \u001b[39melse\u001b[39;00m:   \u001b[39m#Load with AutoModel\u001b[39;00m\n\u001b[0;32m     97\u001b[0m     modules \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_load_auto_model(model_path)\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\sentence_transformers\\SentenceTransformer.py:840\u001b[0m, in \u001b[0;36mSentenceTransformer._load_sbert_model\u001b[1;34m(self, model_path)\u001b[0m\n\u001b[0;32m    838\u001b[0m \u001b[39mfor\u001b[39;00m module_config \u001b[39min\u001b[39;00m modules_config:\n\u001b[0;32m    839\u001b[0m     module_class \u001b[39m=\u001b[39m import_from_string(module_config[\u001b[39m'\u001b[39m\u001b[39mtype\u001b[39m\u001b[39m'\u001b[39m])\n\u001b[1;32m--> 840\u001b[0m     module \u001b[39m=\u001b[39m module_class\u001b[39m.\u001b[39;49mload(os\u001b[39m.\u001b[39;49mpath\u001b[39m.\u001b[39;49mjoin(model_path, module_config[\u001b[39m'\u001b[39;49m\u001b[39mpath\u001b[39;49m\u001b[39m'\u001b[39;49m]))\n\u001b[0;32m    841\u001b[0m     modules[module_config[\u001b[39m'\u001b[39m\u001b[39mname\u001b[39m\u001b[39m'\u001b[39m]] \u001b[39m=\u001b[39m module\n\u001b[0;32m    843\u001b[0m \u001b[39mreturn\u001b[39;00m modules\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\sentence_transformers\\models\\Transformer.py:137\u001b[0m, in \u001b[0;36mTransformer.load\u001b[1;34m(input_path)\u001b[0m\n\u001b[0;32m    135\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(sbert_config_path) \u001b[39mas\u001b[39;00m fIn:\n\u001b[0;32m    136\u001b[0m     config \u001b[39m=\u001b[39m json\u001b[39m.\u001b[39mload(fIn)\n\u001b[1;32m--> 137\u001b[0m \u001b[39mreturn\u001b[39;00m Transformer(model_name_or_path\u001b[39m=\u001b[39minput_path, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mconfig)\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\sentence_transformers\\models\\Transformer.py:29\u001b[0m, in \u001b[0;36mTransformer.__init__\u001b[1;34m(self, model_name_or_path, max_seq_length, model_args, cache_dir, tokenizer_args, do_lower_case, tokenizer_name_or_path)\u001b[0m\n\u001b[0;32m     26\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mdo_lower_case \u001b[39m=\u001b[39m do_lower_case\n\u001b[0;32m     28\u001b[0m config \u001b[39m=\u001b[39m AutoConfig\u001b[39m.\u001b[39mfrom_pretrained(model_name_or_path, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mmodel_args, cache_dir\u001b[39m=\u001b[39mcache_dir)\n\u001b[1;32m---> 29\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_load_model(model_name_or_path, config, cache_dir)\n\u001b[0;32m     31\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtokenizer \u001b[39m=\u001b[39m AutoTokenizer\u001b[39m.\u001b[39mfrom_pretrained(tokenizer_name_or_path \u001b[39mif\u001b[39;00m tokenizer_name_or_path \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39melse\u001b[39;00m model_name_or_path, cache_dir\u001b[39m=\u001b[39mcache_dir, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mtokenizer_args)\n\u001b[0;32m     33\u001b[0m \u001b[39m#No max_seq_length set. Try to infer from model\u001b[39;00m\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\sentence_transformers\\models\\Transformer.py:49\u001b[0m, in \u001b[0;36mTransformer._load_model\u001b[1;34m(self, model_name_or_path, config, cache_dir)\u001b[0m\n\u001b[0;32m     47\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_load_t5_model(model_name_or_path, config, cache_dir)\n\u001b[0;32m     48\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m---> 49\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mauto_model \u001b[39m=\u001b[39m AutoModel\u001b[39m.\u001b[39;49mfrom_pretrained(model_name_or_path, config\u001b[39m=\u001b[39;49mconfig, cache_dir\u001b[39m=\u001b[39;49mcache_dir)\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\transformers\\models\\auto\\auto_factory.py:515\u001b[0m, in \u001b[0;36m_BaseAutoModelClass.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[0m\n\u001b[0;32m    511\u001b[0m     \u001b[39mreturn\u001b[39;00m model_class\u001b[39m.\u001b[39mfrom_pretrained(\n\u001b[0;32m    512\u001b[0m         pretrained_model_name_or_path, \u001b[39m*\u001b[39mmodel_args, config\u001b[39m=\u001b[39mconfig, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mhub_kwargs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs\n\u001b[0;32m    513\u001b[0m     )\n\u001b[0;32m    514\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mtype\u001b[39m(config) \u001b[39min\u001b[39;00m \u001b[39mcls\u001b[39m\u001b[39m.\u001b[39m_model_mapping\u001b[39m.\u001b[39mkeys():\n\u001b[1;32m--> 515\u001b[0m     model_class \u001b[39m=\u001b[39m _get_model_class(config, \u001b[39mcls\u001b[39;49m\u001b[39m.\u001b[39;49m_model_mapping)\n\u001b[0;32m    516\u001b[0m     \u001b[39mreturn\u001b[39;00m model_class\u001b[39m.\u001b[39mfrom_pretrained(\n\u001b[0;32m    517\u001b[0m         pretrained_model_name_or_path, \u001b[39m*\u001b[39mmodel_args, config\u001b[39m=\u001b[39mconfig, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mhub_kwargs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs\n\u001b[0;32m    518\u001b[0m     )\n\u001b[0;32m    519\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m    520\u001b[0m     \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mUnrecognized configuration class \u001b[39m\u001b[39m{\u001b[39;00mconfig\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m for this kind of AutoModel: \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mcls\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[0;32m    
521\u001b[0m     \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mModel type should be one of \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m, \u001b[39m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39mjoin(c\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m \u001b[39mfor\u001b[39;00m c \u001b[39min\u001b[39;00m \u001b[39mcls\u001b[39m\u001b[39m.\u001b[39m_model_mapping\u001b[39m.\u001b[39mkeys())\u001b[39m}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m    522\u001b[0m )\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\transformers\\models\\auto\\auto_factory.py:377\u001b[0m, in \u001b[0;36m_get_model_class\u001b[1;34m(config, model_mapping)\u001b[0m\n\u001b[0;32m    376\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_get_model_class\u001b[39m(config, model_mapping):\n\u001b[1;32m--> 377\u001b[0m     supported_models \u001b[39m=\u001b[39m model_mapping[\u001b[39mtype\u001b[39;49m(config)]\n\u001b[0;32m    378\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(supported_models, (\u001b[39mlist\u001b[39m, \u001b[39mtuple\u001b[39m)):\n\u001b[0;32m    379\u001b[0m         \u001b[39mreturn\u001b[39;00m supported_models\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\transformers\\models\\auto\\auto_factory.py:690\u001b[0m, in \u001b[0;36m_LazyAutoMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m    688\u001b[0m \u001b[39mif\u001b[39;00m model_type \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_model_mapping:\n\u001b[0;32m    689\u001b[0m     model_name \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_model_mapping[model_type]\n\u001b[1;32m--> 690\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_load_attr_from_module(model_type, model_name)\n\u001b[0;32m    692\u001b[0m \u001b[39m# Maybe there was several model types associated with this config.\u001b[39;00m\n\u001b[0;32m    693\u001b[0m model_types \u001b[39m=\u001b[39m [k \u001b[39mfor\u001b[39;00m k, v \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_config_mapping\u001b[39m.\u001b[39mitems() \u001b[39mif\u001b[39;00m v \u001b[39m==\u001b[39m key\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m]\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\transformers\\models\\auto\\auto_factory.py:704\u001b[0m, in \u001b[0;36m_LazyAutoMapping._load_attr_from_module\u001b[1;34m(self, model_type, attr)\u001b[0m\n\u001b[0;32m    702\u001b[0m \u001b[39mif\u001b[39;00m module_name \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_modules:\n\u001b[0;32m    703\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_modules[module_name] \u001b[39m=\u001b[39m importlib\u001b[39m.\u001b[39mimport_module(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39m\u001b[39m{\u001b[39;00mmodule_name\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mtransformers.models\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m--> 704\u001b[0m \u001b[39mreturn\u001b[39;00m getattribute_from_module(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_modules[module_name], attr)\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\transformers\\models\\auto\\auto_factory.py:648\u001b[0m, in \u001b[0;36mgetattribute_from_module\u001b[1;34m(module, attr)\u001b[0m\n\u001b[0;32m    646\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(attr, \u001b[39mtuple\u001b[39m):\n\u001b[0;32m    647\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mtuple\u001b[39m(getattribute_from_module(module, a) \u001b[39mfor\u001b[39;00m a \u001b[39min\u001b[39;00m attr)\n\u001b[1;32m--> 648\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39;49m(module, attr):\n\u001b[0;32m    649\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mgetattr\u001b[39m(module, attr)\n\u001b[0;32m    650\u001b[0m \u001b[39m# Some of the mappings have entries model_type -> object of another model type. In that case we try to grab the\u001b[39;00m\n\u001b[0;32m    651\u001b[0m \u001b[39m# object at the top level.\u001b[39;00m\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\transformers\\utils\\import_utils.py:1120\u001b[0m, in \u001b[0;36m_LazyModule.__getattr__\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m   1118\u001b[0m     value \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_get_module(name)\n\u001b[0;32m   1119\u001b[0m \u001b[39melif\u001b[39;00m name \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_class_to_module\u001b[39m.\u001b[39mkeys():\n\u001b[1;32m-> 1120\u001b[0m     module \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_get_module(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_class_to_module[name])\n\u001b[0;32m   1121\u001b[0m     value \u001b[39m=\u001b[39m \u001b[39mgetattr\u001b[39m(module, name)\n\u001b[0;32m   1122\u001b[0m \u001b[39melse\u001b[39;00m:\n",
      "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\transformers\\utils\\import_utils.py:1132\u001b[0m, in \u001b[0;36m_LazyModule._get_module\u001b[1;34m(self, module_name)\u001b[0m\n\u001b[0;32m   1130\u001b[0m     \u001b[39mreturn\u001b[39;00m importlib\u001b[39m.\u001b[39mimport_module(\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m \u001b[39m+\u001b[39m module_name, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m)\n\u001b[0;32m   1131\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m-> 1132\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\n\u001b[0;32m   1133\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mFailed to import \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m{\u001b[39;00mmodule_name\u001b[39m}\u001b[39;00m\u001b[39m because of the following error (look up to see its\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m   1134\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m traceback):\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m{\u001b[39;00me\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[0;32m   1135\u001b[0m     ) \u001b[39mfrom\u001b[39;00m \u001b[39me\u001b[39;00m\n",
      "\u001b[1;31mRuntimeError\u001b[0m: Failed to import transformers.models.bert.modeling_bert because of the following error (look up to see its traceback):\n\n        CUDA Setup failed despite GPU being available. Please run the following command to get more information:\n\n        python -m bitsandbytes\n\n        Inspect the output of the command and see if you can locate CUDA libraries. You might need to add them\n        to your LD_LIBRARY_PATH. If you suspect a bug, please take the information from python -m bitsandbytes\n        and open an issue at: https://github.com/TimDettmers/bitsandbytes/issues"
     ]
    }
   ],
   "source": [
    "# Chroma embedding function that wraps the all-MiniLM-L6-v2 sentence-transformers model.\n",
    "# NOTE(review): the saved output of this cell is a RuntimeError raised while transformers\n",
    "# imports bitsandbytes (CUDA setup failure), so the environment must be fixed before this runs.\n",
    "sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=\"all-MiniLM-L6-v2\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Walk the crawled CSV files with a rich progress bar.\n",
    "# The loop variable is named csv_path so it does not shadow the stdlib `csv` module name.\n",
    "for csv_path in track(csvs):\n",
    "    # TODO: the loop body was left unfinished; presumably each file is meant to be\n",
    "    # loaded into `collection` -- confirm and implement.\n",
    "    pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "pytorchML",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}