diff --git "a/notebooks/embeddings.ipynb" "b/notebooks/embeddings.ipynb" new file mode 100644--- /dev/null +++ "b/notebooks/embeddings.ipynb" @@ -0,0 +1,1506 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from bs4 import BeautifulSoup" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "1: Install PyTorch Lightning¶\n", + "\n", + "For pip users\n", + "pip install lightning\n", + "\n", + "\n", + "\n", + "For conda users\n", + "conda install lightning -c conda-forge\n", + "\n", + "\n", + "\n", + "Or read the advanced install guide\n", + "\n", + "\n", + "-------------------\n", + "\n", + "2: Define a LightningModule¶\n", + "A LightningModule enables your PyTorch nn.Module to play together in complex ways inside the training_step (there is also an optional validation_step and test_step).\n", + "import os\n", + "from torch import optim, nn, utils, Tensor\n", + "from torchvision.datasets import MNIST\n", + "from torchvision.transforms import ToTensor\n", + "import lightning as L\n", + "\n", + "# define any number of nn.Modules (or use your current ones)\n", + "encoder = nn.Sequential(nn.Linear(28 * 28, 64), nn.ReLU(), nn.Linear(64, 3))\n", + "decoder = nn.Sequential(nn.Linear(3, 64), nn.ReLU(), nn.Linear(64, 28 * 28))\n", + "\n", + "\n", + "# define the LightningModule\n", + "class LitAutoEncoder(L.LightningModule):\n", + " def __init__(self, encoder, decoder):\n", + " super().__init__()\n", + " self.encoder = encoder\n", + " self.decoder = decoder\n", + "\n", + " def training_step(self, batch, batch_idx):\n", + " # training_step defines the train loop.\n", + " # it is independent of forward\n", + " x, y = batch\n", + " x = x.view(x.size(0), -1)\n", + " z = self.encoder(x)\n", + " x_hat = self.decoder(z)\n", + " loss = nn.functional.mse_loss(x_hat, x)\n", + " # 
Logging to TensorBoard (if installed) by default\n", + " self.log(\"train_loss\", loss)\n", + " return loss\n", + "\n", + " def configure_optimizers(self):\n", + " optimizer = optim.Adam(self.parameters(), lr=1e-3)\n", + " return optimizer\n", + "\n", + "\n", + "# init the autoencoder\n", + "autoencoder = LitAutoEncoder(encoder, decoder)\n", + "\n", + "\n", + "\n", + "-------------------\n", + "\n", + "3: Define a dataset¶\n", + "Lightning supports ANY iterable (DataLoader, numpy, etc…) for the train/val/test/predict splits.\n", + "# setup data\n", + "dataset = MNIST(os.getcwd(), download=True, transform=ToTensor())\n", + "train_loader = utils.data.DataLoader(dataset)\n", + "\n", + "\n", + "\n", + "-------------------\n", + "\n", + "4: Train the model¶\n", + "The Lightning Trainer “mixes” any LightningModule with any dataset and abstracts away all the engineering complexity needed for scale.\n", + "# train the model (hint: here are some helpful Trainer arguments for rapid idea iteration)\n", + "trainer = L.Trainer(limit_train_batches=100, max_epochs=1)\n", + "trainer.fit(model=autoencoder, train_dataloaders=train_loader)\n", + "\n", + "\n", + "The Lightning Trainer automates 40+ tricks including:\n", + "\n", + "Epoch and batch iteration\n", + "optimizer.step(), loss.backward(), optimizer.zero_grad() calls\n", + "Calling of model.eval(), enabling/disabling grads during evaluation\n", + "Checkpoint Saving and Loading\n", + "Tensorboard (see loggers options)\n", + "Multi-GPU support\n", + "TPU\n", + "16-bit precision AMP support\n", + "\n", + "\n", + "-------------------\n", + "\n", + "5: Use the model¶\n", + "Once you’ve trained the model you can export to onnx, torchscript and put it into production or simply load the weights and run predictions.\n", + "# load checkpoint\n", + "checkpoint = \"./lightning_logs/version_0/checkpoints/epoch=0-step=100.ckpt\"\n", + "autoencoder = LitAutoEncoder.load_from_checkpoint(checkpoint, encoder=encoder, decoder=decoder)\n", + 
"\n", + "# choose your trained nn.Module\n", + "encoder = autoencoder.encoder\n", + "encoder.eval()\n", + "\n", + "# embed 4 fake images!\n", + "fake_image_batch = torch.rand(4, 28 * 28, device=autoencoder.device)\n", + "embeddings = encoder(fake_image_batch)\n", + "print(\"⚡\" * 20, \"\\nPredictions (4 image embeddings):\\n\", embeddings, \"\\n\", \"⚡\" * 20)\n", + "\n", + "\n", + "\n", + "-------------------\n", + "\n", + "6: Visualize training¶\n", + "If you have tensorboard installed, you can use it for visualizing experiments.\n", + "Run this on your commandline and open your browser to http://localhost:6006/\n", + "tensorboard --logdir .\n", + "\n", + "\n", + "\n", + "-------------------\n", + "\n", + "7: Supercharge training¶\n", + "Enable advanced training features using Trainer arguments. These are state-of-the-art techniques that are automatically integrated into your training loop without changes to your code.\n", + "# train on 4 GPUs\n", + "trainer = Trainer(\n", + " devices=4,\n", + " accelerator=\"gpu\",\n", + " )\n", + "\n", + "# train 1TB+ parameter models with Deepspeed/fsdp\n", + "trainer = L.Trainer(\n", + " devices=4,\n", + " accelerator=\"gpu\",\n", + " strategy=\"deepspeed_stage_2\",\n", + " precision=16\n", + " )\n", + "\n", + "# 20+ helpful flags for rapid idea iteration\n", + "trainer = L.Trainer(\n", + " max_epochs=10,\n", + " min_epochs=5,\n", + " overfit_batches=1\n", + " )\n", + "\n", + "# access the latest state of the art techniques\n", + "trainer = Trainer(callbacks=[StochasticWeightAveraging(...)])\n", + "\n", + "\n", + "\n", + "-------------------\n", + "\n", + "Maximize flexibility¶\n", + "Lightning’s core guiding principle is to always provide maximal flexibility without ever hiding any of the PyTorch.\n", + "Lightning offers 5 added degrees of flexibility depending on your project’s complexity.\n", + "\n", + "\n", + "Customize training loop¶\n", + "\n", + "Inject custom code anywhere in the Training loop using any of the 20+ 
methods (Hooks) available in the LightningModule.\n", + "class LitAutoEncoder(L.LightningModule):\n", + " def backward(self, loss):\n", + " loss.backward()\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Extend the Trainer¶\n", + "If you have multiple lines of code with similar functionalities, you can use callbacks to easily group them together and toggle all of those lines on or off at the same time.\n", + "trainer = Trainer(callbacks=[AWSCheckpoints()])\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Use a raw PyTorch loop¶\n", + "For certain types of work at the bleeding-edge of research, Lightning offers experts full control of optimization or the training loop in various ways.\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Manual optimization\n", + "Automated training loop, but you own the optimization steps.\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "-------------------\n", + "\n", + "Customize training loop¶\n", + "\n", + "Inject custom code anywhere in the Training loop using any of the 20+ methods (Hooks) available in the LightningModule.\n", + "class LitAutoEncoder(L.LightningModule):\n", + " def backward(self, loss):\n", + " loss.backward()\n", + "\n", + "\n", + "\n", + "-------------------\n", + "\n", + "Extend the Trainer¶\n", + "If you have multiple lines of code with similar functionalities, you can use callbacks to easily group them together and toggle all of those lines on or off at the same time.\n", + "trainer = Trainer(callbacks=[AWSCheckpoints()])\n", + "\n", + "\n", + "\n", + "-------------------\n", + "\n", + "Use a raw PyTorch loop¶\n", + "For certain types of work at the bleeding-edge of research, Lightning offers experts full control of optimization or the training loop in various ways.\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Manual optimization\n", + "Automated training loop, but you own the optimization steps.\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "-------------------\n", + "\n", + "Next steps¶\n", + "Depending 
on your use case, you might want to check one of these out next.\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Level 2: Add a validation and test set\n", + "Add validation and test sets to avoid over/underfitting.\n", + "\n", + "\n", + "basic\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "See more examples\n", + "See examples across computer vision, NLP, RL, etc...\n", + "\n", + "\n", + "basic\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Deploy your model\n", + "Learn how to predict or put your model into production\n", + "\n", + "\n", + "basic\n", + "\n", + "\n", + "\n", + "-------------------\n" + ] + } + ], + "source": [ + "url = \"https://lightning.ai/docs/pytorch/latest/starter/introduction.html\"\n", + "\n", + "response = requests.get(url)\n", + "\n", + "if response.status_code == 200:\n", + " soup = BeautifulSoup(response.text, 'html.parser')\n", + "\n", + " div_content = soup.find('div', class_='rst-content')\n", + "\n", + " if div_content:\n", + " sections = div_content.find_all('section')\n", + "\n", + " for section in sections[1:]:\n", + "\n", + " section_content = section.get_text()\n", + " print(section_content)\n", + " print('-------------------')\n", + " else:\n", + " print(\"Div element with class 'rst-content' not found. Check the HTML structure of the page.\")\n", + "\n", + "else:\n", + " print(f\"Failed to retrieve the page. 
from urllib.parse import urljoin, urlparse, urlunparse

# Same-domain URLs discovered by fetch_links_recursive, stored without fragments.
crawled_urls = set()


def remove_fragment(url):
    """Return *url* with its ``#fragment`` component removed."""
    return urlunparse(urlparse(url)._replace(fragment=""))


def get_links(url, timeout=10):
    """Fetch *url* and return the raw ``href`` of every ``<a>`` tag on the page.

    The hrefs are returned exactly as written in the HTML, so they may be
    relative; callers are expected to resolve them against *url*.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of href strings. The list is empty when the request raises,
        times out, or answers with a status other than 200, so callers can
        always iterate over the result.
    """
    # Imported here so this cell does not depend on an earlier cell having run.
    import requests
    from bs4 import BeautifulSoup

    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            print(
                f"Failed to fetch links from {url}: "
                f"status code {response.status_code}"
            )
            return []
        soup = BeautifulSoup(response.text, "html.parser")
        return [a_tag["href"] for a_tag in soup.find_all("a", href=True)]
    except Exception as e:
        # The crawl is best-effort: report the failure and keep going.
        print(f"Failed to fetch links from {url}: {e}")
        return []


def fetch_links_recursive(base_url, current_url, visited_urls, max_depth=4):
    """Depth-limited crawl that collects same-domain links into ``crawled_urls``.

    Args:
        base_url: Result of ``urlparse`` for the start page; only links whose
            ``netloc`` equals ``base_url.netloc`` are recorded and followed.
        current_url: Page to fetch on this call.
        visited_urls: Set of fragment-free URLs already fetched. It is updated
            in place and shared across the whole recursion.
        max_depth: Remaining number of levels to descend; zero or a negative
            value stops the recursion.
    """
    # "page.html#a" and "page.html#b" are the same document: normalise before
    # the visited check so each page is downloaded at most once.
    current_url = remove_fragment(current_url)
    if max_depth <= 0 or current_url in visited_urls:
        return

    visited_urls.add(current_url)
    for link in get_links(current_url):
        absolute_url = remove_fragment(urljoin(current_url, link))
        if urlparse(absolute_url).netloc != base_url.netloc:
            continue
        crawled_urls.add(absolute_url)
        fetch_links_recursive(base_url, absolute_url, visited_urls, max_depth - 1)
[ + "base_url = urlparse(\n", + " \"https://lightning.ai/docs/pytorch/latest/starter/introduction.html\"\n", + ")\n", + "visited_urls = set()\n", + "fetch_links_recursive(base_url, base_url.geturl(), visited_urls)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "275" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(crawled_urls)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'https://lightning.ai/docs/fabric/',\n", + " 'https://lightning.ai/docs/pytorch/latest/_images/custom_loop.png',\n", + " 'https://lightning.ai/docs/pytorch/latest/_images/ddp.gif',\n", + " 'https://lightning.ai/docs/pytorch/latest/_modules/lightning/fabric/utilities/throughput.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/_modules/lightning/pytorch/core/module.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/_modules/lightning/pytorch/trainer/trainer.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/accelerators/gpu.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/accelerators/tpu.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/advanced/speed.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/api_references.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/common/checkpointing.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/common/index.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/common/lightning_module.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/common/trainer.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/community/index.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/deploy/production.rst.txt',\n", + " 
'https://lightning.ai/docs/pytorch/latest/_sources/glossary/index.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/index.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/levels/advanced.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/levels/basic_level_2.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/levels/core_skills.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/levels/expert.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/levels/intermediate.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/model/build_model_advanced.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/starter/installation.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/starter/introduction.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/tutorials.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/from_1_4.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/from_1_5.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/from_1_6.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/from_1_7.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/from_1_8.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/from_1_9.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/from_2_0.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/upgrade/migration_guide.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/_sources/visualize/loggers.rst.txt',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/accelerator_prepare.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/gpu.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/gpu_advanced.html',\n", + " 
'https://lightning.ai/docs/pytorch/latest/accelerators/gpu_basic.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/gpu_expert.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/gpu_faq.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/gpu_intermediate.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/mps.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/tpu.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/tpu_advanced.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/tpu_basic.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/tpu_faq.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/accelerators/tpu_intermediate.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/ddp_optimizations.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/finetuning.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/model_init.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/model_parallel.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/model_parallel/deepspeed.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/model_parallel/fsdp.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/post_training_quantization.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/pretrained.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/pruning_quantization.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/speed.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/strategy_registry.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/training_tricks.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/transfer_learning.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/advanced/warnings.html',\n", + " 
'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.accelerators.Accelerator.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.accelerators.CPUAccelerator.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.accelerators.CUDAAccelerator.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.accelerators.XLAAccelerator.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.BackboneFinetuning.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.BaseFinetuning.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.BasePredictionWriter.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.BatchSizeFinder.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.Callback.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.DeviceStatsMonitor.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.EarlyStopping.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.GradientAccumulationScheduler.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.LambdaCallback.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.LearningRateFinder.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.LearningRateMonitor.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.ModelCheckpoint.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.ModelPruning.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.ModelSummary.html',\n", + " 
'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.OnExceptionCheckpoint.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.ProgressBar.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.RichModelSummary.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.RichProgressBar.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.SpikeDetection.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.StochasticWeightAveraging.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.TQDMProgressBar.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.ThroughputMonitor.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.Timer.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.cli.LightningArgumentParser.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.cli.LightningCLI.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.cli.SaveConfigCallback.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.core.LightningDataModule.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.core.LightningModule.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.core.hooks.CheckpointHooks.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.core.hooks.DataHooks.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.core.hooks.ModelHooks.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.core.mixins.HyperparametersMixin.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.core.optimizer.LightningOptimizer.html',\n", + " 
'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.loggers.comet.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.loggers.csv_logs.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.loggers.logger.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.loggers.mlflow.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.loggers.neptune.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.loggers.tensorboard.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.loggers.wandb.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.LayerSync.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.TorchSyncBatchNorm.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.ClusterEnvironment.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.KubeflowEnvironment.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.LSFEnvironment.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.LightningEnvironment.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.MPIEnvironment.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.SLURMEnvironment.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.TorchElasticEnvironment.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.environments.XLAEnvironment.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.io.AsyncCheckpointIO.html',\n", + " 
'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.io.CheckpointIO.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.io.TorchCheckpointIO.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.io.XLACheckpointIO.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.BitsandbytesPrecision.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.DeepSpeedPrecision.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.DoublePrecision.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.FSDPPrecision.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.HalfPrecision.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.MixedPrecision.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.Precision.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.TransformerEnginePrecision.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.plugins.precision.XLAPrecision.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.profilers.AdvancedProfiler.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.profilers.PassThroughProfiler.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.profilers.Profiler.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.profilers.PyTorchProfiler.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.profilers.SimpleProfiler.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.profilers.XLAProfiler.html',\n", + " 
'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.DDPStrategy.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.DeepSpeedStrategy.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.FSDPStrategy.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.ParallelStrategy.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.SingleDeviceStrategy.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.SingleDeviceXLAStrategy.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.Strategy.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.strategies.XLAStrategy.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.trainer.trainer.Trainer.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.tuner.tuning.Tuner.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.combined_loader.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.data.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.deepspeed.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.memory.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.model_summary.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.parsing.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.rank_zero.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.seed.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.utilities.warnings.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/api_references.html',\n", 
+ " 'https://lightning.ai/docs/pytorch/latest/benchmarking/benchmarks.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/cli/lightning_cli.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/clouds/cluster.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/clouds/cluster_advanced.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/clouds/cluster_intermediate_2.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/checkpointing.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/checkpointing_advanced.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/checkpointing_basic.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/checkpointing_expert.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/checkpointing_intermediate.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/checkpointing_migration.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/console_logs.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/early_stopping.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/evaluation.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/evaluation_basic.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/hyperparameters.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/index.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/lightning_module.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/notebooks.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/optimization.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/precision.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/precision_expert.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/progress_bar.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/remote_fs.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/common/trainer.html',\n", + " 
'https://lightning.ai/docs/pytorch/latest/common_usecases.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/community/governance.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/community/index.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/data/data.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/data/datamodule.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/data/iterables.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/debug/debugging.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/deploy/production.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/deploy/production_advanced.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/deploy/production_advanced_2.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/deploy/production_basic.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/deploy/production_intermediate.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/expertise_levels.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/extensions/accelerator.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/extensions/callbacks.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/extensions/logging.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/extensions/plugins.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/extensions/strategy.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/generated/BECOMING_A_CORE_CONTRIBUTOR.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/generated/CHANGELOG.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/generated/CODE_OF_CONDUCT.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/generated/CONTRIBUTING.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/glossary/index.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/index.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/integrations/hpu/index.html',\n", + " 
'https://lightning.ai/docs/pytorch/latest/integrations/ipu/index.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/integrations/strategies/colossalai.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/advanced.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_15.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_16.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_17.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_18.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_19.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_20.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_21.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/advanced_level_22.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/basic_level_2.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/basic_level_5.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/core_level_6.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/core_skills.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/expert.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/expert_level_23.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/expert_level_24.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/expert_level_27.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/intermediate.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/intermediate_level_10.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/intermediate_level_11.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/intermediate_level_12.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/intermediate_level_13.html',\n", + " 
'https://lightning.ai/docs/pytorch/latest/levels/intermediate_level_14.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/intermediate_level_7.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/levels/intermediate_level_9.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/model/build_model.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/model/build_model_advanced.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/model/manual_optimization.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/model/own_your_loop.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/model/train_model_basic.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/01-introduction-to-pytorch.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/02-activation-functions.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/03-initialization-and-optimization.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/04-inception-resnet-densenet.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/05-transformers-and-MH-attention.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/06-graph-neural-networks.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/07-deep-energy-based-generative-models.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/08-deep-autoencoders.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/09-normalizing-flows.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/10-autoregressive-image-modeling.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/11-vision-transformer.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/lightning_examples/cifar10-baseline.html',\n", + " 
'https://lightning.ai/docs/pytorch/latest/notebooks/lightning_examples/datamodules.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/notebooks/lightning_examples/finetuning-scheduler.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/past_versions.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/starter/converting.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/starter/installation.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/starter/introduction.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/starter/style_guide.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/tuning/profiler.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/tuning/profiler_expert.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/tutorials.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/upgrade/from_1_4.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/upgrade/from_1_5.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/upgrade/from_1_6.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/upgrade/from_1_7.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/upgrade/from_1_8.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/upgrade/from_1_9.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/upgrade/from_2_0.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/upgrade/migration_guide.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/versioning.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/visualize/experiment_managers.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/visualize/loggers.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/visualize/logging_advanced.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/visualize/logging_basic.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/visualize/logging_expert.html',\n", + " 'https://lightning.ai/docs/pytorch/latest/visualize/logging_intermediate.html'}" + ] + }, + "execution_count": 8, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "crawled_urls" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from bs4 import BeautifulSoup\n", + "import pandas as pd\n", + "\n", + "\n", + "def extract_sections_to_csv(url, output_file):\n", + " # Send an HTTP GET request to the URL\n", + " response = requests.get(url)\n", + "\n", + " # Check if the request was successful\n", + " if response.status_code == 200:\n", + " # Parse the HTML content of the page\n", + " soup = BeautifulSoup(response.text, \"html.parser\")\n", + "\n", + " # Find the div element with class \"rst-content\"\n", + " div_content = soup.find(\"div\", class_=\"rst-content\")\n", + "\n", + " if div_content:\n", + " # Find all section tags within the div_content\n", + " sections = div_content.find_all(\"section\")\n", + "\n", + " # Create a list to store the sections\n", + " section_contents = []\n", + "\n", + " for section in sections[1:]:\n", + " # Extract the content of each section\n", + " section_content = section.get_text()\n", + " section_contents.append(section_content)\n", + "\n", + " # Create a DataFrame with URL and Section Content columns\n", + " df = pd.DataFrame(\n", + " {\n", + " \"URL\": [url] * len(section_contents),\n", + " \"Section Content\": section_contents,\n", + " }\n", + " )\n", + "\n", + " # Save the DataFrame to a CSV file\n", + " df.to_csv(output_file, index=False)\n", + "\n", + " else:\n", + " print(\n", + " \"Div element with class 'rst-content' not found. Check the HTML structure of the page.\"\n", + " )\n", + "\n", + " else:\n", + " print(f\"Failed to retrieve the page. 
Status code: {response.status_code}\")\n", + "\n", + "\n", + "# Example usage\n", + "url = \"https://lightning.ai/docs/pytorch/latest\"\n", + "output_file = \"sections.csv\"\n", + "extract_sections_to_csv(url, output_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "212333e70eb6447e84384a2364a8f179", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
C:\\Users\\alfat\\AppData\\Local\\Temp\\ipykernel_7584\\215768005.py:13: MarkupResemblesLocatorWarning: The input looks \n",
+       "more like a filename than markup. You may want to open this file and pass the filehandle into Beautiful Soup.\n",
+       "  soup = BeautifulSoup(response.text, \"html.parser\")\n",
+       "
\n" + ], + "text/plain": [ + "C:\\Users\\alfat\\AppData\\Local\\Temp\\ipykernel_7584\\215768005.py:13: MarkupResemblesLocatorWarning: The input looks \n", + "more like a filename than markup. You may want to open this file and pass the filehandle into Beautiful Soup.\n", + " soup = BeautifulSoup(response.text, \"html.parser\")\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Div element with class 'rst-content' not found. Check the HTML structure of the page.\n",
+       "
\n" + ], + "text/plain": [ + "Div element with class 'rst-content' not found. Check the HTML structure of the page.\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
\n",
+       "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from rich.progress import track\n", + "\n", + "for i, url in enumerate(track(crawled_urls)):\n", + " output_file = f\"E:/Projects/Hackathons/StudyBot/crawled/{i}.csv\"\n", + " extract_sections_to_csv(url, output_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "d:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\pinecone\\index.py:4: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n", + " from tqdm.autonotebook import tqdm\n" + ] + } + ], + "source": [ + "from langchain.embeddings import HuggingFaceEmbeddings\n", + "import pandas as pd\n", + "from glob import glob\n", + "import pinecone" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "pinecone.init(api_key=\"2c94d392-e7de-40d6-8dbc-b485fac62af2\", environment=\"gcp-starter\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "from chromadb.utils import embedding_functions\n", + "\n", + "\n", + "import pandas as pd\n", + "from glob import glob" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "import chromadb\n", + "\n", + "chroma_client = chromadb.PersistentClient(path=\"db\")\n", + "\n", + "# collection = chroma_client.create_collection(name=\"test\")\n", + "collection = chroma_client.get_collection(name=\"test\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "csvs = glob(\"crawled/*.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "from rich.progress import 
track\n", + "from rich import print\n", + "from os.path import basename" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "False\n", + "\n", + "===================================BUG REPORT===================================\n", + "================================================================================\n", + "The following directories listed in your path were found to be non-existent: {WindowsPath('/Anaconda3/envs/pytorchML/lib'), WindowsPath('D')}\n", + "The following directories listed in your path were found to be non-existent: {WindowsPath('vs/workbench/api/node/extensionHostProcess')}\n", + "The following directories listed in your path were found to be non-existent: {WindowsPath('module'), WindowsPath('/matplotlib_inline.backend_inline')}\n", + "CUDA_SETUP: WARNING! libcudart.so not found in any environmental path. Searching in backup paths...\n", + "The following directories listed in your path were found to be non-existent: {WindowsPath('/usr/local/cuda/lib64')}\n", + "DEBUG: Possible options found for libcudart.so: set()\n", + "CUDA SETUP: PyTorch settings found: CUDA_VERSION=117, Highest Compute Capability: 7.5.\n", + "CUDA SETUP: To manually override the PyTorch CUDA version please see:https://github.com/TimDettmers/bitsandbytes/blob/main/how_to_use_nonpytorch_cuda.md\n", + "CUDA SETUP: Loading binary d:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\bitsandbytes\\libbitsandbytes_cuda117.so...\n", + "argument of type 'WindowsPath' is not iterable\n", + "CUDA SETUP: Problem: The main issue seems to be that the main CUDA runtime library was not detected.\n", + "CUDA SETUP: Solution 1: To solve the issue the libcudart.so location needs to be added to the LD_LIBRARY_PATH variable\n", + "CUDA SETUP: Solution 1a): Find the cuda runtime library via: find / -name libcudart.so 2>/dev/null\n", + "CUDA SETUP: Solution 1b): Once the library is 
found add it to the LD_LIBRARY_PATH: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:FOUND_PATH_FROM_1a\n", + "CUDA SETUP: Solution 1c): For a permanent solution add the export from 1b into your .bashrc file, located at ~/.bashrc\n", + "CUDA SETUP: Solution 2: If no library was found in step 1a) you need to install CUDA.\n", + "CUDA SETUP: Solution 2a): Download CUDA install script: wget https://github.com/TimDettmers/bitsandbytes/blob/main/cuda_install.sh\n", + "CUDA SETUP: Solution 2b): Install desired CUDA version to desired location. The syntax is bash cuda_install.sh CUDA_VERSION PATH_TO_INSTALL_INTO.\n", + "CUDA SETUP: Solution 2b): For example, \"bash cuda_install.sh 113 ~/local/\" will download CUDA 11.3 and install into the folder ~/local\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "d:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\bitsandbytes\\cuda_setup\\main.py:166: UserWarning: Welcome to bitsandbytes. For bug reports, please run\n", + "\n", + "python -m bitsandbytes\n", + "\n", + "\n", + " warn(msg)\n", + "d:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\bitsandbytes\\cuda_setup\\main.py:166: UserWarning: D:\\Anaconda3\\envs\\pytorchML did not contain ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] as expected! Searching further paths...\n", + " warn(msg)\n" + ] + }, + { + "ename": "RuntimeError", + "evalue": "Failed to import transformers.models.bert.modeling_bert because of the following error (look up to see its traceback):\n\n CUDA Setup failed despite GPU being available. Please run the following command to get more information:\n\n python -m bitsandbytes\n\n Inspect the output of the command and see if you can locate CUDA libraries. You might need to add them\n to your LD_LIBRARY_PATH. 
If you suspect a bug, please take the information from python -m bitsandbytes\n and open an issue at: https://github.com/TimDettmers/bitsandbytes/issues", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\transformers\\utils\\import_utils.py:1130\u001b[0m, in \u001b[0;36m_LazyModule._get_module\u001b[1;34m(self, module_name)\u001b[0m\n\u001b[0;32m 1129\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m-> 1130\u001b[0m \u001b[39mreturn\u001b[39;00m importlib\u001b[39m.\u001b[39;49mimport_module(\u001b[39m\"\u001b[39;49m\u001b[39m.\u001b[39;49m\u001b[39m\"\u001b[39;49m \u001b[39m+\u001b[39;49m module_name, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m\u001b[39m__name__\u001b[39;49m)\n\u001b[0;32m 1131\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\importlib\\__init__.py:126\u001b[0m, in \u001b[0;36mimport_module\u001b[1;34m(name, package)\u001b[0m\n\u001b[0;32m 125\u001b[0m level \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n\u001b[1;32m--> 126\u001b[0m \u001b[39mreturn\u001b[39;00m _bootstrap\u001b[39m.\u001b[39;49m_gcd_import(name[level:], package, level)\n", + "File \u001b[1;32m:1050\u001b[0m, in \u001b[0;36m_gcd_import\u001b[1;34m(name, package, level)\u001b[0m\n", + "File \u001b[1;32m:1027\u001b[0m, in \u001b[0;36m_find_and_load\u001b[1;34m(name, import_)\u001b[0m\n", + "File \u001b[1;32m:1006\u001b[0m, in \u001b[0;36m_find_and_load_unlocked\u001b[1;34m(name, import_)\u001b[0m\n", + "File \u001b[1;32m:688\u001b[0m, in \u001b[0;36m_load_unlocked\u001b[1;34m(spec)\u001b[0m\n", + "File \u001b[1;32m:883\u001b[0m, in \u001b[0;36mexec_module\u001b[1;34m(self, module)\u001b[0m\n", + "File 
\u001b[1;32m:241\u001b[0m, in \u001b[0;36m_call_with_frames_removed\u001b[1;34m(f, *args, **kwds)\u001b[0m\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\transformers\\models\\bert\\modeling_bert.py:42\u001b[0m\n\u001b[0;32m 31\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mmodeling_outputs\u001b[39;00m \u001b[39mimport\u001b[39;00m (\n\u001b[0;32m 32\u001b[0m BaseModelOutputWithPastAndCrossAttentions,\n\u001b[0;32m 33\u001b[0m BaseModelOutputWithPoolingAndCrossAttentions,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 40\u001b[0m TokenClassifierOutput,\n\u001b[0;32m 41\u001b[0m )\n\u001b[1;32m---> 42\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mmodeling_utils\u001b[39;00m \u001b[39mimport\u001b[39;00m PreTrainedModel\n\u001b[0;32m 43\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mpytorch_utils\u001b[39;00m \u001b[39mimport\u001b[39;00m apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\transformers\\modeling_utils.py:88\u001b[0m\n\u001b[0;32m 87\u001b[0m \u001b[39mif\u001b[39;00m is_accelerate_available():\n\u001b[1;32m---> 88\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39maccelerate\u001b[39;00m \u001b[39mimport\u001b[39;00m dispatch_model, infer_auto_device_map, init_empty_weights\n\u001b[0;32m 89\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39maccelerate\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mhooks\u001b[39;00m \u001b[39mimport\u001b[39;00m add_hook_to_module\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\accelerate\\__init__.py:3\u001b[0m\n\u001b[0;32m 1\u001b[0m __version__ \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m0.21.0\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m----> 
3\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39maccelerator\u001b[39;00m \u001b[39mimport\u001b[39;00m Accelerator\n\u001b[0;32m 4\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mbig_modeling\u001b[39;00m \u001b[39mimport\u001b[39;00m (\n\u001b[0;32m 5\u001b[0m cpu_offload,\n\u001b[0;32m 6\u001b[0m cpu_offload_with_hook,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 11\u001b[0m load_checkpoint_and_dispatch,\n\u001b[0;32m 12\u001b[0m )\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\accelerate\\accelerator.py:35\u001b[0m\n\u001b[0;32m 33\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mtorch\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mutils\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mhooks\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mhooks\u001b[39;00m\n\u001b[1;32m---> 35\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mcheckpointing\u001b[39;00m \u001b[39mimport\u001b[39;00m load_accelerator_state, load_custom_state, save_accelerator_state, save_custom_state\n\u001b[0;32m 36\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mdata_loader\u001b[39;00m \u001b[39mimport\u001b[39;00m DataLoaderDispatcher, prepare_data_loader, skip_first_batches\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\accelerate\\checkpointing.py:24\u001b[0m\n\u001b[0;32m 22\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtorch\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mcuda\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mamp\u001b[39;00m \u001b[39mimport\u001b[39;00m GradScaler\n\u001b[1;32m---> 24\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mutils\u001b[39;00m \u001b[39mimport\u001b[39;00m (\n\u001b[0;32m 25\u001b[0m MODEL_NAME,\n\u001b[0;32m 26\u001b[0m OPTIMIZER_NAME,\n\u001b[0;32m 27\u001b[0m RNG_STATE_NAME,\n\u001b[0;32m 28\u001b[0m SCALER_NAME,\n\u001b[0;32m 29\u001b[0m SCHEDULER_NAME,\n\u001b[0;32m 30\u001b[0m 
get_pretty_name,\n\u001b[0;32m 31\u001b[0m is_tpu_available,\n\u001b[0;32m 32\u001b[0m is_xpu_available,\n\u001b[0;32m 33\u001b[0m save,\n\u001b[0;32m 34\u001b[0m )\n\u001b[0;32m 37\u001b[0m \u001b[39mif\u001b[39;00m is_tpu_available(check_device\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m):\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\accelerate\\utils\\__init__.py:131\u001b[0m\n\u001b[0;32m 122\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mdeepspeed\u001b[39;00m \u001b[39mimport\u001b[39;00m (\n\u001b[0;32m 123\u001b[0m DeepSpeedEngineWrapper,\n\u001b[0;32m 124\u001b[0m DeepSpeedOptimizerWrapper,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 128\u001b[0m HfDeepSpeedConfig,\n\u001b[0;32m 129\u001b[0m )\n\u001b[1;32m--> 131\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mbnb\u001b[39;00m \u001b[39mimport\u001b[39;00m has_4bit_bnb_layers, load_and_quantize_model\n\u001b[0;32m 132\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mfsdp_utils\u001b[39;00m \u001b[39mimport\u001b[39;00m load_fsdp_model, load_fsdp_optimizer, save_fsdp_model, save_fsdp_optimizer\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\accelerate\\utils\\bnb.py:42\u001b[0m\n\u001b[0;32m 41\u001b[0m \u001b[39mif\u001b[39;00m is_bnb_available():\n\u001b[1;32m---> 42\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mbitsandbytes\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mbnb\u001b[39;00m\n\u001b[0;32m 44\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mcopy\u001b[39;00m \u001b[39mimport\u001b[39;00m deepcopy\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\bitsandbytes\\__init__.py:6\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39m# Copyright (c) Facebook, Inc. 
and its affiliates.\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[39m#\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[39m# This source code is licensed under the MIT license found in the\u001b[39;00m\n\u001b[0;32m 4\u001b[0m \u001b[39m# LICENSE file in the root directory of this source tree.\u001b[39;00m\n\u001b[1;32m----> 6\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m \u001b[39mimport\u001b[39;00m cuda_setup, utils, research\n\u001b[0;32m 7\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mautograd\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39m_functions\u001b[39;00m \u001b[39mimport\u001b[39;00m (\n\u001b[0;32m 8\u001b[0m MatmulLtState,\n\u001b[0;32m 9\u001b[0m bmm_cublas,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 13\u001b[0m matmul_4bit\n\u001b[0;32m 14\u001b[0m )\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\bitsandbytes\\research\\__init__.py:1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m \u001b[39mimport\u001b[39;00m nn\n\u001b[0;32m 2\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mautograd\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39m_functions\u001b[39;00m \u001b[39mimport\u001b[39;00m (\n\u001b[0;32m 3\u001b[0m switchback_bnb,\n\u001b[0;32m 4\u001b[0m matmul_fp8_global,\n\u001b[0;32m 5\u001b[0m matmul_fp8_mixed,\n\u001b[0;32m 6\u001b[0m )\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\bitsandbytes\\research\\nn\\__init__.py:1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mmodules\u001b[39;00m \u001b[39mimport\u001b[39;00m LinearFP8Mixed, LinearFP8Global\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\bitsandbytes\\research\\nn\\modules.py:8\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mbitsandbytes\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mbnb\u001b[39;00m\n\u001b[1;32m----> 
8\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mbitsandbytes\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39moptim\u001b[39;00m \u001b[39mimport\u001b[39;00m GlobalOptimManager\n\u001b[0;32m 9\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mbitsandbytes\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mutils\u001b[39;00m \u001b[39mimport\u001b[39;00m OutlierTracer, find_outlier_dims\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\bitsandbytes\\optim\\__init__.py:6\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39m# Copyright (c) Facebook, Inc. and its affiliates.\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[39m#\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[39m# This source code is licensed under the MIT license found in the\u001b[39;00m\n\u001b[0;32m 4\u001b[0m \u001b[39m# LICENSE file in the root directory of this source tree.\u001b[39;00m\n\u001b[1;32m----> 6\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mbitsandbytes\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mcextension\u001b[39;00m \u001b[39mimport\u001b[39;00m COMPILED_WITH_CUDA\n\u001b[0;32m 8\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39madagrad\u001b[39;00m \u001b[39mimport\u001b[39;00m Adagrad, Adagrad8bit, Adagrad32bit\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\bitsandbytes\\cextension.py:20\u001b[0m\n\u001b[0;32m 19\u001b[0m CUDASetup\u001b[39m.\u001b[39mget_instance()\u001b[39m.\u001b[39mprint_log_stack()\n\u001b[1;32m---> 20\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39m'''\u001b[39m\n\u001b[0;32m 21\u001b[0m \u001b[39m CUDA Setup failed despite GPU being available. Please run the following command to get more information:\u001b[39m\n\u001b[0;32m 22\u001b[0m \n\u001b[0;32m 23\u001b[0m \u001b[39m python -m bitsandbytes\u001b[39m\n\u001b[0;32m 24\u001b[0m \n\u001b[0;32m 25\u001b[0m \u001b[39m Inspect the output of the command and see if you can locate CUDA libraries. 
You might need to add them\u001b[39m\n\u001b[0;32m 26\u001b[0m \u001b[39m to your LD_LIBRARY_PATH. If you suspect a bug, please take the information from python -m bitsandbytes\u001b[39m\n\u001b[0;32m 27\u001b[0m \u001b[39m and open an issue at: https://github.com/TimDettmers/bitsandbytes/issues\u001b[39m\u001b[39m'''\u001b[39m)\n\u001b[0;32m 28\u001b[0m lib\u001b[39m.\u001b[39mcadam32bit_grad_fp32 \u001b[39m# runs on an error if the library could not be found -> COMPILED_WITH_CUDA=False\u001b[39;00m\n", + "\u001b[1;31mRuntimeError\u001b[0m: \n CUDA Setup failed despite GPU being available. Please run the following command to get more information:\n\n python -m bitsandbytes\n\n Inspect the output of the command and see if you can locate CUDA libraries. You might need to add them\n to your LD_LIBRARY_PATH. If you suspect a bug, please take the information from python -m bitsandbytes\n and open an issue at: https://github.com/TimDettmers/bitsandbytes/issues", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[1;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32me:\\Projects\\Hackathons\\StudyBot\\notebooks\\embeddings.ipynb Cell 17\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> 1\u001b[0m sentence_transformer_ef \u001b[39m=\u001b[39m embedding_functions\u001b[39m.\u001b[39;49mSentenceTransformerEmbeddingFunction(\n\u001b[0;32m 2\u001b[0m model_name\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mall-MiniLM-L6-v2\u001b[39;49m\u001b[39m\"\u001b[39;49m\n\u001b[0;32m 3\u001b[0m )\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\chromadb\\utils\\embedding_functions.py:53\u001b[0m, in \u001b[0;36mSentenceTransformerEmbeddingFunction.__init__\u001b[1;34m(self, model_name, device, normalize_embeddings)\u001b[0m\n\u001b[0;32m 49\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mImportError\u001b[39;00m:\n\u001b[0;32m 50\u001b[0m \u001b[39mraise\u001b[39;00m 
\u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 51\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mThe sentence_transformers python package is not installed. Please install it with `pip install sentence_transformers`\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 52\u001b[0m )\n\u001b[1;32m---> 53\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmodels[model_name] \u001b[39m=\u001b[39m SentenceTransformer(model_name, device\u001b[39m=\u001b[39;49mdevice)\n\u001b[0;32m 54\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_model \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmodels[model_name]\n\u001b[0;32m 55\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_normalize_embeddings \u001b[39m=\u001b[39m normalize_embeddings\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\sentence_transformers\\SentenceTransformer.py:95\u001b[0m, in \u001b[0;36mSentenceTransformer.__init__\u001b[1;34m(self, model_name_or_path, modules, device, cache_folder, use_auth_token)\u001b[0m\n\u001b[0;32m 87\u001b[0m snapshot_download(model_name_or_path,\n\u001b[0;32m 88\u001b[0m cache_dir\u001b[39m=\u001b[39mcache_folder,\n\u001b[0;32m 89\u001b[0m library_name\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39msentence-transformers\u001b[39m\u001b[39m'\u001b[39m,\n\u001b[0;32m 90\u001b[0m library_version\u001b[39m=\u001b[39m__version__,\n\u001b[0;32m 91\u001b[0m ignore_files\u001b[39m=\u001b[39m[\u001b[39m'\u001b[39m\u001b[39mflax_model.msgpack\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mrust_model.ot\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mtf_model.h5\u001b[39m\u001b[39m'\u001b[39m],\n\u001b[0;32m 92\u001b[0m use_auth_token\u001b[39m=\u001b[39muse_auth_token)\n\u001b[0;32m 94\u001b[0m \u001b[39mif\u001b[39;00m os\u001b[39m.\u001b[39mpath\u001b[39m.\u001b[39mexists(os\u001b[39m.\u001b[39mpath\u001b[39m.\u001b[39mjoin(model_path, 
\u001b[39m'\u001b[39m\u001b[39mmodules.json\u001b[39m\u001b[39m'\u001b[39m)): \u001b[39m#Load as SentenceTransformer model\u001b[39;00m\n\u001b[1;32m---> 95\u001b[0m modules \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_load_sbert_model(model_path)\n\u001b[0;32m 96\u001b[0m \u001b[39melse\u001b[39;00m: \u001b[39m#Load with AutoModel\u001b[39;00m\n\u001b[0;32m 97\u001b[0m modules \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_load_auto_model(model_path)\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\sentence_transformers\\SentenceTransformer.py:840\u001b[0m, in \u001b[0;36mSentenceTransformer._load_sbert_model\u001b[1;34m(self, model_path)\u001b[0m\n\u001b[0;32m 838\u001b[0m \u001b[39mfor\u001b[39;00m module_config \u001b[39min\u001b[39;00m modules_config:\n\u001b[0;32m 839\u001b[0m module_class \u001b[39m=\u001b[39m import_from_string(module_config[\u001b[39m'\u001b[39m\u001b[39mtype\u001b[39m\u001b[39m'\u001b[39m])\n\u001b[1;32m--> 840\u001b[0m module \u001b[39m=\u001b[39m module_class\u001b[39m.\u001b[39;49mload(os\u001b[39m.\u001b[39;49mpath\u001b[39m.\u001b[39;49mjoin(model_path, module_config[\u001b[39m'\u001b[39;49m\u001b[39mpath\u001b[39;49m\u001b[39m'\u001b[39;49m]))\n\u001b[0;32m 841\u001b[0m modules[module_config[\u001b[39m'\u001b[39m\u001b[39mname\u001b[39m\u001b[39m'\u001b[39m]] \u001b[39m=\u001b[39m module\n\u001b[0;32m 843\u001b[0m \u001b[39mreturn\u001b[39;00m modules\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\sentence_transformers\\models\\Transformer.py:137\u001b[0m, in \u001b[0;36mTransformer.load\u001b[1;34m(input_path)\u001b[0m\n\u001b[0;32m 135\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(sbert_config_path) \u001b[39mas\u001b[39;00m fIn:\n\u001b[0;32m 136\u001b[0m config \u001b[39m=\u001b[39m json\u001b[39m.\u001b[39mload(fIn)\n\u001b[1;32m--> 137\u001b[0m \u001b[39mreturn\u001b[39;00m 
Transformer(model_name_or_path\u001b[39m=\u001b[39minput_path, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mconfig)\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\sentence_transformers\\models\\Transformer.py:29\u001b[0m, in \u001b[0;36mTransformer.__init__\u001b[1;34m(self, model_name_or_path, max_seq_length, model_args, cache_dir, tokenizer_args, do_lower_case, tokenizer_name_or_path)\u001b[0m\n\u001b[0;32m 26\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mdo_lower_case \u001b[39m=\u001b[39m do_lower_case\n\u001b[0;32m 28\u001b[0m config \u001b[39m=\u001b[39m AutoConfig\u001b[39m.\u001b[39mfrom_pretrained(model_name_or_path, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mmodel_args, cache_dir\u001b[39m=\u001b[39mcache_dir)\n\u001b[1;32m---> 29\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_load_model(model_name_or_path, config, cache_dir)\n\u001b[0;32m 31\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtokenizer \u001b[39m=\u001b[39m AutoTokenizer\u001b[39m.\u001b[39mfrom_pretrained(tokenizer_name_or_path \u001b[39mif\u001b[39;00m tokenizer_name_or_path \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39melse\u001b[39;00m model_name_or_path, cache_dir\u001b[39m=\u001b[39mcache_dir, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mtokenizer_args)\n\u001b[0;32m 33\u001b[0m \u001b[39m#No max_seq_length set. 
Try to infer from model\u001b[39;00m\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\sentence_transformers\\models\\Transformer.py:49\u001b[0m, in \u001b[0;36mTransformer._load_model\u001b[1;34m(self, model_name_or_path, config, cache_dir)\u001b[0m\n\u001b[0;32m 47\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_load_t5_model(model_name_or_path, config, cache_dir)\n\u001b[0;32m 48\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m---> 49\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mauto_model \u001b[39m=\u001b[39m AutoModel\u001b[39m.\u001b[39;49mfrom_pretrained(model_name_or_path, config\u001b[39m=\u001b[39;49mconfig, cache_dir\u001b[39m=\u001b[39;49mcache_dir)\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\transformers\\models\\auto\\auto_factory.py:515\u001b[0m, in \u001b[0;36m_BaseAutoModelClass.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[0m\n\u001b[0;32m 511\u001b[0m \u001b[39mreturn\u001b[39;00m model_class\u001b[39m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 512\u001b[0m pretrained_model_name_or_path, \u001b[39m*\u001b[39mmodel_args, config\u001b[39m=\u001b[39mconfig, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mhub_kwargs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs\n\u001b[0;32m 513\u001b[0m )\n\u001b[0;32m 514\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mtype\u001b[39m(config) \u001b[39min\u001b[39;00m \u001b[39mcls\u001b[39m\u001b[39m.\u001b[39m_model_mapping\u001b[39m.\u001b[39mkeys():\n\u001b[1;32m--> 515\u001b[0m model_class \u001b[39m=\u001b[39m _get_model_class(config, \u001b[39mcls\u001b[39;49m\u001b[39m.\u001b[39;49m_model_mapping)\n\u001b[0;32m 516\u001b[0m \u001b[39mreturn\u001b[39;00m model_class\u001b[39m.\u001b[39mfrom_pretrained(\n\u001b[0;32m 517\u001b[0m pretrained_model_name_or_path, \u001b[39m*\u001b[39mmodel_args, config\u001b[39m=\u001b[39mconfig, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mhub_kwargs, 
\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs\n\u001b[0;32m 518\u001b[0m )\n\u001b[0;32m 519\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 520\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mUnrecognized configuration class \u001b[39m\u001b[39m{\u001b[39;00mconfig\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m for this kind of AutoModel: \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mcls\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[0;32m 521\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mModel type should be one of \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m, \u001b[39m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39mjoin(c\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m \u001b[39mfor\u001b[39;00m c \u001b[39min\u001b[39;00m \u001b[39mcls\u001b[39m\u001b[39m.\u001b[39m_model_mapping\u001b[39m.\u001b[39mkeys())\u001b[39m}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 522\u001b[0m )\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\transformers\\models\\auto\\auto_factory.py:377\u001b[0m, in \u001b[0;36m_get_model_class\u001b[1;34m(config, model_mapping)\u001b[0m\n\u001b[0;32m 376\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_get_model_class\u001b[39m(config, model_mapping):\n\u001b[1;32m--> 377\u001b[0m supported_models \u001b[39m=\u001b[39m model_mapping[\u001b[39mtype\u001b[39;49m(config)]\n\u001b[0;32m 378\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(supported_models, (\u001b[39mlist\u001b[39m, \u001b[39mtuple\u001b[39m)):\n\u001b[0;32m 379\u001b[0m \u001b[39mreturn\u001b[39;00m supported_models\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\transformers\\models\\auto\\auto_factory.py:690\u001b[0m, in 
\u001b[0;36m_LazyAutoMapping.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 688\u001b[0m \u001b[39mif\u001b[39;00m model_type \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_model_mapping:\n\u001b[0;32m 689\u001b[0m model_name \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_model_mapping[model_type]\n\u001b[1;32m--> 690\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_load_attr_from_module(model_type, model_name)\n\u001b[0;32m 692\u001b[0m \u001b[39m# Maybe there was several model types associated with this config.\u001b[39;00m\n\u001b[0;32m 693\u001b[0m model_types \u001b[39m=\u001b[39m [k \u001b[39mfor\u001b[39;00m k, v \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_config_mapping\u001b[39m.\u001b[39mitems() \u001b[39mif\u001b[39;00m v \u001b[39m==\u001b[39m key\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m]\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\transformers\\models\\auto\\auto_factory.py:704\u001b[0m, in \u001b[0;36m_LazyAutoMapping._load_attr_from_module\u001b[1;34m(self, model_type, attr)\u001b[0m\n\u001b[0;32m 702\u001b[0m \u001b[39mif\u001b[39;00m module_name \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_modules:\n\u001b[0;32m 703\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_modules[module_name] \u001b[39m=\u001b[39m importlib\u001b[39m.\u001b[39mimport_module(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39m\u001b[39m{\u001b[39;00mmodule_name\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mtransformers.models\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m--> 704\u001b[0m \u001b[39mreturn\u001b[39;00m getattribute_from_module(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_modules[module_name], attr)\n", + "File 
\u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\transformers\\models\\auto\\auto_factory.py:648\u001b[0m, in \u001b[0;36mgetattribute_from_module\u001b[1;34m(module, attr)\u001b[0m\n\u001b[0;32m 646\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(attr, \u001b[39mtuple\u001b[39m):\n\u001b[0;32m 647\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mtuple\u001b[39m(getattribute_from_module(module, a) \u001b[39mfor\u001b[39;00m a \u001b[39min\u001b[39;00m attr)\n\u001b[1;32m--> 648\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39;49m(module, attr):\n\u001b[0;32m 649\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mgetattr\u001b[39m(module, attr)\n\u001b[0;32m 650\u001b[0m \u001b[39m# Some of the mappings have entries model_type -> object of another model type. In that case we try to grab the\u001b[39;00m\n\u001b[0;32m 651\u001b[0m \u001b[39m# object at the top level.\u001b[39;00m\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\transformers\\utils\\import_utils.py:1120\u001b[0m, in \u001b[0;36m_LazyModule.__getattr__\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m 1118\u001b[0m value \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_get_module(name)\n\u001b[0;32m 1119\u001b[0m \u001b[39melif\u001b[39;00m name \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_class_to_module\u001b[39m.\u001b[39mkeys():\n\u001b[1;32m-> 1120\u001b[0m module \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_get_module(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_class_to_module[name])\n\u001b[0;32m 1121\u001b[0m value \u001b[39m=\u001b[39m \u001b[39mgetattr\u001b[39m(module, name)\n\u001b[0;32m 1122\u001b[0m \u001b[39melse\u001b[39;00m:\n", + "File \u001b[1;32md:\\Anaconda3\\envs\\pytorchML\\lib\\site-packages\\transformers\\utils\\import_utils.py:1132\u001b[0m, in \u001b[0;36m_LazyModule._get_module\u001b[1;34m(self, 
module_name)\u001b[0m\n\u001b[0;32m 1130\u001b[0m \u001b[39mreturn\u001b[39;00m importlib\u001b[39m.\u001b[39mimport_module(\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m \u001b[39m+\u001b[39m module_name, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m)\n\u001b[0;32m 1131\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m-> 1132\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\n\u001b[0;32m 1133\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mFailed to import \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m{\u001b[39;00mmodule_name\u001b[39m}\u001b[39;00m\u001b[39m because of the following error (look up to see its\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 1134\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m traceback):\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m{\u001b[39;00me\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[0;32m 1135\u001b[0m ) \u001b[39mfrom\u001b[39;00m \u001b[39me\u001b[39;00m\n", + "\u001b[1;31mRuntimeError\u001b[0m: Failed to import transformers.models.bert.modeling_bert because of the following error (look up to see its traceback):\n\n CUDA Setup failed despite GPU being available. Please run the following command to get more information:\n\n python -m bitsandbytes\n\n Inspect the output of the command and see if you can locate CUDA libraries. You might need to add them\n to your LD_LIBRARY_PATH. 
If you suspect a bug, please take the information from python -m bitsandbytes\n and open an issue at: https://github.com/TimDettmers/bitsandbytes/issues" + ] + } + ], + "source": [ + "sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(\n", + " model_name=\"all-MiniLM-L6-v2\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for csv in track(csvs):\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pytorchML", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}