ireneng committed on
Commit
44de051
·
1 Parent(s): ebadee5
Files changed (9) hide show
  1. .gitattributes +3 -0
  2. Book_Rec_Sum_Models.ipynb +0 -0
  3. README.md +2 -2
  4. app.ipynb +288 -0
  5. app.py +126 -0
  6. data/BX_Books.csv +3 -0
  7. dataloader.pkl +3 -0
  8. myModel.pth +3 -0
  9. requirements.txt +8 -0
.gitattributes CHANGED
@@ -1,2 +1,5 @@
1
  # Auto detect text files and perform LF normalization
2
  * text=auto
 
 
 
 
1
  # Auto detect text files and perform LF normalization
2
  * text=auto
3
+ myModel.pth filter=lfs diff=lfs merge=lfs -text
4
+ dataloader.pkl filter=lfs diff=lfs merge=lfs -text
5
+ data/BX_Books.csv filter=lfs diff=lfs merge=lfs -text
Book_Rec_Sum_Models.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
README.md CHANGED
@@ -1,2 +1,2 @@
1
- # book-rec
2
-
 
1
+ # Book-Recommendation
2
+ A deep-learning-powered book recommendation and summarization web app.
app.ipynb ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "attachments": {},
5
+ "cell_type": "markdown",
6
+ "metadata": {},
7
+ "source": [
8
+ "This file is a space to construct functions, experiment, and see changes directly instead of having to reload the app every time. It serves as the draft for app.py and contains similar functions, except for the Streamlit app component."
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": 2,
14
+ "metadata": {},
15
+ "outputs": [],
16
+ "source": [
17
+ "import streamlit as st\n",
18
+ "from fastai.collab import *\n",
19
+ "import torch\n",
20
+ "from torch import nn\n",
21
+ "import pickle\n",
22
+ "import pandas as pd\n",
23
+ "from transformers import PegasusForConditionalGeneration, PegasusTokenizer\n",
24
+ "import sentencepiece\n",
25
+ "import string\n",
26
+ "import requests"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": 89,
32
+ "metadata": {},
33
+ "outputs": [
34
+ {
35
+ "ename": "KeyboardInterrupt",
36
+ "evalue": "",
37
+ "output_type": "error",
38
+ "traceback": [
39
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
40
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
41
+ "Cell \u001b[0;32mIn[89], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m dls\u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mread_pickle(\u001b[39m'\u001b[39m\u001b[39mdataloader.pkl\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m 4\u001b[0m \u001b[39m# Create an instance of the model\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m learn \u001b[39m=\u001b[39m collab_learner(dls, use_nn\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m,layers\u001b[39m=\u001b[39;49m[\u001b[39m20\u001b[39;49m,\u001b[39m10\u001b[39;49m],y_range\u001b[39m=\u001b[39;49m(\u001b[39m0\u001b[39;49m,\u001b[39m10.5\u001b[39;49m))\n\u001b[1;32m 7\u001b[0m \u001b[39m# Load the saved state dictionary\u001b[39;00m\n\u001b[1;32m 8\u001b[0m state_dict \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mload(\u001b[39m'\u001b[39m\u001b[39mmyModel.pth\u001b[39m\u001b[39m'\u001b[39m,map_location\u001b[39m=\u001b[39mtorch\u001b[39m.\u001b[39mdevice(\u001b[39m'\u001b[39m\u001b[39mcpu\u001b[39m\u001b[39m'\u001b[39m))\n",
42
+ "File \u001b[0;32m~/mambaforge/lib/python3.10/site-packages/fastai/collab.py:100\u001b[0m, in \u001b[0;36mcollab_learner\u001b[0;34m(dls, n_factors, use_nn, emb_szs, layers, config, y_range, loss_func, **kwargs)\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[39mif\u001b[39;00m y_range \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m: config[\u001b[39m'\u001b[39m\u001b[39my_range\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m=\u001b[39m y_range\n\u001b[1;32m 99\u001b[0m \u001b[39mif\u001b[39;00m layers \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m: layers \u001b[39m=\u001b[39m [n_factors]\n\u001b[0;32m--> 100\u001b[0m \u001b[39mif\u001b[39;00m use_nn: model \u001b[39m=\u001b[39m EmbeddingNN(emb_szs\u001b[39m=\u001b[39;49memb_szs, layers\u001b[39m=\u001b[39;49mlayers, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mconfig)\n\u001b[1;32m 101\u001b[0m \u001b[39melse\u001b[39;00m: model \u001b[39m=\u001b[39m EmbeddingDotBias\u001b[39m.\u001b[39mfrom_classes(n_factors, dls\u001b[39m.\u001b[39mclasses, y_range\u001b[39m=\u001b[39my_range)\n\u001b[1;32m 102\u001b[0m \u001b[39mreturn\u001b[39;00m Learner(dls, model, loss_func\u001b[39m=\u001b[39mloss_func, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n",
43
+ "File \u001b[0;32m~/mambaforge/lib/python3.10/site-packages/fastcore/meta.py:40\u001b[0m, in \u001b[0;36mPrePostInitMeta.__call__\u001b[0;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[1;32m 38\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mtype\u001b[39m(res)\u001b[39m==\u001b[39m\u001b[39mcls\u001b[39m:\n\u001b[1;32m 39\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(res,\u001b[39m'\u001b[39m\u001b[39m__pre_init__\u001b[39m\u001b[39m'\u001b[39m): res\u001b[39m.\u001b[39m__pre_init__(\u001b[39m*\u001b[39margs,\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m---> 40\u001b[0m res\u001b[39m.\u001b[39;49m\u001b[39m__init__\u001b[39;49m(\u001b[39m*\u001b[39;49margs,\u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 41\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(res,\u001b[39m'\u001b[39m\u001b[39m__post_init__\u001b[39m\u001b[39m'\u001b[39m): res\u001b[39m.\u001b[39m__post_init__(\u001b[39m*\u001b[39margs,\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[1;32m 42\u001b[0m \u001b[39mreturn\u001b[39;00m res\n",
44
+ "File \u001b[0;32m~/mambaforge/lib/python3.10/site-packages/fastai/collab.py:89\u001b[0m, in \u001b[0;36mEmbeddingNN.__init__\u001b[0;34m(self, emb_szs, layers, **kwargs)\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[39m@delegates\u001b[39m(TabularModel\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m)\n\u001b[1;32m 88\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__init__\u001b[39m(\u001b[39mself\u001b[39m, emb_szs, layers, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[0;32m---> 89\u001b[0m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49m\u001b[39m__init__\u001b[39;49m(emb_szs\u001b[39m=\u001b[39;49memb_szs, n_cont\u001b[39m=\u001b[39;49m\u001b[39m0\u001b[39;49m, out_sz\u001b[39m=\u001b[39;49m\u001b[39m1\u001b[39;49m, layers\u001b[39m=\u001b[39;49mlayers, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
45
+ "File \u001b[0;32m~/mambaforge/lib/python3.10/site-packages/fastai/tabular/model.py:53\u001b[0m, in \u001b[0;36mTabularModel.__init__\u001b[0;34m(self, emb_szs, n_cont, out_sz, layers, ps, embed_p, y_range, use_bn, bn_final, bn_cont, act_cls, lin_first)\u001b[0m\n\u001b[1;32m 51\u001b[0m ps \u001b[39m=\u001b[39m ifnone(ps, [\u001b[39m0\u001b[39m]\u001b[39m*\u001b[39m\u001b[39mlen\u001b[39m(layers))\n\u001b[1;32m 52\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m is_listy(ps): ps \u001b[39m=\u001b[39m [ps]\u001b[39m*\u001b[39m\u001b[39mlen\u001b[39m(layers)\n\u001b[0;32m---> 53\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39membeds \u001b[39m=\u001b[39m nn\u001b[39m.\u001b[39mModuleList([Embedding(ni, nf) \u001b[39mfor\u001b[39;00m ni,nf \u001b[39min\u001b[39;00m emb_szs])\n\u001b[1;32m 54\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39memb_drop \u001b[39m=\u001b[39m nn\u001b[39m.\u001b[39mDropout(embed_p)\n\u001b[1;32m 55\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mbn_cont \u001b[39m=\u001b[39m nn\u001b[39m.\u001b[39mBatchNorm1d(n_cont) \u001b[39mif\u001b[39;00m bn_cont \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m\n",
46
+ "File \u001b[0;32m~/mambaforge/lib/python3.10/site-packages/fastai/tabular/model.py:53\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 51\u001b[0m ps \u001b[39m=\u001b[39m ifnone(ps, [\u001b[39m0\u001b[39m]\u001b[39m*\u001b[39m\u001b[39mlen\u001b[39m(layers))\n\u001b[1;32m 52\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m is_listy(ps): ps \u001b[39m=\u001b[39m [ps]\u001b[39m*\u001b[39m\u001b[39mlen\u001b[39m(layers)\n\u001b[0;32m---> 53\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39membeds \u001b[39m=\u001b[39m nn\u001b[39m.\u001b[39mModuleList([Embedding(ni, nf) \u001b[39mfor\u001b[39;00m ni,nf \u001b[39min\u001b[39;00m emb_szs])\n\u001b[1;32m 54\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39memb_drop \u001b[39m=\u001b[39m nn\u001b[39m.\u001b[39mDropout(embed_p)\n\u001b[1;32m 55\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mbn_cont \u001b[39m=\u001b[39m nn\u001b[39m.\u001b[39mBatchNorm1d(n_cont) \u001b[39mif\u001b[39;00m bn_cont \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m\n",
47
+ "File \u001b[0;32m~/mambaforge/lib/python3.10/site-packages/fastai/layers.py:291\u001b[0m, in \u001b[0;36mEmbedding.__init__\u001b[0;34m(self, ni, nf, std)\u001b[0m\n\u001b[1;32m 290\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__init__\u001b[39m(\u001b[39mself\u001b[39m, ni, nf, std\u001b[39m=\u001b[39m\u001b[39m0.01\u001b[39m):\n\u001b[0;32m--> 291\u001b[0m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49m\u001b[39m__init__\u001b[39;49m(ni, nf)\n\u001b[1;32m 292\u001b[0m trunc_normal_(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mweight\u001b[39m.\u001b[39mdata, std\u001b[39m=\u001b[39mstd)\n",
48
+ "File \u001b[0;32m~/mambaforge/lib/python3.10/site-packages/torch/nn/modules/sparse.py:142\u001b[0m, in \u001b[0;36mEmbedding.__init__\u001b[0;34m(self, num_embeddings, embedding_dim, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse, _weight, device, dtype)\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[39mif\u001b[39;00m _weight \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 141\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mweight \u001b[39m=\u001b[39m Parameter(torch\u001b[39m.\u001b[39mempty((num_embeddings, embedding_dim), \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mfactory_kwargs))\n\u001b[0;32m--> 142\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mreset_parameters()\n\u001b[1;32m 143\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 144\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mlist\u001b[39m(_weight\u001b[39m.\u001b[39mshape) \u001b[39m==\u001b[39m [num_embeddings, embedding_dim], \\\n\u001b[1;32m 145\u001b[0m \u001b[39m'\u001b[39m\u001b[39mShape of weight does not match num_embeddings and embedding_dim\u001b[39m\u001b[39m'\u001b[39m\n",
49
+ "File \u001b[0;32m~/mambaforge/lib/python3.10/site-packages/torch/nn/modules/sparse.py:151\u001b[0m, in \u001b[0;36mEmbedding.reset_parameters\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mreset_parameters\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 151\u001b[0m init\u001b[39m.\u001b[39;49mnormal_(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mweight)\n\u001b[1;32m 152\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_fill_padding_idx_with_zero()\n",
50
+ "File \u001b[0;32m~/mambaforge/lib/python3.10/site-packages/torch/nn/init.py:155\u001b[0m, in \u001b[0;36mnormal_\u001b[0;34m(tensor, mean, std)\u001b[0m\n\u001b[1;32m 153\u001b[0m \u001b[39mif\u001b[39;00m torch\u001b[39m.\u001b[39moverrides\u001b[39m.\u001b[39mhas_torch_function_variadic(tensor):\n\u001b[1;32m 154\u001b[0m \u001b[39mreturn\u001b[39;00m torch\u001b[39m.\u001b[39moverrides\u001b[39m.\u001b[39mhandle_torch_function(normal_, (tensor,), tensor\u001b[39m=\u001b[39mtensor, mean\u001b[39m=\u001b[39mmean, std\u001b[39m=\u001b[39mstd)\n\u001b[0;32m--> 155\u001b[0m \u001b[39mreturn\u001b[39;00m _no_grad_normal_(tensor, mean, std)\n",
51
+ "File \u001b[0;32m~/mambaforge/lib/python3.10/site-packages/torch/nn/init.py:19\u001b[0m, in \u001b[0;36m_no_grad_normal_\u001b[0;34m(tensor, mean, std)\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_no_grad_normal_\u001b[39m(tensor, mean, std):\n\u001b[1;32m 18\u001b[0m \u001b[39mwith\u001b[39;00m torch\u001b[39m.\u001b[39mno_grad():\n\u001b[0;32m---> 19\u001b[0m \u001b[39mreturn\u001b[39;00m tensor\u001b[39m.\u001b[39;49mnormal_(mean, std)\n",
52
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
53
+ ]
54
+ }
55
+ ],
56
+ "source": [
57
+ "# Load the data loader \n",
58
+ "dls= pd.read_pickle('dataloader.pkl')\n",
59
+ "\n",
60
+ "# Create an instance of the model\n",
61
+ "learn = collab_learner(dls, use_nn=True,layers=[20,10],y_range=(0,10.5))\n",
62
+ "\n",
63
+ "# Load the saved state dictionary\n",
64
+ "state_dict = torch.load('myModel.pth',map_location=torch.device('cpu'))\n",
65
+ "\n",
66
+ "# Assign the loaded state dictionary to the model's load_state_dict() method\n",
67
+ "learn.model.load_state_dict(state_dict)"
68
+ ]
69
+ },
70
+ {
71
+ "cell_type": "code",
72
+ "execution_count": null,
73
+ "metadata": {},
74
+ "outputs": [],
75
+ "source": [
76
+ "def get_3_recs(book):\n",
77
+ " book_factors = learn.model.embeds[1].weight\n",
78
+ " idx = dls.classes['title'].o2i[book]\n",
79
+ " distances = nn.CosineSimilarity(dim=1)(book_factors, book_factors[idx][None])\n",
80
+ " idxs = distances.argsort(descending=True)[1:4]\n",
81
+ " recs = [dls.classes['title'][i] for i in idxs]\n",
82
+ " return recs"
83
+ ]
84
+ },
85
+ {
86
+ "cell_type": "code",
87
+ "execution_count": 3,
88
+ "metadata": {},
89
+ "outputs": [],
90
+ "source": [
91
+ "#load books dataframe\n",
92
+ "books_df = pd.read_csv('./data/BX_Books.csv', sep=';',encoding='latin-1')"
93
+ ]
94
+ },
95
+ {
96
+ "cell_type": "code",
97
+ "execution_count": 12,
98
+ "metadata": {},
99
+ "outputs": [
100
+ {
101
+ "data": {
102
+ "text/plain": [
103
+ "['http://images.amazon.com/images/P/0451524934.01.LZZZZZZZ.jpg',\n",
104
+ " 'http://images.amazon.com/images/P/185326041X.01.LZZZZZZZ.jpg']"
105
+ ]
106
+ },
107
+ "execution_count": 12,
108
+ "metadata": {},
109
+ "output_type": "execute_result"
110
+ }
111
+ ],
112
+ "source": [
113
+ "#get covers\n",
114
+ "def get_covers(recs):\n",
115
+ " imgs = [books_df[books_df['Book-Title']==r]['Image-URL-L'].tolist()[0]for r in recs]\n",
116
+ " return imgs\n",
117
+ "\n",
118
+ "get_covers(['1984', 'The Great Gatsby'])"
119
+ ]
120
+ },
121
+ {
122
+ "cell_type": "code",
123
+ "execution_count": null,
124
+ "metadata": {},
125
+ "outputs": [
126
+ {
127
+ "name": "stderr",
128
+ "output_type": "stream",
129
+ "text": [
130
+ "2023-06-24 16:15:04.552 \n",
131
+ " \u001b[33m\u001b[1mWarning:\u001b[0m to view this Streamlit app on a browser, run it with the following\n",
132
+ " command:\n",
133
+ "\n",
134
+ " streamlit run /Users/irenenguyen/mambaforge/lib/python3.10/site-packages/ipykernel_launcher.py [ARGUMENTS]\n"
135
+ ]
136
+ }
137
+ ],
138
+ "source": [
139
+ "user_input = st.text_input(\"What's your favorite book?\")\n",
140
+ "recs = get_3_recs(user_input)\n",
141
+ "st.write(\"Try these books:\", \",\".join(recs))"
142
+ ]
143
+ },
144
+ {
145
+ "attachments": {},
146
+ "cell_type": "markdown",
147
+ "metadata": {},
148
+ "source": [
149
+ "Description Summarizer"
150
+ ]
151
+ },
152
+ {
153
+ "attachments": {},
154
+ "cell_type": "markdown",
155
+ "metadata": {},
156
+ "source": [
157
+ "Getting book description from Google Books API"
158
+ ]
159
+ },
160
+ {
161
+ "cell_type": "code",
162
+ "execution_count": null,
163
+ "metadata": {},
164
+ "outputs": [],
165
+ "source": [
166
+ "def search_book_description(title):\n",
167
+ " # Google Books API endpoint for book search\n",
168
+ " url = \"https://www.googleapis.com/books/v1/volumes\"\n",
169
+ "\n",
170
+ " # Parameters for the book search\n",
171
+ " params = {\n",
172
+ " \"q\": title,\n",
173
+ " \"maxResults\": 1\n",
174
+ " }\n",
175
+ "\n",
176
+ " # Send GET request to Google Books API\n",
177
+ " response = requests.get(url, params=params)\n",
178
+ "\n",
179
+ " # Check if the request was successful\n",
180
+ " if response.status_code == 200:\n",
181
+ " # Parse the JSON response to extract the book description\n",
182
+ " data = response.json()\n",
183
+ "\n",
184
+ " if \"items\" in data and len(data[\"items\"]) > 0:\n",
185
+ " book_description = data[\"items\"][0][\"volumeInfo\"].get(\"description\", \"No description available.\")\n",
186
+ " return book_description\n",
187
+ " else:\n",
188
+ " print(\"No book found with the given title.\")\n",
189
+ " return None\n",
190
+ " else:\n",
191
+ " # If the request failed, print the error message\n",
192
+ " print(\"Error:\", response.status_code, response.text)\n",
193
+ " return None"
194
+ ]
195
+ },
196
+ {
197
+ "attachments": {},
198
+ "cell_type": "markdown",
199
+ "metadata": {},
200
+ "source": [
201
+ "Summarization Model"
202
+ ]
203
+ },
204
+ {
205
+ "cell_type": "code",
206
+ "execution_count": null,
207
+ "metadata": {},
208
+ "outputs": [
209
+ {
210
+ "name": "stderr",
211
+ "output_type": "stream",
212
+ "text": [
213
+ "You are using a model of type pegasus_x to instantiate a model of type pegasus. This is not supported for all configurations of models and can yield errors.\n",
214
+ "Some weights of the model checkpoint at pszemraj/pegasus-x-large-book-summary were not used when initializing PegasusForConditionalGeneration: ['model.encoder.layers.5.global_self_attn_layer_norm.bias', 'model.encoder.layers.2.global_self_attn_layer_norm.weight', 'model.encoder.layers.15.global_self_attn_layer_norm.weight', 'model.encoder.layers.11.global_self_attn_layer_norm.weight', 'model.encoder.layers.12.global_self_attn_layer_norm.bias', 'model.encoder.layers.4.global_self_attn_layer_norm.weight', 'model.encoder.layers.0.global_self_attn_layer_norm.bias', 'model.encoder.layers.10.global_self_attn_layer_norm.bias', 'model.encoder.layers.5.global_self_attn_layer_norm.weight', 'model.encoder.layers.7.global_self_attn_layer_norm.bias', 'model.encoder.layers.11.global_self_attn_layer_norm.bias', 'model.encoder.layers.13.global_self_attn_layer_norm.bias', 'model.encoder.layers.13.global_self_attn_layer_norm.weight', 'model.encoder.layers.14.global_self_attn_layer_norm.weight', 'model.encoder.layers.9.global_self_attn_layer_norm.weight', 'model.encoder.layers.8.global_self_attn_layer_norm.weight', 'model.encoder.layers.3.global_self_attn_layer_norm.weight', 'model.encoder.layers.4.global_self_attn_layer_norm.bias', 'model.encoder.layers.14.global_self_attn_layer_norm.bias', 'model.encoder.layers.10.global_self_attn_layer_norm.weight', 'model.encoder.layers.6.global_self_attn_layer_norm.bias', 'model.encoder.layers.2.global_self_attn_layer_norm.bias', 'model.encoder.layers.1.global_self_attn_layer_norm.weight', 'model.encoder.layers.9.global_self_attn_layer_norm.bias', 'model.encoder.layers.0.global_self_attn_layer_norm.weight', 'model.encoder.layers.12.global_self_attn_layer_norm.weight', 'model.encoder.layers.7.global_self_attn_layer_norm.weight', 'model.encoder.layers.3.global_self_attn_layer_norm.bias', 'model.encoder.layers.6.global_self_attn_layer_norm.weight', 'model.encoder.layers.15.global_self_attn_layer_norm.bias', 'model.encoder.embed_global.weight', 
'model.encoder.layers.1.global_self_attn_layer_norm.bias', 'model.encoder.layers.8.global_self_attn_layer_norm.bias']\n",
215
+ "- This IS expected if you are initializing PegasusForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
216
+ "- This IS NOT expected if you are initializing PegasusForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
217
+ "Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at pszemraj/pegasus-x-large-book-summary and are newly initialized: ['model.decoder.layers.7.encoder_attn.q_proj.bias', 'model.encoder.layers.6.self_attn.q_proj.bias', 'model.decoder.layers.4.self_attn.out_proj.bias', 'model.decoder.layers.6.encoder_attn.q_proj.bias', 'model.decoder.layers.12.self_attn.v_proj.bias', 'model.decoder.layers.6.encoder_attn.k_proj.bias', 'model.decoder.layers.7.encoder_attn.out_proj.bias', 'model.decoder.layers.3.encoder_attn.v_proj.bias', 'model.decoder.layers.7.encoder_attn.v_proj.bias', 'model.decoder.layers.13.encoder_attn.out_proj.bias', 'model.decoder.layers.10.self_attn.k_proj.bias', 'model.decoder.layers.2.self_attn.out_proj.bias', 'model.encoder.layers.12.self_attn.k_proj.bias', 'model.decoder.layers.9.encoder_attn.k_proj.bias', 'model.decoder.layers.11.encoder_attn.v_proj.bias', 'model.decoder.layers.7.encoder_attn.k_proj.bias', 'model.encoder.layers.7.self_attn.v_proj.bias', 'model.encoder.layers.12.self_attn.q_proj.bias', 'model.encoder.layers.0.self_attn.v_proj.bias', 'model.decoder.layers.2.self_attn.v_proj.bias', 'model.decoder.layers.11.self_attn.q_proj.bias', 'model.decoder.layers.5.self_attn.v_proj.bias', 'model.decoder.layers.15.self_attn.v_proj.bias', 'model.decoder.layers.0.encoder_attn.v_proj.bias', 'model.decoder.layers.12.self_attn.out_proj.bias', 'model.encoder.layers.0.self_attn.out_proj.bias', 'model.encoder.layers.10.self_attn.out_proj.bias', 'model.encoder.layers.13.self_attn.k_proj.bias', 'model.encoder.layers.10.self_attn.v_proj.bias', 'model.decoder.layers.6.self_attn.out_proj.bias', 'model.decoder.layers.14.encoder_attn.out_proj.bias', 'model.decoder.layers.3.encoder_attn.out_proj.bias', 'model.encoder.layers.14.self_attn.q_proj.bias', 'model.decoder.layers.2.encoder_attn.out_proj.bias', 'model.decoder.layers.6.encoder_attn.v_proj.bias', 'model.decoder.layers.10.self_attn.v_proj.bias', 
'model.encoder.layers.9.self_attn.q_proj.bias', 'model.decoder.layers.4.encoder_attn.q_proj.bias', 'model.decoder.layers.5.encoder_attn.k_proj.bias', 'model.encoder.layers.14.self_attn.k_proj.bias', 'model.decoder.layers.7.self_attn.out_proj.bias', 'model.decoder.layers.0.encoder_attn.out_proj.bias', 'model.decoder.layers.4.encoder_attn.v_proj.bias', 'model.encoder.layers.13.self_attn.out_proj.bias', 'model.decoder.layers.5.self_attn.out_proj.bias', 'model.decoder.layers.2.self_attn.k_proj.bias', 'model.decoder.layers.0.self_attn.k_proj.bias', 'model.decoder.layers.1.self_attn.v_proj.bias', 'model.decoder.layers.4.self_attn.k_proj.bias', 'model.encoder.layers.3.self_attn.v_proj.bias', 'model.decoder.layers.10.encoder_attn.v_proj.bias', 'model.encoder.layers.10.self_attn.k_proj.bias', 'model.decoder.layers.14.encoder_attn.q_proj.bias', 'model.encoder.layers.11.self_attn.k_proj.bias', 'model.decoder.layers.7.self_attn.q_proj.bias', 'model.encoder.layers.9.self_attn.v_proj.bias', 'model.decoder.layers.5.self_attn.k_proj.bias', 'model.decoder.layers.8.encoder_attn.out_proj.bias', 'model.decoder.layers.1.encoder_attn.out_proj.bias', 'model.decoder.layers.14.encoder_attn.k_proj.bias', 'model.decoder.layers.4.encoder_attn.k_proj.bias', 'model.decoder.layers.6.self_attn.q_proj.bias', 'model.decoder.layers.3.encoder_attn.q_proj.bias', 'model.decoder.layers.8.self_attn.out_proj.bias', 'model.encoder.layers.8.self_attn.q_proj.bias', 'model.decoder.layers.4.self_attn.q_proj.bias', 'model.decoder.layers.11.self_attn.k_proj.bias', 'model.decoder.layers.9.self_attn.out_proj.bias', 'model.decoder.layers.10.encoder_attn.q_proj.bias', 'model.encoder.layers.3.self_attn.out_proj.bias', 'model.encoder.layers.12.self_attn.v_proj.bias', 'model.encoder.layers.6.self_attn.out_proj.bias', 'model.decoder.layers.2.encoder_attn.v_proj.bias', 'model.decoder.layers.3.self_attn.out_proj.bias', 'model.encoder.layers.11.self_attn.q_proj.bias', 'model.encoder.layers.13.self_attn.q_proj.bias', 
'model.decoder.layers.14.self_attn.out_proj.bias', 'model.encoder.layers.13.self_attn.v_proj.bias', 'model.decoder.layers.0.self_attn.v_proj.bias', 'model.decoder.layers.2.self_attn.q_proj.bias', 'model.decoder.layers.13.self_attn.k_proj.bias', 'model.encoder.layers.8.self_attn.k_proj.bias', 'model.decoder.layers.14.self_attn.k_proj.bias', 'model.decoder.layers.3.encoder_attn.k_proj.bias', 'model.encoder.layers.8.self_attn.v_proj.bias', 'model.decoder.layers.10.encoder_attn.out_proj.bias', 'model.decoder.layers.0.encoder_attn.q_proj.bias', 'model.decoder.layers.11.encoder_attn.k_proj.bias', 'model.decoder.layers.8.self_attn.q_proj.bias', 'model.decoder.layers.15.encoder_attn.out_proj.bias', 'model.encoder.layers.7.self_attn.q_proj.bias', 'model.encoder.layers.12.self_attn.out_proj.bias', 'model.decoder.layers.12.self_attn.q_proj.bias', 'model.encoder.layers.15.self_attn.out_proj.bias', 'model.encoder.layers.15.self_attn.v_proj.bias', 'model.encoder.layers.5.self_attn.q_proj.bias', 'model.decoder.layers.13.encoder_attn.q_proj.bias', 'model.decoder.layers.7.self_attn.k_proj.bias', 'model.decoder.layers.12.self_attn.k_proj.bias', 'model.decoder.layers.10.self_attn.q_proj.bias', 'model.encoder.layers.3.self_attn.k_proj.bias', 'model.decoder.layers.13.self_attn.v_proj.bias', 'model.decoder.layers.0.encoder_attn.k_proj.bias', 'model.decoder.layers.15.encoder_attn.k_proj.bias', 'model.decoder.layers.6.encoder_attn.out_proj.bias', 'model.decoder.layers.4.encoder_attn.out_proj.bias', 'model.decoder.layers.15.self_attn.q_proj.bias', 'model.encoder.layers.15.self_attn.k_proj.bias', 'model.encoder.layers.2.self_attn.k_proj.bias', 'model.encoder.layers.14.self_attn.v_proj.bias', 'model.decoder.layers.8.encoder_attn.k_proj.bias', 'model.encoder.layers.4.self_attn.q_proj.bias', 'model.encoder.layers.8.self_attn.out_proj.bias', 'model.encoder.layers.1.self_attn.q_proj.bias', 'model.decoder.layers.12.encoder_attn.v_proj.bias', 'model.decoder.layers.2.encoder_attn.k_proj.bias', 
'model.encoder.layers.1.self_attn.v_proj.bias', 'model.encoder.layers.4.self_attn.k_proj.bias', 'model.encoder.layers.4.self_attn.out_proj.bias', 'model.decoder.layers.3.self_attn.v_proj.bias', 'model.decoder.layers.14.self_attn.v_proj.bias', 'model.decoder.layers.3.self_attn.q_proj.bias', 'model.decoder.layers.13.encoder_attn.k_proj.bias', 'model.decoder.layers.15.encoder_attn.v_proj.bias', 'model.decoder.layers.0.self_attn.q_proj.bias', 'model.encoder.layers.1.self_attn.k_proj.bias', 'model.decoder.layers.13.self_attn.q_proj.bias', 'model.decoder.layers.5.encoder_attn.out_proj.bias', 'model.decoder.layers.12.encoder_attn.q_proj.bias', 'model.encoder.layers.7.self_attn.k_proj.bias', 'model.encoder.layers.0.self_attn.k_proj.bias', 'model.decoder.layers.7.self_attn.v_proj.bias', 'model.decoder.layers.13.encoder_attn.v_proj.bias', 'model.decoder.layers.12.encoder_attn.k_proj.bias', 'model.decoder.layers.5.self_attn.q_proj.bias', 'model.decoder.layers.11.encoder_attn.q_proj.bias', 'model.decoder.layers.8.self_attn.k_proj.bias', 'model.encoder.layers.1.self_attn.out_proj.bias', 'model.encoder.layers.5.self_attn.out_proj.bias', 'model.decoder.layers.1.self_attn.q_proj.bias', 'model.decoder.layers.1.self_attn.k_proj.bias', 'model.encoder.layers.15.self_attn.q_proj.bias', 'model.encoder.layers.9.self_attn.k_proj.bias', 'model.decoder.layers.9.self_attn.k_proj.bias', 'model.encoder.layers.6.self_attn.k_proj.bias', 'model.decoder.layers.1.encoder_attn.v_proj.bias', 'model.decoder.layers.9.encoder_attn.q_proj.bias', 'model.encoder.layers.11.self_attn.out_proj.bias', 'model.decoder.layers.6.self_attn.v_proj.bias', 'model.decoder.layers.9.encoder_attn.v_proj.bias', 'model.decoder.layers.14.encoder_attn.v_proj.bias', 'model.decoder.layers.11.self_attn.v_proj.bias', 'model.decoder.layers.1.self_attn.out_proj.bias', 'model.decoder.layers.11.encoder_attn.out_proj.bias', 'model.decoder.layers.5.encoder_attn.v_proj.bias', 'model.decoder.layers.15.encoder_attn.q_proj.bias', 
'model.encoder.layers.7.self_attn.out_proj.bias', 'model.decoder.layers.2.encoder_attn.q_proj.bias', 'model.encoder.layers.4.self_attn.v_proj.bias', 'model.encoder.layers.2.self_attn.q_proj.bias', 'model.decoder.layers.9.self_attn.v_proj.bias', 'model.decoder.layers.8.encoder_attn.q_proj.bias', 'model.decoder.layers.8.encoder_attn.v_proj.bias', 'model.encoder.layers.10.self_attn.q_proj.bias', 'model.decoder.layers.5.encoder_attn.q_proj.bias', 'model.decoder.layers.13.self_attn.out_proj.bias', 'model.decoder.layers.1.encoder_attn.k_proj.bias', 'model.encoder.layers.11.self_attn.v_proj.bias', 'model.decoder.layers.15.self_attn.out_proj.bias', 'model.decoder.layers.0.self_attn.out_proj.bias', 'model.encoder.layers.3.self_attn.q_proj.bias', 'model.encoder.layers.2.self_attn.v_proj.bias', 'model.decoder.layers.15.self_attn.k_proj.bias', 'model.decoder.layers.4.self_attn.v_proj.bias', 'model.encoder.layers.9.self_attn.out_proj.bias', 'model.encoder.layers.5.self_attn.v_proj.bias', 'model.encoder.layers.14.self_attn.out_proj.bias', 'model.decoder.layers.1.encoder_attn.q_proj.bias', 'model.decoder.layers.10.encoder_attn.k_proj.bias', 'model.decoder.layers.10.self_attn.out_proj.bias', 'model.decoder.layers.14.self_attn.q_proj.bias', 'model.decoder.layers.12.encoder_attn.out_proj.bias', 'model.decoder.layers.6.self_attn.k_proj.bias', 'model.decoder.layers.11.self_attn.out_proj.bias', 'model.decoder.layers.9.encoder_attn.out_proj.bias', 'model.decoder.layers.3.self_attn.k_proj.bias', 'model.encoder.layers.6.self_attn.v_proj.bias', 'model.encoder.layers.5.self_attn.k_proj.bias', 'model.decoder.layers.8.self_attn.v_proj.bias', 'model.encoder.layers.0.self_attn.q_proj.bias', 'model.encoder.layers.2.self_attn.out_proj.bias', 'model.decoder.layers.9.self_attn.q_proj.bias']\n",
218
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
219
+ ]
220
+ }
221
+ ],
222
+ "source": [
223
+ "#load tokenizer\n",
224
+ "tokenizer = PegasusTokenizer.from_pretrained(\"pszemraj/pegasus-x-large-book-summary\")\n",
225
+ "#load model\n",
226
+ "model = PegasusForConditionalGeneration.from_pretrained(\"pszemraj/pegasus-x-large-book-summary\")"
227
+ ]
228
+ },
229
+ {
230
+ "cell_type": "code",
231
+ "execution_count": 108,
232
+ "metadata": {},
233
+ "outputs": [],
234
+ "source": [
235
+ "#function to ensure summaries end with punctuation\n",
236
+ "def cut(sum):\n",
237
+ " last_punc_idx = max(sum.rfind(p) for p in string.punctuation)\n",
238
+ " output = sum[:last_punc_idx + 1]\n",
239
+ " return output\n",
240
+ "\n",
241
+ "\n",
242
+ "#function to summarize\n",
243
+ "\n",
244
+ "def summarize(des_list):\n",
245
+ " if \"No description available.\" in des_list:\n",
246
+ " idx = des_list.index(\"No description available.\")\n",
247
+ " des = des_list.copy()\n",
248
+ " des.pop(idx)\n",
249
+ " rest = summarize(des)\n",
250
+ " rest.insert(idx,'No description available.')\n",
251
+ " return rest\n",
252
+ " else: \n",
253
+ " # Tokenize all the descriptions in the list\n",
254
+ " encoded_inputs = tokenizer(des_list, truncation=True, padding=\"longest\", return_tensors=\"pt\")\n",
255
+ "\n",
256
+ " # Generate summaries for all the inputs\n",
257
+ " summaries = model.generate(**encoded_inputs, max_new_tokens=100)\n",
258
+ "\n",
259
+ " # Decode the summaries and process them\n",
260
+ " outputs = tokenizer.batch_decode(summaries, skip_special_tokens=True)\n",
261
+ " outputs = list(map(cut, outputs))\n",
262
+ " return outputs\n"
263
+ ]
264
+ }
265
+ ],
266
+ "metadata": {
267
+ "kernelspec": {
268
+ "display_name": "base",
269
+ "language": "python",
270
+ "name": "python3"
271
+ },
272
+ "language_info": {
273
+ "codemirror_mode": {
274
+ "name": "ipython",
275
+ "version": 3
276
+ },
277
+ "file_extension": ".py",
278
+ "mimetype": "text/x-python",
279
+ "name": "python",
280
+ "nbconvert_exporter": "python",
281
+ "pygments_lexer": "ipython3",
282
+ "version": "3.10.10"
283
+ },
284
+ "orig_nbformat": 4
285
+ },
286
+ "nbformat": 4,
287
+ "nbformat_minor": 2
288
+ }
app.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from fastai.collab import *
3
+ import torch
4
+ from torch import nn
5
+ import pickle
6
+ import pandas as pd
7
+ from transformers import PegasusForConditionalGeneration, PegasusTokenizer
8
+ import sentencepiece
9
+ import string
10
+ import requests
11
+
12
@st.cache_resource
def load_stuff():
    """Load every heavyweight resource the app needs.

    Decorated with ``st.cache_resource`` so Streamlit can reuse the result
    instead of reloading on every script rerun.

    Returns:
        A 5-tuple ``(dls, learn, books, tokenizer, model)``: the pickled
        data loaders, the collaborative-filtering learner with its saved
        weights restored, the books dataframe, and the summarization
        tokenizer and model.
    """
    # NOTE(review): the checkpoint name says "pegasus-x" while the classes
    # used below are the plain Pegasus ones -- confirm the weights load as
    # intended.
    checkpoint = "pszemraj/pegasus-x-large-book-summary"

    # Recommender: rebuild the learner around the pickled data loaders,
    # then restore the trained weights, mapped onto the CPU.
    dls = pd.read_pickle('dataloader.pkl')
    learn = collab_learner(dls, use_nn=True, layers=[20, 10], y_range=(0, 10.5))
    weights = torch.load('myModel.pth', map_location=torch.device('cpu'))
    learn.model.load_state_dict(weights)

    # Book metadata (titles, cover URLs, ...).
    books = pd.read_csv('./data/BX_Books.csv', sep=';', encoding='latin-1')

    # Summarizer: tokenizer and model come from the same checkpoint.
    tokenizer = PegasusTokenizer.from_pretrained(checkpoint)
    model = PegasusForConditionalGeneration.from_pretrained(checkpoint)

    return dls, learn, books, tokenizer, model
29
+
30
+ dls, learn, books, tokenizer, model = load_stuff()
31
+
32
# Function to get recommendations
def get_3_recs(book):
    """Return three titles whose embeddings are closest to ``book``.

    Closeness is the cosine similarity between rows of
    ``learn.model.embeds[1].weight`` (presumably the title embedding
    matrix -- confirm against the trained model).

    Args:
        book: title to look up in ``dls.classes['title'].o2i``.

    Returns:
        A list with the titles ranked 2nd to 4th by similarity. The
        top-ranked entry is skipped; presumably it is the query book itself.
    """
    embeddings = learn.model.embeds[1].weight
    query_idx = dls.classes['title'].o2i[book]
    # Add a leading axis so the single vector is compared against every row.
    query_vec = embeddings[query_idx][None]
    similarity = nn.CosineSimilarity(dim=1)(embeddings, query_vec)
    ranked = similarity.argsort(descending=True)
    titles = dls.classes['title']
    return [titles[i] for i in ranked[1:4]]
40
+
41
# Function to get descriptions from Google Books
def search_book_description(title):
    """Look up ``title`` on the Google Books API and return its description.

    Args:
        title: search text sent as the ``q`` query parameter.

    Returns:
        The description of the first match; the string
        ``"No description available."`` when that match has no description
        field; ``None`` when nothing matched or the request failed.
    """
    # Google Books API endpoint for book search
    url = "https://www.googleapis.com/books/v1/volumes"
    params = {"q": title, "maxResults": 1}

    try:
        # Without a timeout a stalled connection would block the app forever.
        response = requests.get(url, params=params, timeout=10)
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported like any other failure.
        print("Error:", exc)
        return None

    if response.status_code != 200:
        print("Error:", response.status_code, response.text)
        return None

    items = response.json().get("items")
    if not items:
        print("No book found with the given title.")
        return None

    return items[0]["volumeInfo"].get("description", "No description available.")
67
+
68
# Function to ensure summaries end with punctuation
def cut(sum):
    """Trim ``sum`` so that it ends at its last punctuation character.

    Generated summaries can stop mid-sentence; everything after the last
    character found in ``string.punctuation`` is dropped.

    Args:
        sum: the summary text. The name shadows the builtin but is kept so
            existing keyword callers continue to work.

    Returns:
        The trimmed text, or ``sum`` unchanged when it contains no
        punctuation at all.
    """
    last_punc_idx = max(sum.rfind(p) for p in string.punctuation)
    if last_punc_idx == -1:
        # No punctuation anywhere: slicing to index 0 would discard the
        # whole summary, so hand the text back untouched instead.
        return sum
    return sum[:last_punc_idx + 1]
73
+
74
+
75
# Function to summarize
def summarize(des_list):
    """Summarize a list of book descriptions, keeping their order.

    Entries equal to ``"No description available."`` and ``None`` entries
    (a failed lookup) are not sent to the model; the placeholder string is
    returned in their position instead.

    Args:
        des_list: list of description strings, possibly containing the
            placeholder or ``None``.

    Returns:
        A new list of the same length holding one summary (or the
        placeholder) per input entry. ``des_list`` is not modified.
    """
    placeholder = "No description available."
    missing = [d is None or d == placeholder for d in des_list]
    to_summarize = [d for d, skip in zip(des_list, missing) if not skip]

    if to_summarize:
        # Tokenize all the real descriptions as a single batch
        encoded_inputs = tokenizer(to_summarize, truncation=True, padding="longest", return_tensors="pt")

        # Generate summaries for all the inputs
        summaries = model.generate(**encoded_inputs, max_new_tokens=100)

        # Decode the summaries and trim each to its last punctuation mark
        decoded = tokenizer.batch_decode(summaries, skip_special_tokens=True)
        generated = iter(map(cut, decoded))
    else:
        # Nothing to summarize: skip the model call entirely.
        generated = iter(())

    # Stitch summaries and placeholders back together in the input order.
    return [placeholder if skip else next(generated) for skip in missing]
95
+
96
# Function to get cover images
def get_covers(recs):
    """Return the cover image URL for each recommended title.

    Args:
        recs: iterable of titles to look up in the ``Book-Title`` column
            of the ``books`` dataframe.

    Returns:
        A list with the ``Image-URL-L`` value of the first row matching
        each title, in the same order as ``recs``.

    Raises:
        IndexError: if a title has no matching row.
    """
    imgs = []
    for r in recs:
        matches = books.loc[books['Book-Title'] == r, 'Image-URL-L']
        imgs.append(matches.tolist()[0])
    return imgs
100
+
101
# Streamlit app construction
st.title('Your digital librarian')
st.markdown("Hi there! I recommend you books based on one you love (which might not be in the same genre because that's boring) and give you my own synopsis of each book. Enjoy!")
options = books["Book-Title"].tolist()
# Named ``favorite`` rather than ``input`` so the builtin is not shadowed.
favorite = st.selectbox('Select your favorite book', options)
if st.button("Get recommendations"):
    recs = get_3_recs(favorite)
    # A failed lookup yields None; substitute the placeholder text so the
    # summarizer never receives a non-string entry.
    descriptions = [
        search_book_description(rec) or "No description available."
        for rec in recs
    ]
    des_sums = summarize(descriptions)
    imgs = get_covers(recs)

    # One column per recommendation: cover, bold title, then the summary.
    for col, img, rec, des_sum in zip(st.columns(3), imgs, recs, des_sums):
        col.image(img)
        col.markdown(f"**{rec}**")
        col.write(des_sum)
124
+
125
+
126
+
data/BX_Books.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce0eefc5821f75b3d69bc0f871182192e10572bc69c61c692418ba2e0137e80c
3
+ size 77430569
dataloader.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e951d962389857249f08bd895e969d7977eff80eb04ef8a0c4aeb60d6298986
3
+ size 48720654
myModel.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:488fc67cb404b3ccb15800140fa34d1a8025352062a6afec175b231162bb6187
3
+ size 773923366
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastai
2
+ pickle
3
+ pandas
4
+ transformers
5
+ sentencepiece
6
+ torch
7
+ requests
8
+ string