File size: 4,582 Bytes
4c8efc5 7cf9593 dd1e684 4c8efc5 25ed7fb 4c8efc5 03164f7 4c8efc5 03164f7 4c8efc5 5a1f34c 4c8efc5 03164f7 4f0ec5a 5a1f34c 03164f7 4f0ec5a 50861f7 03164f7 2e4da47 fe189c2 de78233 fe189c2 2e4da47 de78233 2e4da47 dd1e684 25ed7fb dd1e684 d67f012 88e5e39 d67f012 d4d5227 ce40dcc d4d5227 d67f012 4c8efc5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
import pandas as pd
from tqdm import tqdm
import numpy as np
import itertools
import requests
import sys
from pyvis.network import Network
import torch
import torch.nn.functional as F
from torch.nn import Linear
from arango import ArangoClient
import torch_geometric.transforms as T
from torch_geometric.nn import SAGEConv, to_hetero
from torch_geometric.transforms import RandomLinkSplit, ToUndirected
from sentence_transformers import SentenceTransformer
from torch_geometric.data import HeteroData
import yaml
import pickle
def net_repr_html(self):
    """Return the network rendered as an HTML string.

    Pulls the current nodes, edges, canvas size and options from
    ``self.get_network_data()`` and feeds them to ``self.template.render``.
    """
    node_list, edge_list, canvas_height, canvas_width, vis_options = self.get_network_data()
    return self.template.render(
        height=canvas_height,
        width=canvas_width,
        nodes=node_list,
        edges=edge_list,
        options=vis_options,
    )
# Monkey-patch pyvis' Network so that `_repr_html_` returns the HTML string
# built by net_repr_html.
# NOTE(review): presumably done so rich front-ends can display the graph
# inline -- confirm against the code that renders the network.
Network._repr_html_ = net_repr_html
#----------------------------------------------
# SAGE model
class GNNEncoder(torch.nn.Module):
    """Two-layer GraphSAGE encoder producing node embeddings.

    Args:
        hidden_channels: Output width of the first SAGEConv layer.
        out_channels: Output width of the second SAGEConv layer, i.e. the
            size of the returned embeddings.
    """

    def __init__(self, hidden_channels, out_channels):
        super().__init__()
        # (-1, -1) leaves the input feature sizes unspecified.
        # NOTE(review): per PyG lazy initialisation these are presumably
        # inferred on the first forward pass -- confirm.
        lazy_in = (-1, -1)
        self.conv1 = SAGEConv(lazy_in, hidden_channels)
        self.conv2 = SAGEConv(lazy_in, out_channels)

    def forward(self, x, edge_index):
        """Apply conv1 -> ReLU -> conv2 and return the result."""
        hidden = self.conv1(x, edge_index).relu()
        return self.conv2(hidden, edge_index)
class EdgeDecoder(torch.nn.Module):
    """Score (user, movie) pairs from their embeddings with a two-layer MLP.

    Args:
        hidden_channels: Size of each node embedding; the first linear layer
            consumes two concatenated embeddings (``2 * hidden_channels``).
    """

    def __init__(self, hidden_channels):
        super().__init__()
        self.lin1 = Linear(2 * hidden_channels, hidden_channels)
        self.lin2 = Linear(hidden_channels, 1)

    def forward(self, z_dict, edge_label_index):
        """Return one score per column of ``edge_label_index`` as a 1-D tensor.

        ``z_dict`` must provide ``'user'`` and ``'movie'`` embeddings; row 0 of
        ``edge_label_index`` indexes users, row 1 indexes movies.
        """
        user_idx, movie_idx = edge_label_index
        user_emb = z_dict['user'][user_idx]
        movie_emb = z_dict['movie'][movie_idx]
        # Each pair is represented by its user and movie embeddings side by side.
        pair = torch.cat((user_emb, movie_emb), dim=-1)
        hidden = self.lin1(pair).relu()
        return self.lin2(hidden).view(-1)
class Model(torch.nn.Module):
    """Link-prediction model: heterogeneous GraphSAGE encoder + edge decoder.

    Args:
        hidden_channels: Width of the encoder layers and of the embeddings
            consumed by the decoder.
        metadata: Optional graph metadata (as returned by
            ``HeteroData.metadata()``) used to convert the encoder with
            ``to_hetero``. When omitted, the metadata of the module-level
            ``data`` object is used, which requires ``load_hetero_data()`` to
            have been called beforehand.
    """

    def __init__(self, hidden_channels, metadata=None):
        super().__init__()
        if metadata is None:
            # Backward-compatible fallback to the module-level graph that
            # load_hetero_data() publishes via `global data`.
            metadata = data.metadata()
        encoder = GNNEncoder(hidden_channels, hidden_channels)
        self.encoder = to_hetero(encoder, metadata, aggr='sum')
        self.decoder = EdgeDecoder(hidden_channels)

    def forward(self, x_dict, edge_index_dict, edge_label_index):
        """Encode all nodes, then decode the pairs in ``edge_label_index``."""
        # z_dict maps node types to the embeddings produced by the encoder.
        z_dict = self.encoder(x_dict, edge_index_dict)
        return self.decoder(z_dict, edge_label_index)
#----------------------------------------------
def load_hetero_data():
    """Unpickle the graph stored in ``Hgraph.pkl`` and return it.

    The loaded object is also published as the module-level ``data`` so that
    other code in this module can read it.

    NOTE: unpickling runs arbitrary code from the file; only use a trusted
    ``Hgraph.pkl``.
    """
    global data
    with open('Hgraph.pkl', 'rb') as graph_file:
        graph = pickle.load(graph_file)
    data = graph
    return graph
def load_model(data):
    """Build the model, load the weights from ``model.pt`` and return it in eval mode.

    Args:
        data: Graph object exposing ``x_dict`` and ``edge_index_dict``.

    Returns:
        The ``Model`` instance with the stored state dict loaded on CPU.
    """
    cpu = torch.device('cpu')
    net = Model(hidden_channels=32)
    # Run one gradient-free pass through the encoder before loading weights.
    # NOTE(review): presumably needed so the lazily-sized SAGEConv layers
    # have concrete parameters for load_state_dict -- confirm.
    with torch.no_grad():
        net.encoder(data.x_dict, data.edge_index_dict)
    weights = torch.load('model.pt', map_location=cpu)
    net.load_state_dict(weights)
    net.eval()
    return net
# Lookup tables loaded once at import time and used by get_title().
# A `global` statement has no effect at module scope, so the names are simply
# assigned here.
# NOTE: unpickling runs arbitrary code from the file; only ship trusted
# pickle files next to this module.

# id_map: table with a 'movieId' column; get_title() returns the index label
# of the matching row (presumably the movie title -- confirm).
with open('id_map.pkl', 'rb') as file:
    id_map = pickle.load(file)

# m_id: maps a movie node index to its movieId (indexed as m_id[idx]).
with open('m_id.pkl', 'rb') as file:
    m_id = pickle.load(file)
def get_title(idx):
    """Return the ``id_map`` index label of the movie at node index ``idx``.

    ``idx`` is translated to a movieId through ``m_id``; the first row of
    ``id_map`` whose 'movieId' equals it supplies the label. Raises
    ``IndexError`` when no row matches.
    """
    movie_id = m_id[idx]
    matches = id_map.loc[id_map['movieId'] == movie_id]
    return matches.index[0]
def get_recommendation(model, data, user_id, total_movies=9025):
    """Recommend up to ten movies whose predicted rating for ``user_id`` is 5.

    Args:
        model: Callable taking ``(x_dict, edge_index_dict, edge_label_index)``
            and returning one predicted rating per candidate pair.
        data: Graph object exposing ``x_dict`` and ``edge_index_dict``.
        user_id: Integer node index of the user.
        total_movies: Number of candidate movies, scored as indices
            ``0 .. total_movies - 1``. Defaults to 9025, the value previously
            hard-coded here.

    Returns:
        ``{'user': user_id, 'rec_movies': [...]}`` where ``rec_movies`` holds
        the titles of the first ten movies (in movie-index order, not ranked
        by score) whose prediction, clamped to [0, 5], equals 5. The list is
        empty when no prediction reaches 5.
    """
    # Pair the user with every candidate movie: row 0 = user, row 1 = movie.
    user_row = torch.tensor([user_id] * total_movies)
    all_movie_ids = torch.arange(total_movies)
    edge_label_index = torch.stack([user_row, all_movie_ids], dim=0)

    # Pure inference: no need to record an autograd graph.
    with torch.no_grad():
        pred = model(data.x_dict, data.edge_index_dict, edge_label_index)
    pred = pred.clamp(min=0, max=5)

    # After clamping, every prediction >= 5 is exactly 5, so equality is safe.
    rec_movie_ids = (pred == 5).nonzero(as_tuple=True)[0].tolist()[:10]
    rec_titles = [get_title(movie_idx) for movie_idx in rec_movie_ids]
    return {'user': user_id, 'rec_movies': rec_titles}
def make_1_hop_graph(data, user_id, movie_id_offset=671):
    """Write a pyvis graph of ``user_id`` and the movies they rated to ``index.html``.

    Args:
        data: Graph object whose ``("user", "rates", "movie")`` edge store
            exposes ``edge_index`` and ``edge_label`` (the ratings).
        user_id: Integer node index of the user; used as the user's node id.
        movie_id_offset: Added to every movie index to form its node id so
            that movie nodes do not collide with user node ids. Defaults to
            671, the value previously hard-coded here (presumably the number
            of users -- confirm).

    The user node (red) is always added, so a user without ratings yields a
    graph with a single node. Each rated movie becomes a node labelled with
    its title and is linked to the user by an edge whose ``title`` is the
    rating.
    """
    rates = data["user", "rates", "movie"]
    edge_index = rates.edge_index
    edge_label = rates.edge_label

    # Positions of the edges that start at this user.
    rated = (edge_index[0] == user_id).nonzero(as_tuple=True)[0]
    # Plain Python numbers: get_title() and pyvis receive ints/floats, the
    # same kind of value get_recommendation() passes to get_title().
    ratings = edge_label[rated].tolist()
    movie_idxs = edge_index[1][rated].tolist()

    net = Network(notebook=True)
    net.add_node(user_id, label=str(user_id), color='#FF0000')
    for movie_idx, rating in zip(movie_idxs, ratings):
        movie_node = movie_idx + movie_id_offset
        net.add_node(movie_node, label=get_title(movie_idx))
        net.add_edge(user_id, movie_node, title=rating)
    net.save_graph('index.html')
|