Spaces:

neuronslabs
/

GNN_GradioDemo

Running

App Files Files Community

AJ-Gazin committed on May 21, 2024

Commit

5cc7af1

1 Parent(s): f3cdf0c

first commit

Browse files

Files changed (17) hide show

.gitattributes +1 -0
.gitignore +8 -0
README.md +0 -12
app.py +133 -0
data/amazon_reviews.csv +3 -0
data/loss_data.csv +3 -0
data/organized_reviews.csv +3 -0
data/product_data_PYG.pt +3 -0
data/product_embeddings.pt +3 -0
data/rev_user_mapping.json +0 -0
data/reviews_sample.csv +3 -0
data/sample_metadata.csv +3 -0
data/user_embeddings.pt +3 -0
data/user_mapping.json +0 -0
models/amazon_best_model.pt +3 -0
recommender.py +104 -0
requirements.txt +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.csv filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,8 @@

+etc/
+include/
+lib/
+library/
+Scripts/
+share/
+__pycache__/
+test.ipynb

README.md CHANGED Viewed

@@ -1,12 +0,0 @@
----
-title: GNN GradioDemo
-emoji: 🚀
-colorFrom: green
-colorTo: blue
-sdk: gradio
-sdk_version: 4.31.4
-app_file: app.py
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,133 @@

+import gradio as gr
+import plotly.express as px
+import pandas as pd
+import torch
+from sklearn.decomposition import PCA
+from sklearn.manifold import TSNE
+import umap
+from recommender import get_recommendations
+# Module-level setup: everything below runs once, at import time, and the
+# resulting names are read by the plotting and recommendation callbacks.
+# Loading pre-trained product embeddings for visualization
+product_embeddings_path = 'data/product_embeddings.pt'
+# map_location forces the stored tensors onto the CPU regardless of where they were saved
+product_emb = torch.load(product_embeddings_path, map_location=torch.device('cpu'))
+# Loading pre-trained user embeddings for visualization
+user_embeddings_path = 'data/user_embeddings.pt'
+user_emb = torch.load(user_embeddings_path, map_location=torch.device('cpu'))
+# Loading the reviews dataframe for visualization purposes
+reviews_df = pd.read_csv('data/organized_reviews.csv')
+# Loading the training and validation loss data
+loss_data_path = 'data/loss_data.csv'
+loss_df = pd.read_csv(loss_data_path)
+# Columns are renamed positionally, so the CSV must contain exactly three columns
+loss_df.columns = ['Epoch', 'Training Loss', 'Validation Loss']
+# Creating a user dataframe by extracting unique user IDs and usernames
+# NOTE: drop_duplicates() keeps the original row labels of reviews_df, so the index of user_df is not 0..n-1
+user_df = reviews_df[['user_id', 'username']].drop_duplicates()
+# Function to perform dimensionality reduction on embeddings
+# This function reduces the high-dimensional embeddings to a lower-dimensional space for visualization
+def reduce_dimensions(embeddings, method, n_components=3):
+    """Project `embeddings` into a low-dimensional space for plotting.
+    Args:
+        embeddings: 2-D array-like with one row per item.
+        method: "PCA" or "TSNE"; any other value selects UMAP.
+        n_components: number of output dimensions (default 3).
+    Returns:
+        A `(reduced_embeddings, columns)` tuple: the transformed array and one
+        column label per output dimension, e.g. ['PC1', 'PC2', 'PC3'].
+    """
+    if method == "PCA":
+        prefix = 'PC'
+        reducer = PCA(n_components=n_components)
+    else:
+        # t-SNE and UMAP run on a PCA pre-reduction. PCA rejects n_components larger than
+        # min(n_samples, n_features), so the usual 50 is capped by the shape of the input
+        shape = getattr(embeddings, 'shape', None)
+        pre_components = min(50, *shape) if shape else 50
+        embeddings = PCA(n_components=pre_components).fit_transform(embeddings)
+        if method == "TSNE":
+            prefix = 'TSNE'
+            reducer = TSNE(n_components=n_components)
+        else:
+            prefix = 'UMAP'
+            reducer = umap.UMAP(n_components=n_components)
+    # Applying the selected dimensionality reduction technique to the embeddings
+    reduced_embeddings = reducer.fit_transform(embeddings)
+    # One label per output dimension, so the labels always match the width of the result
+    columns = [f'{prefix}{i}' for i in range(1, n_components + 1)]
+    return reduced_embeddings, columns
+# Function to visualize embeddings using interactive 3D scatter plots
+# This function creates an interactive plot to explore the embeddings in a three-dimensional space
+def visualize_embeddings(embeddings, df, method, is_product=True):
+    """Build a 3D scatter plot of `embeddings` after dimensionality reduction.
+    Args:
+        embeddings: 2-D array with one row per product or user.
+        df: dataframe supplying hover metadata ('product_id' and 'category' for
+            products, 'user_id' and 'username' for users).
+        method: reduction method forwarded to `reduce_dimensions`.
+        is_product: True to colour by category and show product ids, False to
+            show user ids and usernames.
+    Returns:
+        The plotly figure.
+    """
+    reduced_embeddings, columns = reduce_dimensions(embeddings, method)
+    df_reduced = pd.DataFrame(reduced_embeddings, columns=columns)
+    metadata_columns = ['product_id', 'category'] if is_product else ['user_id', 'username']
+    # Assigning a Series to a DataFrame column aligns on index labels. `df` may carry
+    # non-contiguous labels (e.g. after drop_duplicates), which would scatter or blank the
+    # metadata, so it is re-labelled 0..n-1 and matched to the embedding rows by position.
+    # NOTE(review): assumes row i of `df` describes row i of `embeddings` - confirm against the export step
+    metadata = df[metadata_columns].reset_index(drop=True)
+    for column in metadata_columns:
+        df_reduced[column] = metadata[column]
+    if is_product:
+        fig = px.scatter_3d(df_reduced, x=columns[0], y=columns[1], z=columns[2], color='category', hover_data=['product_id'], opacity=0.9)
+    else:
+        fig = px.scatter_3d(df_reduced, x=columns[0], y=columns[1], z=columns[2], hover_data=['user_id', 'username'], opacity=0.9)
+    return fig
+# Callback for the product plot: renders the stored product embeddings with the chosen reduction method
+def visualize_product_embeddings(method):
+    """Return the 3D scatter figure of the product embeddings for `method`."""
+    product_matrix = product_emb.cpu().numpy()
+    figure = visualize_embeddings(product_matrix, reviews_df, method)
+    return figure
+# Callback for the user plot: renders the stored user embeddings with the chosen reduction method
+def visualize_user_embeddings(method):
+    """Return the 3D scatter figure of the user embeddings for `method`."""
+    user_matrix = user_emb.cpu().numpy()
+    figure = visualize_embeddings(user_matrix, user_df, method, is_product=False)
+    return figure
+# Line chart of the training and validation loss stored in loss_df, one point per epoch
+def visualize_loss():
+    """Return a plotly line figure with both loss curves plotted against the epoch."""
+    loss_columns = ['Training Loss', 'Validation Loss']
+    # Axis and legend captions for the long-form columns plotly derives from the two y series
+    axis_labels = {'Epoch': 'Epoch', 'value': 'Loss', 'variable': 'Loss Type'}
+    fig = px.line(loss_df, x='Epoch', y=loss_columns, labels=axis_labels)
+    fig.update_layout(title='Training and Validation Loss', legend_title='Loss Type')
+    return fig
+# Function to generate product recommendations for a given username
+# This function retrieves the user ID based on the provided username and generates personalized product recommendations
+def recommend(username, method=None):
+    """Look up `username` and return its recommendations for the UI.
+    Args:
+        username: name selected in the dropdown.
+        method: unused; optional so the function can be called with the
+            username alone, which is how the button handler invokes it.
+    Returns:
+        A `(title, rows)` tuple where `rows` is a list of
+        [product_id, category, subcategory] lists. For an unknown username the
+        title is an "Error: ..." message and `rows` is empty, mirroring the
+        error convention of `get_recommendations`.
+    """
+    matching_ids = user_df[user_df['username'] == username]['user_id'].values
+    if len(matching_ids) == 0:
+        # Report the problem in the output box instead of raising IndexError inside the callback
+        return f"Error: username {username!r} not found", []
+    user_id = matching_ids[0]
+    recommendations_title, recommendations = get_recommendations(user_id)
+    # The Dataframe component expects a list of rows, so each tuple becomes a list
+    recommendations_list = [[rec[0], rec[1], rec[2]] for rec in recommendations]
+    return recommendations_title, recommendations_list
+# Sampling a subset of usernames for the dropdown menu
+# random_state is fixed, so the same five usernames are offered on every start
+sample_usernames = user_df['username'].sample(5, random_state=42).tolist()
+# Creating the Gradio interface for the recommendation system
+with gr.Blocks() as demo:
+    gr.Markdown("# Amazon Product Recommendation System")
+    # Recommendation panel: username picker, title text, result table and trigger button
+    with gr.Column():
+        username_input = gr.Dropdown(label="Select Username", choices=sample_usernames, value=sample_usernames[0])
+        recommendations_output = gr.Textbox(label="Recommendations")
+        recommendations_list = gr.Dataframe(headers=["Product ID", "Category", "Subcategory"])
+        recommend_button = gr.Button("Get Recommendations")
+    # Two side-by-side embedding plots, each with its own method selector
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("### Product Embeddings Visualization")
+            method_input_product = gr.Dropdown(label="Visualization Method", choices=["PCA", "TSNE", "UMAP"], value="PCA")
+            # The initial PCA figure is computed while the interface is being built
+            embeddings_plot_product = gr.Plot(value=visualize_product_embeddings("PCA"))
+        with gr.Column():
+            gr.Markdown("### User Embeddings Visualization")
+            method_input_user = gr.Dropdown(label="Visualization Method", choices=["PCA", "TSNE", "UMAP"], value="PCA")
+            embeddings_plot_user = gr.Plot(value=visualize_user_embeddings("PCA"))
+    gr.Markdown("### Training and Validation Loss")
+    loss_plot = gr.Plot(value=visualize_loss())
+    # Event triggers and their corresponding actions
+    # Only the username is wired in here, so `recommend` must be callable with a single argument
+    recommend_button.click(recommend, inputs=[username_input], outputs=[recommendations_output, recommendations_list])
+    # Changing a method dropdown recomputes the matching plot
+    method_input_product.change(visualize_product_embeddings, inputs=[method_input_product], outputs=[embeddings_plot_product])
+    method_input_user.change(visualize_user_embeddings, inputs=[method_input_user], outputs=[embeddings_plot_user])
+# Running the Gradio interface
+if __name__ == "__main__":
+    demo.launch()

data/amazon_reviews.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a88348c20ed3f85d647e8fbaac0a730ab2f09f95e5d1f4bcf1f9e3650ef624d7
+size 300904694

data/loss_data.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bd25a48b83864f4fc1248ff466dcbd5b38dddcd35cab44f883c0b8a8be8dff27
+size 1179

data/organized_reviews.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9617a398f69a0fac22cd43a2588dda166bd089882d9c356e6e67883654a70066
+size 9020826

data/product_data_PYG.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9c3ea2997f012564fcb92760bb74842ae906cfcfe9f651834f2db27c445f7354
+size 10706333

data/product_embeddings.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d69dd6db7b9190fd247673da1130ee73037ae66b789a5640ae5fa3c0569c5708
+size 1484243

data/rev_user_mapping.json ADDED Viewed

The diff for this file is too large to render. See raw diff

data/reviews_sample.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:21822c89e46dc619af4220e930f501a9220d8a2b29f26e19ebe21e158b6d0dfd
+size 9135407

data/sample_metadata.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:21822c89e46dc619af4220e930f501a9220d8a2b29f26e19ebe21e158b6d0dfd
+size 9135407

data/user_embeddings.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5e69bd6fa86cdf10af5ff335a6a42a7d948ad26dbe24a71bc646fc31ffbb2419
+size 15396548

data/user_mapping.json ADDED Viewed

The diff for this file is too large to render. See raw diff

models/amazon_best_model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f97e25eb3065749e9f14c7ed161561bc64d86f26fd9f9e7078a499b274373114
+size 231662

recommender.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import torch
+import pandas as pd
+from torch_geometric.data import HeteroData
+from torch_geometric.nn import SAGEConv, to_hetero
+from torch.nn import Linear
+# Model definition: two-layer GraphSAGE encoder
+class GNNEncoder(torch.nn.Module):
+    """Two stacked SAGEConv layers with a ReLU in between."""
+    def __init__(self, hidden_channels, out_channels):
+        super().__init__()
+        # (-1, -1) leaves the input feature sizes unspecified when the layers are constructed
+        lazy_in_channels = (-1, -1)
+        self.conv1 = SAGEConv(lazy_in_channels, hidden_channels)
+        self.conv2 = SAGEConv(lazy_in_channels, out_channels)
+    def forward(self, x, edge_index):
+        hidden = self.conv1(x, edge_index)
+        hidden = hidden.relu()
+        return self.conv2(hidden, edge_index)
+# Scores (user, product) pairs from their encoded representations
+class EdgeDecoder(torch.nn.Module):
+    """Two-layer MLP that maps a concatenated user/product pair to one score."""
+    def __init__(self, hidden_channels):
+        super().__init__()
+        # The input is a user vector and a product vector side by side, hence 2 * hidden_channels
+        self.lin1 = Linear(2 * hidden_channels, hidden_channels)
+        self.lin2 = Linear(hidden_channels, 1)
+    def forward(self, z_dict, edge_label_index):
+        user_idx, product_idx = edge_label_index
+        user_z = z_dict['user'][user_idx]
+        product_z = z_dict['products'][product_idx]
+        pair = torch.cat([user_z, product_z], dim=-1)
+        hidden = self.lin1(pair).relu()
+        scores = self.lin2(hidden)
+        # Flatten to a 1-D tensor with one score per requested pair
+        return scores.view(-1)
+# Full link-prediction model: heterogeneous GraphSAGE encoder followed by the edge decoder
+class Model(torch.nn.Module):
+    """Encoder/decoder pair that scores (user, product) edges.
+    Args:
+        hidden_channels: width of the encoder output and of the decoder MLP.
+        metadata: graph metadata passed to `to_hetero`. When omitted, the
+            metadata of the module-level `data` graph is used, as before, so
+            that graph must already be loaded in that case.
+    """
+    def __init__(self, hidden_channels, metadata=None):
+        super().__init__()
+        if metadata is None:
+            # Backward-compatible fallback to the graph loaded at module level
+            metadata = data.metadata()
+        homogeneous_encoder = GNNEncoder(hidden_channels, hidden_channels)
+        self.encoder = to_hetero(homogeneous_encoder, metadata, aggr='sum')
+        self.decoder = EdgeDecoder(hidden_channels)
+    def forward(self, x_dict, edge_index_dict, edge_label_index):
+        z_dict = self.encoder(x_dict, edge_index_dict)
+        return self.decoder(z_dict, edge_label_index)
+# Load data and model
+# Everything below runs once, at import time; the print calls report progress on stdout
+data_path = 'data/product_data_PYG.pt'
+model_path = 'models/amazon_best_model.pt'
+reviews_path = 'data/organized_reviews.csv'
+user_mapping_path = 'data/user_mapping.json'
+rev_user_mapping_path = 'data/rev_user_mapping.json'
+print("Loading data...")
+# `data` must exist before Model(...) is built below: the constructor reads its metadata
+data = torch.load(data_path, map_location=torch.device('cpu'))
+device = 'cpu'
+data = data.to(device)
+print("Loading model...")
+# hidden_channels has to match the checkpoint for load_state_dict to succeed
+model = Model(hidden_channels=32).to(device)
+model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
+print("Loading reviews dataframe...")
+reviews_df = pd.read_csv(reviews_path)
+print("Loading user mappings...")
+# Each JSON file is read as a pandas Series and converted to a plain dict
+# user_mapping is indexed by user_id in get_product_recommendations to obtain the user's row index
+user_mapping = pd.read_json(user_mapping_path, typ='series').to_dict()
+# presumably the inverse mapping (index -> user_id); it is not used in this file - verify
+rev_user_mapping = pd.read_json(rev_user_mapping_path, typ='series').to_dict()
+# Resolve a user_id to the username recorded in reviews_df
+def get_username(user_id):
+    """Return the username of the first review row whose user_id matches.
+    Raises:
+        ValueError: if no row of reviews_df has this user_id.
+    """
+    matching_rows = reviews_df[reviews_df['user_id'] == user_id]
+    if len(matching_rows) == 0:
+        raise ValueError(f"User ID {user_id} not found in reviews_df")
+    return matching_rows['username'].iloc[0]
+# Function to get product recommendations
+def get_product_recommendations(model, data, user_id, total_products):
+    """Score every product for one user and return the best-scoring ones.
+    Args:
+        model: model called as model(x_dict, edge_index_dict, edge_label_index).
+        data: heterogeneous graph providing `x_dict` and `edge_index_dict`.
+        user_id: key into `user_mapping`.
+        total_products: number of products to score (indices 0..total_products-1).
+    Returns:
+        A list of up to five (product_id, category, subcategory) tuples, best first.
+    Raises:
+        KeyError: if `user_id` is not present in `user_mapping`.
+    """
+    user_idx = user_mapping[user_id]  # Get the embedding index for the user_id
+    # Pair the user with every product: row 0 repeats the user index, row 1 enumerates the products
+    user_row = torch.tensor([user_idx] * total_products).to(device)
+    all_product_ids = torch.arange(total_products).to(device)
+    edge_label_index = torch.stack([user_row, all_product_ids], dim=0)
+    # Pure inference: skip building the autograd graph
+    with torch.no_grad():
+        pred = model(data.x_dict, data.edge_index_dict, edge_label_index).cpu()
+    # topk raises when k exceeds the number of scores, so k is capped for small catalogues
+    top_k = min(5, total_products)
+    top_indices = pred.topk(top_k).indices.tolist()
+    recommendations = []
+    for idx in top_indices:
+        # NOTE(review): the product index is used as a row position in reviews_df - confirm
+        # that row i of organized_reviews.csv describes product i
+        row = reviews_df.iloc[idx]
+        recommendations.append((row['product_id'], row['category'], row['subcategory']))
+    return recommendations
+# Entry point used by the UI: returns a title string plus the recommendation tuples
+def get_recommendations(user_id):
+    """Return `(title, recommendations)` for `user_id`.
+    Any failure is reported as an "Error: ..." title with an empty list instead
+    of being raised, so callers always receive a two-element result.
+    """
+    try:
+        uid = str(user_id)
+        username = get_username(uid)
+        product_count = data['products'].x.shape[0]
+        recommendations = get_product_recommendations(model, data, uid, product_count)
+    except Exception as exc:
+        return f"Error: {str(exc)}", []
+    return f"Recommendations for {username} (User ID: {uid}):", recommendations
+if __name__ == "__main__":
+    # Manual smoke test: print the recommendations for one example user
+    example_user_id = 'A314APAWYQFKBJ'
+    title, recommended = get_recommendations(example_user_id)
+    print(title)
+    print(recommended)

requirements.txt ADDED Viewed

Binary file (5.09 kB). View file