Spaces:

Wintersmith
/

Book_recommender

Runtime error

App Files Files Community

Wintersmith committed about 1 month ago

Commit

3b528be

•

1 Parent(s): dd9a11d

Upload 3 files

Browse files

Files changed (3) hide show

app.py +44 -0
recommender_system.py +80 -0
requirements.txt +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import gradio as gr
+import pandas as pd
+import boto3
+import dotenv
+import os
+from recommender_system import match_books, recommend_books
# Let dotenv populate the process environment before the AWS credentials
# are looked up below.
dotenv.load_dotenv()

# S3 client built from the two credential variables; a variable that is not
# set is passed through as None.
s3 = boto3.client(
    's3',
    aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID'),
    aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY'),
)

# Fetch the ratings table once at import time; every request reuses the
# module-level `dataframe`.
bucket_name = 'martinbucket1'
obj_data = s3.get_object(Bucket=bucket_name, Key="Processed_data.csv")
dataframe = pd.read_csv(
    obj_data["Body"],
    encoding='cp1251',
    sep=',',
    low_memory=False,
)
def recommend_books_interface(selected_book) -> tuple:
    """Gradio callback: turn the typed title into a recommendations table.

    Args:
        selected_book: Text entered by the user.

    Returns:
        A ``(table, message)`` pair. When a catalogue title matches, the table
        is the output of ``recommend_books`` and the message names the matched
        title; otherwise the table is a one-cell error frame and the message
        is ``"No books found"``.
    """
    matched_title = match_books(selected_book, dataframe)

    # Guard clause: nothing in the catalogue matched the input.
    if not matched_title:
        return pd.DataFrame({"Error": ["No matching book found"]}), "No books found"

    recommendations = recommend_books(dataframe, matched_title)
    return (
        recommendations,
        f"Recommending these books based on your interest in: {matched_title}",
    )
# --- Gradio UI -------------------------------------------------------------
# One text box in; a table and a Markdown status line out.
inputs = gr.Textbox(lines=1, placeholder="Type a book title here...")
message_output = gr.Markdown()
outputs = gr.Dataframe()

# The output order must match the (table, message) tuple returned by
# recommend_books_interface.
demo = gr.Interface(
    fn=recommend_books_interface,
    inputs=inputs,
    outputs=[outputs, message_output],
    title="Book Recommender System",
    description="Enter a book title to get recommendations based on similarity.",
    fill_width=True,
    flagging_mode='never',
    theme=gr.themes.Soft(),
)

# Only start the server when this file is run as a script.
if __name__ == "__main__":
    demo.launch(share=True)

recommender_system.py ADDED Viewed

	@@ -0,0 +1,80 @@

+import pandas as pd
+import numpy as np
+from fuzzywuzzy import process
def match_books(user_input: str, df: pd.DataFrame, min_score: float = 0.8):
    """Return the catalogue title that best fuzzy-matches ``user_input``.

    Args:
        user_input: Free-text title typed by the user.
        df: Ratings table; only its ``'Book-Title'`` column is read.
        min_score: Minimum acceptable similarity. fuzzywuzzy reports scores
            as integers from 0 to 100, so a value in ``[0, 1]`` is read as a
            fraction and scaled to that range (the default ``0.8`` means 80).
            A value above 1 is used as-is.

    Returns:
        The best-matching title, or ``None`` when the input is blank, the
        catalogue holds no titles, or the best score is below the threshold.
    """
    # Blank or non-text input can never produce a meaningful match.
    if not isinstance(user_input, str) or not user_input.strip():
        return None

    # Drop missing titles: NaN entries are floats, not text to match against.
    book_titles = df['Book-Title'].dropna().unique()
    if len(book_titles) == 0:
        return None

    # Comparing the 0-100 score against a raw 0.8 would accept almost any
    # match, so put the threshold on the same scale as the score first.
    threshold = min_score * 100 if min_score <= 1 else min_score

    best_match = process.extractOne(user_input, book_titles)
    if best_match is None or best_match[1] < threshold:
        return None
    return best_match[0]
def recommend_books(df: pd.DataFrame, book_to_be_recommended: str) -> pd.DataFrame:
    """Recommend books whose ratings correlate with those of a given book.

    The function selects the users who rated ``book_to_be_recommended``,
    keeps the titles those users rated most often, and computes the Pearson
    correlation between the given book's ratings and each retained title.

    Args:
        df: Ratings table with ``'User-ID'``, ``'Book-Title'`` and
            ``'Book-Rating'`` columns.
        book_to_be_recommended: Title exactly as it appears in
            ``df['Book-Title']``.

    Returns:
        A DataFrame with columns ``'Book-Title'``, ``'Correlation'`` and
        ``'Average ratings'``: at most 10 rows, sorted by descending
        correlation, excluding the given book itself, with a fresh 0..n-1
        index. The frame is empty (same columns) when no user rated the book.
    """
    result_columns = ['Book-Title', 'Correlation', 'Average ratings']

    # Users who rated the requested book.
    is_target = df['Book-Title'] == book_to_be_recommended
    book_readers = df.loc[is_target, 'User-ID'].dropna().unique()
    if len(book_readers) == 0:
        # Unknown title: there is no column to correlate against.
        return pd.DataFrame(columns=result_columns)

    # Everything those users rated, and how many ratings each title got.
    books_of_book_readers = df[df['User-ID'].isin(book_readers)]
    ratings_per_book = books_of_book_readers.groupby('Book-Title')['User-ID'].count()

    # Use the highest rating-count cut-off that still keeps at least 12
    # titles (the requested book plus 11 candidates). With fewer titles than
    # that, keep them all.
    keep_at_least = 12
    if len(ratings_per_book) >= keep_at_least:
        cutoff = ratings_per_book.nlargest(keep_at_least).iloc[-1]
        ratings_per_book = ratings_per_book[ratings_per_book >= cutoff]

    # The requested book must survive the cut-off, or the correlation step
    # below has no reference column.
    books_to_compare = set(ratings_per_book.index) | {book_to_be_recommended}

    ratings_data_raw = books_of_book_readers.loc[
        books_of_book_readers['Book-Title'].isin(books_to_compare),
        ['User-ID', 'Book-Rating', 'Book-Title'],
    ]

    # A user may have rated the same title more than once: average those
    # ratings so the pivot below sees a single value per (user, title).
    ratings_data_raw_nodup = (
        ratings_data_raw
        .groupby(['User-ID', 'Book-Title'])['Book-Rating']
        .mean()
        .reset_index()
    )
    dataset_for_corr = ratings_data_raw_nodup.pivot(
        index='User-ID', columns='Book-Title', values='Book-Rating'
    )

    # Pearson correlation of every title's column with the requested book's.
    correlations = dataset_for_corr.corrwith(
        dataset_for_corr[book_to_be_recommended], method='pearson'
    )

    # Mean rating per title among the selected users.
    average_ratings = (
        ratings_data_raw_nodup.groupby('Book-Title')['Book-Rating'].mean().reset_index()
    )

    correlations_df = pd.DataFrame({
        'Book-Title': correlations.index,
        'Correlation': correlations.values,
    })
    correlations_df = pd.merge(correlations_df, average_ratings, on='Book-Title')
    correlations_df = correlations_df.rename(columns={'Book-Rating': 'Average ratings'})

    # Best correlations first; the requested book (correlation 1 with itself)
    # is removed before taking the top 10.
    correlations_df = correlations_df.sort_values('Correlation', ascending=False)
    correlations_df = correlations_df[correlations_df['Book-Title'] != book_to_be_recommended]
    return correlations_df.head(10).reset_index(drop=True)

requirements.txt ADDED Viewed

Binary file (2.33 kB). View file