Spaces:

Harsh502s
/

Anime-Recommender

Sleeping

App Files Files Community

Harsh502s committed on Oct 17, 2023

Commit

849f315

1 Parent(s): 7fbe44b

Commit

Browse files

Files changed (5) hide show

Pages/Recommender App.py +14 -15
anime_rec.csv +0 -0
anime_data_cleaned.csv → rec_data.csv +0 -0
scrape_anime.py +150 -0
similarity_matrix.pkl +0 -3

Pages/Recommender App.py CHANGED Viewed

@@ -7,11 +7,10 @@ import pickle
 @st.cache_data
 def load_data():
     try:
-        anime_data = pd.read_csv(r"anime_rec.csv")
-        anime_posters = pd.read_csv(r"anime_data_cleaned.csv")
     except:
         st.error("Dataset Not Found")
-    return anime_data, anime_posters
 # Uncomment this if you want to load the model
@@ -25,17 +24,17 @@ def load_data():
 # similarity = load_model()
-anime_data, anime_posters = load_data()
 # Fetching the poster and url of the anime
 def fetch_anime_url(anime_id):
-    url = anime_posters[anime_posters["anime_id"] == anime_id].urls.values[0]
     return url
 def fetch_poster(anime_id):
-    poster = anime_posters[anime_posters["anime_id"] == anime_id].poster.values[0]
     return poster
@@ -83,33 +82,33 @@ def recommender_page():
     if st.button("Recommendation"):
         if anime_select == "Top 8 Animes":
-            top8 = anime_posters.sort_values("score", ascending=False).head(8)
             col1, col2, col3, col4 = st.columns(4)
             with col1:
-                st.write(f"[{top8.iloc[0].title}]({top8.iloc[0].urls})")
                 st.image(top8.iloc[0].poster)
             with col2:
-                st.write(f"[{top8.iloc[1].title}]({top8.iloc[1].urls})")
                 st.image(top8.iloc[1].poster)
             with col3:
-                st.write(f"[{top8.iloc[2].title}]({top8.iloc[2].urls})")
                 st.image(top8.iloc[2].poster)
             with col4:
-                st.write(f"[{top8.iloc[3].title}]({top8.iloc[3].urls})")
                 st.image(top8.iloc[3].poster)
             col5, col6, col7, col8 = st.columns(4)
             with col5:
-                st.write(f"[{top8.iloc[4].title}]({top8.iloc[4].urls})")
                 st.image(top8.iloc[4].poster)
             with col6:
-                st.write(f"[{top8.iloc[5].title}]({top8.iloc[5].urls})")
                 st.image(top8.iloc[5].poster)
             with col7:
-                st.write(f"[{top8.iloc[6].title}]({top8.iloc[6].urls})")
                 st.image(top8.iloc[6].poster)
             with col8:
-                st.write(f"[{top8.iloc[7].title}]({top8.iloc[7].urls})")
                 st.image(top8.iloc[7].poster)
         else:
             (

 @st.cache_data
 def load_data():
     try:
+        anime_data = pd.read_csv(r"rec_data.csv")
     except:
         st.error("Dataset Not Found")
+    return anime_data
 # Uncomment this if you want to load the model
 # similarity = load_model()
+anime_data = load_data()
 # Fetching the poster and url of the anime
 def fetch_anime_url(anime_id):
+    url = anime_data[anime_data["anime_id"] == anime_id].urls.values[0]
     return url
 def fetch_poster(anime_id):
+    poster = anime_data[anime_data["anime_id"] == anime_id].poster.values[0]
     return poster
     if st.button("Recommendation"):
         if anime_select == "Top 8 Animes":
+            top8 = anime_data.sort_values("score", ascending=False).head(8)
             col1, col2, col3, col4 = st.columns(4)
             with col1:
+                st.write(f"[{top8.iloc[0].title}]({top8.iloc[0].anime_url})")
                 st.image(top8.iloc[0].poster)
             with col2:
+                st.write(f"[{top8.iloc[1].title}]({top8.iloc[1].anime_url})")
                 st.image(top8.iloc[1].poster)
             with col3:
+                st.write(f"[{top8.iloc[2].title}]({top8.iloc[2].anime_url})")
                 st.image(top8.iloc[2].poster)
             with col4:
+                st.write(f"[{top8.iloc[3].title}]({top8.iloc[3].anime_url})")
                 st.image(top8.iloc[3].poster)
             col5, col6, col7, col8 = st.columns(4)
             with col5:
+                st.write(f"[{top8.iloc[4].title}]({top8.iloc[4].anime_url})")
                 st.image(top8.iloc[4].poster)
             with col6:
+                st.write(f"[{top8.iloc[5].title}]({top8.iloc[5].anime_url})")
                 st.image(top8.iloc[5].poster)
             with col7:
+                st.write(f"[{top8.iloc[6].title}]({top8.iloc[6].anime_url})")
                 st.image(top8.iloc[6].poster)
             with col8:
+                st.write(f"[{top8.iloc[7].title}]({top8.iloc[7].anime_url})")
                 st.image(top8.iloc[7].poster)
         else:
             (

anime_rec.csv DELETED Viewed

The diff for this file is too large to render. See raw diff

anime_data_cleaned.csv → rec_data.csv RENAMED Viewed

The diff for this file is too large to render. See raw diff

scrape_anime.py ADDED Viewed

	@@ -0,0 +1,150 @@

+import requests
+from bs4 import BeautifulSoup
+import pandas as pd
+from concurrent.futures import ThreadPoolExecutor
+import requests
+from tqdm import tqdm
+import warnings as w
+# Silence every warning category for the rest of this script run.
+w.filterwarnings("ignore")
+# Work out how many listing pages exist. This performs a network request at
+# module top level: it downloads the A-Z list page, finds the <nav> whose
+# aria-label is "Page navigation", takes the <a> inside its last <li>, and
+# converts the text after the first "=" in that link's href to an int.
+# NOTE(review): presumably the last pagination entry links to the final page
+# (e.g. "?page=N") -- confirm against the live markup.
+no_of_page = int(
+    BeautifulSoup(requests.get("https://aniwatch.to/az-list").content, "lxml")
+    .find("nav", attrs={"aria-label": "Page navigation"})
+    .find_all("li")[-1]
+    .find("a")["href"]
+    .split("=")[1]
+)
+# Base URL of the A-Z listing (same address as the one fetched above).
+landing_page_url = "https://aniwatch.to/az-list"
+# One URL per listing page: page 1 is the bare landing URL, every later page
+# appends "/?page=<i>".
+page_urls = [
+    f"{landing_page_url}/?page={i}" if i != 1 else landing_page_url
+    for i in range(1, no_of_page + 1)
+]
+# Scraping the data from all the pages
+# Accumulates the absolute URL of every anime detail page found.
+anime_urls = []
+# tqdm wraps the iterable to show a progress bar while the pages are fetched.
+for url in tqdm(page_urls):
+    page = requests.get(url)
+    soup = BeautifulSoup(page.content, "html.parser")
+    # Getting the url for the anime page
+    # Each <div class="film-poster"> contains an <a> whose href is appended
+    # to the site root below. Note that `anime` and `page` are rebound here:
+    # `anime` becomes the href string and `page` becomes the absolute URL,
+    # replacing the response object fetched above.
+    for anime in soup.find_all("div", class_="film-poster"):
+        anime = anime.find("a")["href"]
+        page = "https://aniwatch.to" + anime
+        anime_urls.append(page)
+        # No-op statement; has no effect on the loop.
+        pass
+    # No-op statement; has no effect on the loop.
+    pass
+# Persist the collected URLs as a single-column CSV ("anime_url"), without
+# the DataFrame index.
+anime_url = pd.DataFrame(anime_urls, columns=["anime_url"])
+anime_url.to_csv("anime_url.csv", index=False)
+# def process_url(url):
+#     soup = BeautifulSoup(requests.get(url).content, "html.parser")
+#     anime_poster = soup.find("div", class_="film-poster").find("img")["src"]
+#     # Getting the name of the anime
+#     anime_title = soup.find("h2", class_="film-name dynamic-name").text
+#     # Getting the overview of the anime
+#     anime_overview = (
+#         soup.find("div", class_="item item-title w-hide")
+#         .find("div", class_="text")
+#         .text
+#     )
+#     # Creating an object of the div containing all the details of the anime
+#     soup = soup.find("div", class_="anisc-info")
+#     # Extract MAL Score
+#     mal_score_element = soup.find("span", {"class": "item-head"}, text="MAL Score:")
+#     anime_mal_score = (
+#         mal_score_element.find_next_sibling("span", {"class": "name"}).text.strip()
+#         if mal_score_element
+#         else "NA"
+#     )
+#     # Extract Studios
+#     studios_element = soup.find("span", {"class": "item-head"}, text="Studios:")
+#     anime_studio = (
+#         studios_element.find_next("a", {"class": "name"}).text.strip()
+#         if studios_element
+#         else "NA"
+#     )
+#     # Extract Producers
+#     producers_element = soup.find("span", {"class": "item-head"}, text="Producers:")
+#     anime_producer = (
+#         [
+#             producer.text.strip()
+#             for producer in producers_element.find_next_siblings("a")
+#         ]
+#         if producers_element
+#         else ["NA"]
+#     )
+#     # Extract Genres
+#     genres_element = soup.find("span", {"class": "item-head"}, text="Genres:")
+#     anime_genres = (
+#         [genre.text.strip() for genre in genres_element.find_next_siblings("a")]
+#         if genres_element
+#         else ["NA"]
+#     )
+#     return (
+#         anime_poster,
+#         anime_title,
+#         anime_overview,
+#         anime_mal_score,
+#         anime_studio,
+#         anime_producer,
+#         anime_genres,
+#     )
+# def create_df_parallel(anime_urls, num_threads=4):
+#     anime_poster_list = []
+#     anime_title_list = []
+#     anime_overview_list = []
+#     anime_mal_score_list = []
+#     anime_studio_list = []
+#     anime_producer_list = []
+#     anime_genres_list = []
+#     with ThreadPoolExecutor(max_workers=num_threads) as executor:
+#         results = executor.map(process_url, anime_urls)
+#         for result in results:
+#             anime_poster_list.append(result[0])
+#             anime_title_list.append(result[1])
+#             anime_overview_list.append(result[2])
+#             anime_mal_score_list.append(result[3])
+#             anime_studio_list.append(result[4])
+#             anime_producer_list.append(result[5])
+#             anime_genres_list.append(result[6])
+#     anime_dict = {
+#         "anime_poster": anime_poster_list,
+#         "anime_title": anime_title_list,
+#         "anime_overview": anime_overview_list,
+#         "anime_mal_score": anime_mal_score_list,
+#         "anime_studio": anime_studio_list,
+#         "anime_producer": anime_producer_list,
+#         "anime_genres": anime_genres_list,
+#     }
+#     anime_df = pd.DataFrame(anime_dict)
+#     return anime_df
+# anime_df = create_df_parallel(anime_urls)
+# anime_df.head()
+# anime_df.to_csv("anime_data.csv", index=False)

similarity_matrix.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6467e01f6ad99f76155ccae156d96e285d22408947461b215eb0772730298888
-size 248912835