Harsh502s commited on
Commit
f3c4c94
·
2 Parent(s): 849f315 7e008b9
Files changed (7) hide show
  1. Pages/About.py +0 -28
  2. Pages/Recommender App.py +0 -149
  3. animes.jpg +0 -0
  4. app.py +0 -72
  5. ninja.png +0 -0
  6. rec_data.csv +0 -0
  7. scrape_anime.py +0 -150
Pages/About.py DELETED
@@ -1,28 +0,0 @@
1
import streamlit as st


# About page
def about_page():
    """Render the About page: custom typography plus a short credit line."""
    # Scoped CSS override for Streamlit's generated paragraph class so the
    # body text uses Poppins at a slightly larger size.
    page_css = """
<style>
div.css-nahz7x.e16nr0p34>p {
font-family: Poppins, sans-serif;
font-size: 1.07rem;
}
</style>
"""
    st.markdown(page_css, unsafe_allow_html=True)

    st.title("About")
    st.divider()
    st.subheader(
        "This is a content based recommender system that recommends animes similar to the animes you like."
    )

    # Two empty writes add vertical breathing room before the credit line.
    for _ in range(2):
        st.write("\n")

    st.write(
        "This Anime Recommender App is made by [Harshit Singh](https://Harsh502s.github.io/). :ninja:"
    )


if __name__ == "__main__":
    about_page()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Pages/Recommender App.py DELETED
@@ -1,149 +0,0 @@
1
import streamlit as st
import pandas as pd
import pickle


# Importing the dataset
@st.cache_data
def load_data():
    """Load the precomputed recommendation dataset from ``rec_data.csv``.

    Returns the DataFrame. On failure, shows an error and halts this script
    run via ``st.stop()`` — the original bare ``except`` fell through to
    ``return anime_data`` with the name never bound, raising a confusing
    UnboundLocalError instead of surfacing the real problem.
    """
    try:
        return pd.read_csv(r"rec_data.csv")
    except (FileNotFoundError, pd.errors.EmptyDataError, pd.errors.ParserError):
        st.error("Dataset Not Found")
        st.stop()


# Uncomment this if you want to load the model
# @st.cache_resource
# def load_model():
#     try:
#         similarity = pickle.load(open(r"similarity.pkl", "rb"))
#     except FileNotFoundError:
#         st.error("Model Not Found")
#     return similarity


# similarity = load_model()
anime_data = load_data()


# Fetching the poster and url of the anime
def fetch_anime_url(anime_id):
    """Return the site URL stored for the anime with the given id."""
    return anime_data[anime_data["anime_id"] == anime_id].urls.values[0]


def fetch_poster(anime_id):
    """Return the poster image URL for the anime with the given id."""
    return anime_data[anime_data["anime_id"] == anime_id].poster.values[0]


# Recommender System
def recommend(anime):
    """Return (names, posters, urls) for the 8 titles most similar to *anime*.

    NOTE(review): depends on a module-level ``similarity`` matrix that only
    exists when the commented-out ``load_model`` section above is enabled;
    calling this without it raises NameError — confirm before shipping.
    """
    index = anime_data[anime_data["title"] == anime].index[0]
    # Highest similarity first; entry 0 is the query anime itself, so the
    # slice below starts at 1.
    distances = sorted(
        enumerate(similarity[index]), reverse=True, key=lambda x: x[1]
    )

    recommended_anime_names = []
    recommended_anime_posters = []
    recommended_anime_urls = []

    for row_idx, _score in distances[1:9]:
        row = anime_data.iloc[row_idx]
        recommended_anime_posters.append(fetch_poster(row.anime_id))
        recommended_anime_names.append(row.title)
        recommended_anime_urls.append(fetch_anime_url(row.anime_id))

    return recommended_anime_names, recommended_anime_posters, recommended_anime_urls


def _show_grid(names, posters, urls):
    """Render 8 (name, poster, url) entries as two rows of four columns.

    Replaces eight near-identical copy-pasted ``with colN:`` blocks in the
    original with a single loop.
    """
    for start in (0, 4):
        for col, i in zip(st.columns(4), range(start, start + 4)):
            with col:
                st.write(f"[{names[i]}]({urls[i]})")
                st.image(posters[i])


# Recommender Page
def recommender_page():
    """Main page: pick an anime and show its 8 recommendations (or the top 8)."""
    style_for_page = """
<style>
div.css-1v0mbdj.etr89bj1>img {
width: 100%;
height: 100%;
overflow: hidden;
box-shadow: 0 0 0 1px rgba(0,0,0,.1);
border-radius: 1rem;
}
</style>
"""
    st.markdown(style_for_page, unsafe_allow_html=True)

    st.title("Anime Recommendation System")

    anime_list = sorted(anime_data["title"].tolist())
    # Sentinel entry that switches the page to "top rated" mode.
    anime_list.insert(0, "Top 8 Animes")
    anime_select = st.selectbox("Select an Anime", anime_list)

    if st.button("Recommendation"):
        if anime_select == "Top 8 Animes":
            top8 = anime_data.sort_values("score", ascending=False).head(8)
            _show_grid(
                top8["title"].tolist(),
                top8["poster"].tolist(),
                top8["anime_url"].tolist(),
            )
        else:
            names, posters, urls = recommend(anime_select)
            _show_grid(names, posters, urls)


if __name__ == "__main__":
    recommender_page()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
animes.jpg DELETED
Binary file (299 kB)
 
app.py DELETED
@@ -1,72 +0,0 @@
1
import streamlit as st
from st_pages import Page, show_pages
from PIL import Image

# Configuring Pages

show_pages(
    [
        Page(r"app.py", "Homepage", "🏠"),
        Page(r"Pages/Recommender App.py", "Anime Recommender", "📺"),
        Page(r"Pages/About.py", "About", "👋"),
    ]
)


# Make the page full width
favicon = Image.open(r"ninja.png")
st.set_page_config(
    page_title="Anime Recommender App",
    page_icon=favicon,
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items={"About": "This Anime Recommender App is made by Harshit Singh."},
)


# Home Page
def home_page():
    """Render the landing page: styled hero image plus an intro blurb."""
    # CSS overrides for Streamlit's generated classes: rounded hero image,
    # Poppins headings/body text, and a scaled-up toolbar.
    page_css = """
<style>
div.css-1v0mbdj.etr89bj1>img {
width: 100%;
height: 100%;
box-shadow: 0 0 0 1px rgba(0,0,0,.1);
border-radius: 5rem;
padding: 4rem;
justify-content: left;}

div.css-k7vsyb.e16nr0p31>h1 {
font-family: Poppins, sans-serif;
}

div.css-14xtw13.e8zbici0 {
margin-right: 2rem;
scale: 1.15;
}

div.css-nahz7x.e16nr0p34>p {
font-family: Poppins, sans-serif;
font-size: 1.05rem;
}
</style>
"""
    st.markdown(page_css, unsafe_allow_html=True)

    st.title("Welcome to Anime Recommender! :ninja:")
    st.subheader("Discover Your Next Favorite Anime")

    # Add unique content to the home page
    st.write(
        "Explore a world of anime and find personalized recommendations based on your anime preferences."
    )
    hero = Image.open(r"animes.jpg")
    st.image(hero, use_column_width=True, caption="Anime Characters")
    st.write(
        "Get started by selecting your favorite anime and let the recommendation system do the rest!"
    )


# Web Application
if __name__ == "__main__":
    home_page()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ninja.png DELETED
Binary file (7.57 kB)
 
rec_data.csv DELETED
The diff for this file is too large to render. See raw diff
 
scrape_anime.py DELETED
@@ -1,150 +0,0 @@
1
import requests
from bs4 import BeautifulSoup
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
import warnings as w

w.filterwarnings("ignore")


# Discover how many A-Z listing pages exist by reading the last pagination
# link on the first page (its href ends in "?page=<N>").
no_of_page = int(
    BeautifulSoup(requests.get("https://aniwatch.to/az-list").content, "lxml")
    .find("nav", attrs={"aria-label": "Page navigation"})
    .find_all("li")[-1]
    .find("a")["href"]
    .split("=")[1]
)
landing_page_url = "https://aniwatch.to/az-list"
# Page 1 has no query string; pages 2..N use ?page=i.
page_urls = [
    f"{landing_page_url}/?page={i}" if i != 1 else landing_page_url
    for i in range(1, no_of_page + 1)
]

# Scraping the data from all the pages

anime_urls = []

for url in tqdm(page_urls):
    page = requests.get(url)
    soup = BeautifulSoup(page.content, "html.parser")

    # Getting the url for the anime page

    for anime in soup.find_all("div", class_="film-poster"):
        anime = anime.find("a")["href"]
        anime_urls.append("https://aniwatch.to" + anime)

anime_url = pd.DataFrame(anime_urls, columns=["anime_url"])
anime_url.to_csv("anime_url.csv", index=False)


# def process_url(url):
#     soup = BeautifulSoup(requests.get(url).content, "html.parser")

#     anime_poster = soup.find("div", class_="film-poster").find("img")["src"]

#     # Getting the name of the anime

#     anime_title = soup.find("h2", class_="film-name dynamic-name").text

#     # Getting the overview of the anime

#     anime_overview = (
#         soup.find("div", class_="item item-title w-hide")
#         .find("div", class_="text")
#         .text
#     )

#     # Creating an object of the div containing all the details of the anime

#     soup = soup.find("div", class_="anisc-info")

#     # Extract MAL Score
#     mal_score_element = soup.find("span", {"class": "item-head"}, text="MAL Score:")
#     anime_mal_score = (
#         mal_score_element.find_next_sibling("span", {"class": "name"}).text.strip()
#         if mal_score_element
#         else "NA"
#     )

#     # Extract Studios
#     studios_element = soup.find("span", {"class": "item-head"}, text="Studios:")
#     anime_studio = (
#         studios_element.find_next("a", {"class": "name"}).text.strip()
#         if studios_element
#         else "NA"
#     )

#     # Extract Producers
#     producers_element = soup.find("span", {"class": "item-head"}, text="Producers:")
#     anime_producer = (
#         [
#             producer.text.strip()
#             for producer in producers_element.find_next_siblings("a")
#         ]
#         if producers_element
#         else ["NA"]
#     )

#     # Extract Genres
#     genres_element = soup.find("span", {"class": "item-head"}, text="Genres:")
#     anime_genres = (
#         [genre.text.strip() for genre in genres_element.find_next_siblings("a")]
#         if genres_element
#         else ["NA"]
#     )

#     return (
#         anime_poster,
#         anime_title,
#         anime_overview,
#         anime_mal_score,
#         anime_studio,
#         anime_producer,
#         anime_genres,
#     )


# def create_df_parallel(anime_urls, num_threads=4):
#     anime_poster_list = []
#     anime_title_list = []
#     anime_overview_list = []
#     anime_mal_score_list = []
#     anime_studio_list = []
#     anime_producer_list = []
#     anime_genres_list = []

#     with ThreadPoolExecutor(max_workers=num_threads) as executor:
#         results = executor.map(process_url, anime_urls)

#     for result in results:
#         anime_poster_list.append(result[0])
#         anime_title_list.append(result[1])
#         anime_overview_list.append(result[2])
#         anime_mal_score_list.append(result[3])
#         anime_studio_list.append(result[4])
#         anime_producer_list.append(result[5])
#         anime_genres_list.append(result[6])

#     anime_dict = {
#         "anime_poster": anime_poster_list,
#         "anime_title": anime_title_list,
#         "anime_overview": anime_overview_list,
#         "anime_mal_score": anime_mal_score_list,
#         "anime_studio": anime_studio_list,
#         "anime_producer": anime_producer_list,
#         "anime_genres": anime_genres_list,
#     }

#     anime_df = pd.DataFrame(anime_dict)
#     return anime_df


# anime_df = create_df_parallel(anime_urls)
# anime_df.head()
# anime_df.to_csv("anime_data.csv", index=False)