Spaces:
Runtime error
Runtime error
File size: 2,172 Bytes
04242a9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# def get_lyrics_url_from_website():
# # https://www.disneyclips.com/lyrics/
import asyncio
from typing import List, TypedDict, Tuple, Optional
from urllib.parse import urljoin

import aiohttp
from bs4 import BeautifulSoup
class Lyric(TypedDict):
    """Dictionary shape for one song: its name plus its lyric text."""

    # Name of the song.
    name: str
    # Lyric text of the song.
    text: str
class Movie(TypedDict):
    """Dictionary shape for one movie: its title plus the lyrics of its songs."""

    # Title of the movie.
    title: str
    # One Lyric entry per song belonging to the movie.
    lyrics: List[Lyric]
# Lyrics index page of disneyclips.com. It is downloaded to discover the movie
# links, and it is also the base address used to turn scraped hrefs into URLs.
URL = "https://www.disneyclips.com/lyrics/"
async def get_lyrics_urls_from_movie_url(url: str, session: aiohttp.ClientSession) -> Optional[List[Tuple[str, str]]]:
    """Download a movie page and collect the song links listed on it.

    Args:
        url: Address of the movie page to fetch.
        session: Open aiohttp session used to issue the GET request.

    Returns:
        A list of ``(link text, absolute URL)`` tuples, one for every anchor
        carrying an ``href`` inside the page's ``<table class="songs">``
        element, or ``None`` when the page contains no such table.
    """
    async with session.get(url) as response:
        html = await response.text()

    soup = BeautifulSoup(html, 'html.parser')
    table = soup.find('table', {'class': 'songs'})
    if table is None:
        return None

    # urljoin avoids the doubled slash that f"{URL}/{href}" produced (URL
    # already ends with "/") and leaves absolute hrefs intact. href=True skips
    # anchors without a target, which previously yielded ".../None".
    return [
        (link.text, urljoin(URL, link['href']))
        for link in table.find_all('a', href=True)
    ]
async def get_lyric_from_lyric_url(url: str, name: str, session: aiohttp.ClientSession) -> Lyric:
    """Download a lyric page and return its text together with the song name.

    Args:
        url: Address of the lyric page to fetch.
        name: Song name to store in the returned record.
        session: Open aiohttp session used to issue the GET request.

    Returns:
        A ``Lyric`` whose ``name`` is the given ``name`` and whose ``text`` is
        the concatenated text of every ``<p>`` found inside
        ``<div id="cnt">`` -> ``<div class="main">``.

    Raises:
        AttributeError: If either of the two container divs is missing from
            the page (``find`` returns ``None`` in that case).
    """
    async with session.get(url) as response:
        html = await response.text()

    soup = BeautifulSoup(html, 'html.parser')
    div = soup.find('div', {'id': 'cnt'}).find('div', {'class': 'main'})
    text = "".join(p.text for p in div.find_all('p'))
    # The signature promises a Lyric; the previous body returned only the bare
    # text and silently dropped ``name``.
    return Lyric(name=name, text=text)
async def get_movie_names_and_urls(session: aiohttp.ClientSession) -> List[Tuple[str, str]]:
    """Download the lyrics index page and collect the links it contains.

    Args:
        session: Open aiohttp session used to issue the GET request.

    Returns:
        A list of ``(link text, absolute URL)`` tuples, one for every anchor
        carrying an ``href`` inside ``<div id="cnt">`` -> ``<div class="main">``
        of the index page.

    Raises:
        AttributeError: If either of the two container divs is missing from
            the page (``find`` returns ``None`` in that case).
    """
    async with session.get(URL) as response:
        html = await response.text()

    soup = BeautifulSoup(html, 'html.parser')
    main_div = soup.find('div', {'id': 'cnt'}).find('div', {'class': 'main'})
    # urljoin avoids the doubled slash that f"{URL}/{href}" produced (URL
    # already ends with "/") and leaves absolute hrefs intact. href=True skips
    # anchors without a target, which previously yielded ".../None".
    return [
        (link.text, urljoin(URL, link['href']))
        for link in main_div.find_all('a', href=True)
    ]
async def main() -> List[Optional[List[Tuple[str, str]]]]:
    """Fetch the movie index, then fetch every movie page concurrently.

    Returns:
        One entry per movie link, in the same order as the index: the value
        returned by ``get_lyrics_urls_from_movie_url`` for that movie's page.
    """
    async with aiohttp.ClientSession() as session:
        names_and_urls = await get_movie_names_and_urls(session)
        # The helper takes (url, session); passing the movie name as an extra
        # first argument raised TypeError. gather() schedules the coroutines
        # itself, so no explicit create_task is needed.
        data = await asyncio.gather(
            *(get_lyrics_urls_from_movie_url(url, session) for _name, url in names_and_urls)
        )
    return data
# Run the scraper. asyncio.run creates a fresh event loop, runs main() to
# completion and closes the loop afterwards; calling asyncio.get_event_loop()
# with no running loop is deprecated.
asyncio.run(main())