File size: 2,172 Bytes
04242a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66

# def get_lyrics_url_from_website():
#     # https://www.disneyclips.com/lyrics/

import asyncio
from typing import List, TypedDict, Tuple, Optional
from urllib.parse import urljoin

import aiohttp
from bs4 import BeautifulSoup

class Lyric(TypedDict):
    """Dictionary shape describing one song lyric."""

    # Name of the song.
    name: str
    # Text of the lyric.
    text: str

class Movie(TypedDict):
    """Dictionary shape describing one movie and its lyrics."""

    # Title of the movie.
    title: str
    # Lyric entries belonging to the movie.
    lyrics: List[Lyric]


# Address of the lyrics index page; it is fetched to list the movies and is
# also the prefix used when building absolute links. Note the trailing slash.
URL = "https://www.disneyclips.com/lyrics/"


async def get_lyrics_urls_from_movie_url(url: str, session: aiohttp.ClientSession) -> Optional[List[Tuple[str, str]]]:
    """Collect the song links listed on a single movie page.

    Args:
        url: Address of the movie page to download.
        session: Open aiohttp session used to perform the request.

    Returns:
        A list of ``(link text, absolute URL)`` pairs, one per anchor with an
        ``href`` inside the ``<table class="songs">`` element, or ``None``
        when the page contains no such table.
    """
    async with session.get(url) as response:
        html = await response.text()

    soup = BeautifulSoup(html, 'html.parser')
    table = soup.find('table', {'class': 'songs'})
    if table is None:
        return None

    # href=True skips anchors without a target, which would otherwise yield a
    # URL ending in "None". urljoin resolves each href against the page it was
    # found on, avoiding the doubled slash produced by plain concatenation.
    return [
        (link.text, urljoin(url, link['href']))
        for link in table.find_all('a', href=True)
    ]

async def get_lyric_from_lyric_url(url: str, name: str, session: aiohttp.ClientSession) -> Lyric:
    """Download one song page and return its lyric.

    Args:
        url: Address of the song page to download.
        name: Name of the song; stored unchanged in the returned ``Lyric``.
        session: Open aiohttp session used to perform the request.

    Returns:
        A ``Lyric`` whose ``text`` is the concatenated text of every ``<p>``
        found inside ``div#cnt`` -> ``div.main``. The text is empty when the
        page lacks either container or has no paragraphs.
    """
    async with session.get(url) as response:
        html = await response.text()

    soup = BeautifulSoup(html, 'html.parser')
    # Look the containers up one step at a time so an unexpected page layout
    # yields an empty lyric instead of an AttributeError on None.
    content = soup.find('div', {'id': 'cnt'})
    main_div = content.find('div', {'class': 'main'}) if content is not None else None
    paragraphs = main_div.find_all('p') if main_div is not None else []

    # Return the declared Lyric mapping (not just the raw text) so the result
    # matches the annotation and carries the song name with it.
    return Lyric(name=name, text="".join(p.text for p in paragraphs))



async def get_movie_names_and_urls(session: aiohttp.ClientSession) -> List[Tuple[str, str]]:
    """Fetch the lyrics index page and list every movie link on it.

    Args:
        session: Open aiohttp session used to perform the request.

    Returns:
        A list of ``(link text, absolute URL)`` pairs, one per anchor with an
        ``href`` found inside ``div#cnt`` -> ``div.main`` of the index page.
        The list is empty when either container is missing.
    """
    async with session.get(URL) as response:
        html = await response.text()

    soup = BeautifulSoup(html, 'html.parser')
    # Step through the containers individually so a layout change produces an
    # empty result rather than an AttributeError on None.
    content = soup.find('div', {'id': 'cnt'})
    main_div = content.find('div', {'class': 'main'}) if content is not None else None
    if main_div is None:
        return []

    # URL already ends with "/", so plain concatenation with another "/" gave
    # a doubled slash; urljoin builds the link correctly. href=True drops
    # anchors that have no target at all.
    return [
        (link.text, urljoin(URL, link['href']))
        for link in main_div.find_all('a', href=True)
    ]
       



async def main() -> List[Tuple[str, Optional[List[Tuple[str, str]]]]]:
    """Scrape the movie index, then fetch every movie page concurrently.

    Returns:
        A list of ``(movie name, song links)`` pairs in index order, where
        ``song links`` is whatever ``get_lyrics_urls_from_movie_url`` returned
        for that movie's page.
    """
    async with aiohttp.ClientSession() as session:
        names_and_urls = await get_movie_names_and_urls(session)
        # get_lyrics_urls_from_movie_url takes (url, session); passing the
        # movie name as an extra positional argument raised a TypeError.
        # gather() schedules the coroutines itself and keeps input order.
        song_links = await asyncio.gather(
            *(get_lyrics_urls_from_movie_url(url, session) for _, url in names_and_urls)
        )

    return [(name, links) for (name, _), links in zip(names_and_urls, song_links)]

# Start the scraper only when the file is executed as a script, so importing
# the module does not trigger network requests. asyncio.run() creates and
# closes the event loop itself, replacing the deprecated pattern of calling
# asyncio.get_event_loop() when no loop is running.
if __name__ == "__main__":
    asyncio.run(main())