File size: 3,517 Bytes
3b664fa
 
 
 
 
 
 
b975738
3b664fa
 
 
 
b975738
 
 
 
 
 
 
3b664fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b975738
3b664fa
 
 
b975738
 
 
 
 
 
3b664fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9585fd9
3b664fa
 
 
b76d40c
9585fd9
 
 
 
b76d40c
9585fd9
 
3b664fa
 
9585fd9
b76d40c
9585fd9
 
3b664fa
9585fd9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import os
import re
import shutil
import requests
import gradio as gr
from bs4 import BeautifulSoup
from pydub import AudioSegment
from datetime import datetime, timedelta

# Scratch directory for downloaded audio: infer() deletes it via clean_dir()
# and recreates it via create_dir() on every call, then writes the MP3 into it.
# NOTE(review): this reuses the name of Python's bytecode-cache directory —
# confirm that is intentional.
TMP_DIR = "./__pycache__"


def get_prev_day(date_str):
    """Return the calendar day before ``date_str``.

    Args:
        date_str: A date written as ``YYYY/MM/DD``.

    Returns:
        The preceding day, formatted as zero-padded ``YYYY/MM/DD``.

    Raises:
        ValueError: If ``date_str`` does not match the expected format.
    """
    fmt = "%Y/%m/%d"
    one_day = timedelta(days=1)
    return (datetime.strptime(date_str, fmt) - one_day).strftime(fmt)


def remove_end_seconds(input_file: str, output_file: str, seconds: float):
    """Trim ``seconds`` off the end of an audio file and export it as MP3.

    Args:
        input_file: Path of the audio file to read.
        output_file: Path the trimmed MP3 is written to. It may be the same
            path as ``input_file``.
        seconds: Amount of audio to drop from the end. Zero or negative
            values leave the audio untouched; a value longer than the clip
            yields an empty clip.
    """
    audio = AudioSegment.from_file(input_file)
    # Slice to an explicit end position rather than ``audio[:-ms]``: a stop
    # of ``-0`` is just ``0`` and would select nothing, and an offset larger
    # than the clip would produce a meaningless negative position.
    trim_ms = max(int(round(seconds * 1000)), 0)
    keep_ms = max(len(audio) - trim_ms, 0)  # len() of a segment is in ms
    new_audio = audio[:keep_ms]
    new_audio.export(output_file, format="mp3")


def get_first_integer(input_string: str):
    """Return the first run of digits in ``input_string`` as a decimal string.

    The digits go through ``int`` before being turned back into a string, so
    leading zeros are dropped. An empty string is returned when the input
    contains no digits.
    """
    found = re.search(r"\d+", input_string)
    if found is None:
        return ""

    return str(int(found.group()))


def create_dir(dirpath=TMP_DIR):
    """Create ``dirpath`` (and any missing parents) if it does not exist yet.

    Args:
        dirpath: Directory to create; defaults to the module's ``TMP_DIR``.

    Raises:
        FileExistsError: If ``dirpath`` exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race of testing os.path.exists
    # first: another caller may create the directory between the two steps.
    os.makedirs(dirpath, exist_ok=True)


def clean_dir(dirpath=TMP_DIR):
    """Delete ``dirpath`` and everything inside it; do nothing if it is absent.

    Args:
        dirpath: Directory to remove; defaults to the module's ``TMP_DIR``.
    """
    # Attempt the removal directly instead of testing os.path.exists first:
    # the directory (or an entry in it) may vanish between check and delete.
    try:
        shutil.rmtree(dirpath)
    except FileNotFoundError:
        pass


def download_mp3(
    url: str,
    local_filename: str,
    *,
    max_days_back: int = 7,
    timeout: float = 30.0,
):
    """Download an MP3 to ``local_filename`` and trim its last 3.1 seconds.

    When the server answers 404, the ``YYYY/MM/DD`` part of the URL (the path
    between ``/audio/`` and the file name) is moved back one day and the
    request is repeated, at most ``max_days_back`` times.

    Args:
        url: Audio URL containing ``/audio/<YYYY/MM/DD>/<file>``.
        local_filename: Path the downloaded (then trimmed) file is written to.
        max_days_back: Maximum number of earlier days to try after a 404.
        timeout: Per-request timeout in seconds passed to ``requests.get``.

    Returns:
        True if the file was downloaded and trimmed, False on any failure
        (the error is printed rather than raised).
    """
    try:
        # A bounded loop replaces the original unbounded recursion, which
        # kept issuing requests until the interpreter's recursion limit.
        for _ in range(max(max_days_back, 0) + 1):
            response = requests.get(url, timeout=timeout)
            if response.status_code == 200:
                with open(local_filename, "wb") as f:
                    f.write(response.content)

                print(f"Successfully downloaded: {local_filename}")
                remove_end_seconds(local_filename, local_filename, 3.1)
                return True

            if response.status_code != 404:
                print(f"Error: {response.status_code}, {response.text}")
                return False

            # 404: retry with the previous day's date in the URL path.
            bad_date = "/".join(url.split("/audio/")[-1].split("/")[:-1])
            fixed_date = get_prev_day(bad_date)
            url = url.replace(bad_date, fixed_date)

        print(f"Error: 404, no audio found within {max_days_back} day(s) back")
        return False

    except Exception as e:
        # Boundary handler: callers only look at the boolean result.
        print(f"Error: {e}")
        return False


def get_sound_time(page_url, timeout: float = 30.0):
    """Read the ``audioTime`` span from a page and return its text as a date path.

    Args:
        page_url: URL of the page to fetch.
        timeout: Timeout in seconds for the HTTP request, so an unresponsive
            server cannot block the caller indefinitely.

    Returns:
        The text of the first ``<span class="audioTime">`` with every ``-``
        replaced by ``/``, or an empty string when the span is missing or
        has no text.
    """
    response = requests.get(page_url, timeout=timeout)
    soup = BeautifulSoup(response.text, "html.parser")
    audio_time_span = soup.find("span", class_="audioTime")
    audio_time = audio_time_span.text if audio_time_span else ""
    return audio_time.replace("-", "/")


def infer(page_url: str, date: str):
    """Download the audio behind a lizhi.fm sound page and return its local path.

    The scratch directory is wiped first, so files from earlier calls are gone.

    Args:
        page_url: Sound page URL; must contain ``https://www.lizhi.fm/`` and
            end in a path segment that holds the numeric sound id.
        date: Publication date as ``YYYY-MM-DD`` or ``YYYY/MM/DD``. Month and
            day may be written without zero padding.

    Returns:
        Path of the downloaded MP3 inside ``TMP_DIR``, or ``"./fail.mp3"``
        when the URL or date is unusable or the download fails.
    """
    clean_dir()
    domain = "https://www.lizhi.fm/"
    fail_voice = "./fail.mp3"
    if not page_url or domain not in page_url:
        return fail_voice

    # Ignore surrounding whitespace and a trailing slash so the last path
    # segment is the one that carries the sound id.
    last_segment = page_url.strip().rstrip("/").split("/")[-1]
    sound_id = get_first_integer(last_segment)
    if not sound_id.isdigit():
        return fail_voice

    # Normalise the date to zero-padded YYYY/MM/DD. Without this, an input
    # such as "2024-1-5" requests an unpadded path, gets a 404, and the
    # previous-day fallback then skips the real publication date.
    # NOTE: the date is user-supplied; get_sound_time(page_url) is the
    # page-scraping alternative, currently not used here.
    date_format = "%Y/%m/%d"
    try:
        parsed = datetime.strptime((date or "").strip().replace("-", "/"), date_format)
    except ValueError:
        return fail_voice

    voice_time = parsed.strftime(date_format)
    mp3_url = f"http://cdn5.lizhi.fm/audio/{voice_time}/{sound_id}_hd.mp3"
    outpath = f"{TMP_DIR}/{sound_id}.mp3"
    create_dir()
    if download_mp3(mp3_url, outpath):
        return outpath

    return fail_voice


if __name__ == "__main__":
    # Inputs: the sound page URL and its publication date, both free text.
    url_box = gr.Textbox(
        label="Enter the sound page URL",
        placeholder="https://www.lizhi.fm/*/*",
        show_copy_button=True,
    )
    date_box = gr.Textbox(
        label="Enter sound publication date in format",
        placeholder="YYYY-MM-DD",
        show_copy_button=True,
    )
    # Output: an audio player fed with the path returned by infer().
    mp3_player = gr.Audio(
        label="Download MP3",
        show_download_button=True,
    )
    demo = gr.Interface(
        fn=infer,
        inputs=[url_box, date_box],
        outputs=mp3_player,
        flagging_mode="never",
    )
    demo.launch()