demomodels committed on
Commit
a37aa0d
·
verified ·
1 Parent(s): 21843bb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +171 -0
app.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from TTS.api import TTS
3
+ from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline
4
+ import feedparser
5
+ import re
6
+
7
# Supported languages: short language code -> display name.
language_map = dict(en='English', fr='French')
11
+
12
+ # Add default RSS feeds
13
+
14
# Built-in RSS feeds: display name -> feed URL.
rss_feed_map = dict([
    ("NY Times", 'https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml'),
    ("Fox News", 'https://moxie.foxnews.com/google-publisher/latest.xml'),
    ("Yahoo! News", 'https://www.yahoo.com/news/rss'),
    ("France 24", 'https://www.france24.com/fr/rss'),
    ("France Info", 'https://www.francetvinfo.fr/titres.rss'),
])
21
+
22
def get_rss_feeds(default_choices, custom_choices):
    """Return the de-duplicated list of feed URLs requested by the user.

    Args:
        default_choices: Names of built-in feeds; each must be a key of
            ``rss_feed_map``. ``None`` is treated as no selection.
        custom_choices: Text holding one custom feed URL per line. ``None``
            is treated as empty text.

    Returns:
        The custom URLs followed by the URLs of the chosen built-in feeds,
        in input order, with duplicates removed.

    Raises:
        KeyError: If a name in ``default_choices`` is not in ``rss_feed_map``.
    """
    # Strip surrounding whitespace (including "\r" from CRLF input) and drop
    # blank lines, so a trailing newline never yields an empty "URL".
    stripped_lines = (line.strip() for line in (custom_choices or "").splitlines())
    custom_urls = [url for url in stripped_lines if url]
    default_urls = [rss_feed_map[name] for name in (default_choices or [])]
    # dict.fromkeys removes duplicates while keeping a deterministic order.
    return list(dict.fromkeys(custom_urls + default_urls))
27
+
28
+ # RSS feeds
29
+
30
# Compiled once at import time rather than on every is_url() call.
_URL_PATTERN = re.compile(
    r'^(?:http|ftp)s?://'  # http://, https://, ftp:// or ftps://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  # domain...
    r'localhost|'  # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or ip
    r'(?::\d+)?'  # optional port
    # \Z instead of $: "$" also matches just before a trailing newline, which
    # would let "http://example.com\n" pass as a valid URL.
    r'(?:/?|[/?]\S+)\Z',
    re.IGNORECASE,
)


def is_url(string):
    """Return True if ``string`` is an http(s)/ftp(s) URL, False otherwise.

    The whole string must match: the scheme, then a domain name,
    ``localhost`` or a dotted IPv4-style address, an optional port, and an
    optional path/query containing no whitespace.
    """
    return _URL_PATTERN.match(string) is not None
39
+
40
def fetch_news(rss_feed):
    """Return the titles of the entries in the RSS feed at ``rss_feed``.

    Args:
        rss_feed: URL of the feed to download and parse.

    Returns:
        A list of entry titles in feed order. Entries with a missing or
        empty title are skipped.

    Raises:
        ValueError: If ``rss_feed`` is not accepted by ``is_url``.
    """
    if not is_url(rss_feed):
        raise ValueError(f"{rss_feed} is not a valid RSS feed.")
    feed = feedparser.parse(rss_feed)
    # Entries are dict-like; .get avoids an AttributeError on items that
    # were published without a title.
    titles = (entry.get("title") for entry in feed.entries)
    return [title for title in titles if title]
48
+
49
def fetch_news_multiple_urls(rss_feeds):
    """Fetch the titles of every feed in ``rss_feeds`` as one flat list.

    Titles keep feed order, then entry order within each feed.
    """
    headlines = []
    for feed_url in rss_feeds:
        headlines.extend(fetch_news(feed_url))
    return headlines
51
+
52
+ # Language_id
53
+
54
# Checkpoint of the language-detection classifier.
model_ckpt = "papluca/xlm-roberta-base-language-detection"
# Text-classification pipeline built from that checkpoint at import time.
pipe = pipeline(task="text-classification", model=model_ckpt)
56
+
57
def language_id(strings: list[str]):
    """Pair each string with the name of its most likely supported language.

    The classifier may predict labels that are not in ``language_map``.
    Taking only its top label would then fail the ``language_map`` lookup,
    so the best-scoring label among the supported ones is used instead.

    Args:
        strings: Texts to classify.

    Returns:
        A list of ``(string, language_name)`` tuples, where
        ``language_name`` is a value of ``language_map``.

    Raises:
        ValueError: If the classifier returns no label found in
            ``language_map``.
    """
    tagged = []
    for string in strings:
        # top_k=None asks the pipeline for a score for every label.
        scores = pipe(string, top_k=None, truncation=True)
        best = max(
            (item for item in scores if item['label'] in language_map),
            key=lambda item: item['score'],
            default=None,
        )
        if best is None:
            raise ValueError(f"Could not identify a supported language for: {string!r}")
        tagged.append((string, language_map[best['label']]))
    return tagged
59
+
60
+ # Translation
61
+
62
## Translation model: the T5 checkpoint name, then its tokenizer and
## weights, both loaded at import time.
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
66
+
67
def translate(source_text_with_id, target_language):
    """Translate one text into ``target_language`` with the T5 model.

    Args:
        source_text_with_id: ``(text, language_name)`` pair, for example
            ``('text', 'French')``.
        target_language: Name of the output language; must be a value of
            ``language_map``.

    Returns:
        The translated text, or the original text unchanged when it is
        already in ``target_language``.

    Raises:
        AssertionError: If the source or target language is not a value of
            ``language_map``.
    """
    text = source_text_with_id[0]
    source_language = source_text_with_id[1]
    assert source_language in language_map.values(), f"{source_language} language is not supported."
    assert target_language in language_map.values(), f"{target_language} language is not supported."

    # Translation is only needed when the languages differ; prompting the
    # model to translate a language into itself would needlessly rewrite
    # text that is already in the requested language.
    if source_language == target_language:
        return text

    prompt = f"translate {source_language} to {target_language}: " + text

    # Tokenize input text
    input_ids = tokenizer.encode(prompt, return_tensors="pt")

    # Generate translation
    translated_ids = model.generate(input_ids=input_ids, max_length=100, num_beams=4, early_stopping=True)

    # Decode translated text
    return tokenizer.decode(translated_ids[0], skip_special_tokens=True)
83
+
84
def translate_multiple(source_texts_with_id, target_language):
    """Translate every ``(text, language_name)`` pair into ``target_language``.

    Returns the translations as a list in input order.
    """
    translations = []
    for text_with_id in source_texts_with_id:
        translations.append(translate(text_with_id, target_language))
    return translations
86
+
87
+ # Speech generation
88
+
89
# Text-to-speech model, loaded at import time.
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2")
90
+
91
def read_news(text, input, output, language):
    """Synthesize ``text`` as speech and write it to the file ``output``.

    Args:
        text: Text to read aloud.
        input: Path of the audio sample passed to the model as
            ``speaker_wav``.
        output: Path of the audio file to write.
        language: Language code; must be a key of ``language_map``.

    Returns:
        ``output``, unchanged.

    Raises:
        AssertionError: If ``language`` is not a key of ``language_map``.
    """
    assert language in language_map, f"{language} language is not supported."
    synthesis_args = {
        "text": text,
        "file_path": output,
        "speaker_wav": input,
        "language": language,
    }
    print("speech generation starting")
    tts.tts_to_file(**synthesis_args)
    print("speech generation done")
    return output
100
+
101
+ # Gradio interface
102
+
103
def process(radio_value, textbox_value, audio_value, checkbox_value):
    """Build the spoken newsletter from the values of the Gradio inputs.

    Steps: resolve the feed URLs, fetch the titles, detect each title's
    language, translate the titles to the requested language, then
    synthesize the joined text with the voice of the uploaded sample.

    Args:
        radio_value: Name of the output language (a value of
            ``language_map``).
        textbox_value: Custom feed URLs, one per line.
        audio_value: File path of the speaker sample.
        checkbox_value: Names of the selected built-in feeds.

    Returns:
        Path of the generated audio file.

    Raises:
        gr.Error: If no output language or audio sample was provided, no
            feed was selected, or the feeds produced no text to read.
    """
    inputs = {
        "language": radio_value,
        "rss_feed_urls": textbox_value,
        "audio": audio_value,
        "selected_feeds": checkbox_value
    }
    print("Inputs to Gradio Blocks:")
    print(inputs)

    # Check the cheap inputs first, so a missing choice is reported before
    # any feed is downloaded or any model is run.
    language = next((key for key, val in language_map.items() if val == radio_value), None)
    print("language=",language)
    if language is None:
        raise gr.Error("Please choose the language of the output.")
    if not audio_value:
        raise gr.Error("Please upload a sample audio of someone speaking.")

    rss_feeds = get_rss_feeds(checkbox_value,textbox_value)
    print("rss_feeds=",rss_feeds)
    if not rss_feeds:
        raise gr.Error("Please select or add at least one RSS feed.")

    news = fetch_news_multiple_urls(rss_feeds)
    print("news=",news[:2])

    news_with_language_id = language_id(news)
    print("news_with_language_id=",news_with_language_id[:2])

    translated_news = translate_multiple(news_with_language_id, radio_value)
    print("translated_news=",translated_news[:2])

    all_news = ' '.join(translated_news)
    print("all_news=",all_news[:80])
    if not all_news.strip():
        raise gr.Error("No news could be fetched from the selected RSS feeds.")

    output_path = "output.wav"

    return read_news(all_news,audio_value,output_path,language)
134
+
135
# Page layout: one row per control, top to bottom: output language, custom
# feed URLs, speaker sample, built-in feeds, submit button, download button.
with gr.Blocks() as demo:
    gr.Markdown("Customize your newsletter and then click **Fetch News** to download the audio output.")

    with gr.Row():
        radio = gr.Radio(
            choices=["English", "French"],
            label='Choose the language of the output',
            info="If the output language doesn't match the language of an RSS feed, an AI model will take care of translation",
        )

    with gr.Row():
        textbox = gr.Textbox(
            label='Add custom RSS feeds to your newsletter',
            info='The provided urls needed to be written each in a separate line',
            placeholder='https://www.francetvinfo.fr/titres.rss',
        )

    with gr.Row():
        audio = gr.Audio(
            type='filepath',
            label="Upload a sample audio of someone speaking. The voice of the output will match the voice of the input.",
        )

    with gr.Row():
        checkboxgroup = gr.CheckboxGroup(
            choices=["NY Times", "Fox News", "Yahoo! News", "France 24", "France Info"],
            label="RSS feeds",
            info="Default RSS feeds",
        )

    with gr.Row():
        btn = gr.Button(value='Fetch News')

    with gr.Row():
        out = gr.DownloadButton(label="📂 Click to download file")

    # Clicking the button runs process() and sends its return value to the
    # download button.
    btn.click(fn=process, inputs=[radio, textbox, audio, checkboxgroup], outputs=out)


demo.launch(debug=True)