# Page header captured with this file: "Spaces: Runtime error".
import streamlit as st | |
import feedparser | |
from transformers import pipeline | |
import requests | |
import datetime | |
# Streamlit page configuration (must run before any other Streamlit call).
PAGE_TITLE = "今日のテクノロジーニュース"
st.set_page_config(page_title=PAGE_TITLE, layout="wide")
st.title(f"📡 {PAGE_TITLE}")

# URL of the RSS feed whose entries are fetched and translated.
rss_url = "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml"
# Catalogue of selectable translation models.
#
# Each entry is a dict with the keys "name" (Hugging Face model id),
# "description" (label text shown in the selector), and "src_lang" /
# "tgt_lang" (language codes handed to the translation pipeline).
def _model_entry(name, description, src_lang="eng_Latn", tgt_lang="jpn_Jpan"):
    """Build one catalogue entry; defaults to English -> Japanese in NLLB/FLORES codes."""
    return {
        "name": name,
        "description": description,
        "src_lang": src_lang,
        "tgt_lang": tgt_lang,
    }


models = [
    _model_entry("facebook/nllb-200-distilled-600M", "Translation • Updated Feb 15 • 322k • 504"),
    # mBART tokenizers use "en_XX"/"ja_XX" style codes, not the NLLB/FLORES ones.
    _model_entry(
        "facebook/mbart-large-50-many-to-many-mmt",
        "Translation • Updated Sep 29, 2023 • 646k • 278",
        src_lang="en_XX",
        tgt_lang="ja_XX",
    ),
    _model_entry("facebook/nllb-200-3.3B", "Translation • Updated Feb 12, 2023 • 28.9k • 249"),
    # NOTE(review): MADLAD-400 presumably selects the target language with a
    # "<2xx>" text prefix rather than src_lang/tgt_lang -- confirm before relying on it.
    _model_entry("google/madlad400-10b-mt", "Translation • Updated Apr 12 • 1.76k • 84"),
    # Japanese -> English model (mBART codes, see above).
    _model_entry(
        "ken11/mbart-ja-en",
        "Translation • Updated Oct 13, 2021 • 63 • 3",
        src_lang="ja_XX",
        tgt_lang="en_XX",
    ),
    _model_entry("facebook/nllb-200-1.3B", "Translation • Updated Feb 12, 2023 • 14.6k • 44"),
    _model_entry("facebook/nllb-200-distilled-1.3B", "Translation • Updated Feb 12, 2023 • 101k • 98"),
    # NOTE(review): small100 presumably needs its own tokenizer and short codes
    # ("en"/"ja") -- confirm.
    _model_entry("alirezamsh/small100", "Translation • Updated Jul 23 • 1.85k • 60"),
    # NOTE(review): the name suggests a quality-estimation metric (COMET-Kiwi),
    # not a translator -- confirm this entry belongs here.
    _model_entry("Unbabel/wmt22-cometkiwi-da", "Translation • Updated Oct 13, 2023 • 1 • 24"),
    _model_entry("ychenNLP/nllb-200-3.3B-easyproject", "Translation • Updated Aug 30, 2023 • 73 • 2"),
    # NOTE(review): every "*-ct2*" / "ct2fast-*" repository below looks like a
    # CTranslate2 conversion; presumably these cannot be loaded through
    # transformers.pipeline -- confirm.  The "sugoi" entries are Japanese -> English.
    _model_entry(
        "JustFrederik/sugoi-v3.3-ja-en-ct2-float16",
        "Translation • Updated May 10, 2023 • 2",
        src_lang="jpn_Jpan",
        tgt_lang="eng_Latn",
    ),
    _model_entry(
        "JustFrederik/sugoi-v3.3-ja-en-ct2-int8",
        "Translation • Updated May 10, 2023 • 22 • 1",
        src_lang="jpn_Jpan",
        tgt_lang="eng_Latn",
    ),
    _model_entry(
        "JustFrederik/sugoi-v4-ja-en-ct2-float16",
        "Translation • Updated May 10, 2023 • 13 • 1",
        src_lang="jpn_Jpan",
        tgt_lang="eng_Latn",
    ),
    _model_entry(
        "JustFrederik/sugoi-v4-ja-en-ct2-int8",
        "Translation • Updated May 10, 2023",
        src_lang="jpn_Jpan",
        tgt_lang="eng_Latn",
    ),
    _model_entry(
        "JustFrederik/sugoi-v4-ja-en-ct2",
        "Translation • Updated May 10, 2023 • 20 • 1",
        src_lang="jpn_Jpan",
        tgt_lang="eng_Latn",
    ),
    _model_entry(
        "JustFrederik/sugoi-v3.3-ja-en-ct2",
        "Translation • Updated May 10, 2023",
        src_lang="jpn_Jpan",
        tgt_lang="eng_Latn",
    ),
    _model_entry("JustFrederik/nllb-200-distilled-600M-ct2-int8", "Translation • Updated May 15, 2023 • 225"),
    _model_entry("JustFrederik/nllb-200-distilled-1.3B-ct2-int8", "Translation • Updated May 15, 2023 • 74 • 1"),
    _model_entry("JustFrederik/nllb-200-1.3B-ct2-int8", "Translation • Updated May 15, 2023 • 12"),
    _model_entry("JustFrederik/nllb-200-1.3B-ct2-float16", "Translation • Updated May 15, 2023 • 6"),
    _model_entry("JustFrederik/nllb-200-1.3B-ct2", "Translation • Updated May 15, 2023 • 14"),
    _model_entry("JustFrederik/nllb-200-distilled-1.3B-ct2", "Translation • Updated May 15, 2023 • 3"),
    _model_entry("JustFrederik/nllb-200-distilled-1.3B-ct2-float16", "Translation • Updated May 15, 2023 • 7 • 1"),
    _model_entry("JustFrederik/nllb-200-distilled-600M-ct2", "Translation • Updated May 15, 2023 • 4"),
    _model_entry("JustFrederik/nllb-200-distilled-600M-ct2-float16", "Translation • Updated May 15, 2023 • 8"),
    _model_entry("JustFrederik/nllb-200-3.3B-ct2-float16", "Translation • Updated May 15, 2023 • 26 • 3"),
    # NOTE(review): mREBEL is presumably a relation-extraction model, not a
    # translator -- confirm these three entries belong here.
    _model_entry("Babelscape/mrebel-large", "Translation • Updated Jun 21, 2023 • 67.5k • 66"),
    _model_entry("Babelscape/mrebel-large-32", "Translation • Updated Jun 23, 2023 • 97 • 6"),
    _model_entry("Babelscape/mrebel-base", "Translation • Updated Jun 23, 2023 • 66 • 5"),
    _model_entry("winstxnhdw/nllb-200-distilled-1.3B-ct2-int8", "Translation • Updated Aug 3, 2023 • 2.42k • 4"),
    _model_entry("michaelfeil/ct2fast-nllb-200-distilled-1.3B", "Translation • Updated Dec 10, 2023 • 10 • 1"),
    _model_entry("michaelfeil/ct2fast-nllb-200-3.3B", "Translation • Updated Jul 21, 2023 • 36 • 11"),
    # NOTE(review): "xlm_Latn" does not look like a language code and the model
    # name suggests word alignment rather than translation -- confirm.
    _model_entry(
        "qiyuw/WSPAlign-xlm-base",
        "Translation • Updated Mar 18 • 4",
        src_lang="xlm_Latn",
    ),
    # Further models can be appended here.
]
# Let the user choose the translation model from a sidebar drop-down.
def _format_model_option(model):
    """Return the drop-down label for one catalogue entry: "<name> - <description>"."""
    return f"{model['name']} - {model['description']}"


st.sidebar.header("翻訳モデルの選択")
selected_model = st.sidebar.selectbox(
    "使用する翻訳モデルを選択してください:",
    options=models,
    format_func=_format_model_option,
)
@st.cache_resource
def load_translation_model(model_name, src_lang, tgt_lang):
    """Load a Hugging Face translation pipeline and cache it.

    The result is cached with ``st.cache_resource`` per
    ``(model_name, src_lang, tgt_lang)`` combination, so Streamlit reruns
    reuse the already-loaded pipeline instead of loading it again.

    Args:
        model_name: Model id passed to ``pipeline(model=...)``.
        src_lang: Source language code passed to the pipeline.
        tgt_lang: Target language code passed to the pipeline.

    Returns:
        The translation pipeline, or ``None`` when loading failed (the error
        is reported with ``st.error``). A ``None`` result is cached like any
        other return value, so a failed load is retried only after the cache
        has been cleared.
    """
    try:
        return pipeline(
            "translation",
            model=model_name,
            src_lang=src_lang,
            tgt_lang=tgt_lang,
        )
    except Exception as e:
        # UI boundary: report any load failure to the user instead of crashing.
        st.error(f"翻訳モデルのロード中にエラーが発生しました: {e}")
        return None
def translate_text(_translator, text):
    """Translate *text* with the given translation pipeline.

    The translation direction is whatever ``_translator`` was built for.
    Results are not cached here: the underscore-prefixed ``_translator``
    would be left out of a ``st.cache_data`` key, so a cache keyed on
    ``text`` alone would return stale translations after switching models.

    Args:
        _translator: Callable invoked as ``_translator(text, max_length=500)``
            that returns a list whose first item has a ``"translation_text"``
            key. A falsy value means no model is available.
        text: Text to translate; may be empty or ``None``.

    Returns:
        The translated string; ``""`` when there is nothing to translate;
        the placeholder ``"翻訳エラー"`` when no translator is available or
        the translation fails (failures are also reported with ``st.error``).
    """
    if not _translator:
        return "翻訳エラー"
    if not text or not text.strip():
        # Nothing to translate: skip the (expensive) model call entirely.
        return ""
    try:
        outputs = _translator(text, max_length=500)
        return outputs[0]["translation_text"]
    except Exception as e:
        # UI boundary: show the failure and fall back to the placeholder text.
        st.error(f"翻訳中にエラーが発生しました: {e}")
        return "翻訳エラー"
@st.cache_data(ttl=3600)
def _download_feed(url, timeout):
    """Download the raw feed XML and archive a timestamped copy of it.

    Cached for one hour, so Streamlit reruns neither hit the network nor
    write another archive file. Failures are raised rather than returned,
    which keeps them out of the cache.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise requests.HTTPError(
            f"unexpected status code {response.status_code}", response=response
        )

    # Keep a copy of every downloaded feed (accumulated as a dataset).
    archive_name = datetime.datetime.now().strftime("feed_%Y%m%d_%H%M%S.xml")
    try:
        with open(archive_name, "wb") as f:
            f.write(response.content)
    except OSError as e:
        # Archiving is best-effort: an unwritable directory must not hide the feed.
        st.warning(f"フィードXMLの保存に失敗しました: {e}")
    return response.content


def fetch_rss_feed(url, timeout=10):
    """Fetch an RSS feed, archive its XML, and return the parsed feed.

    The download (and the archive write) is cached for one hour; parsing is
    repeated on every call.

    Args:
        url: Feed URL.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The object returned by ``feedparser.parse``, or ``None`` when the
        feed could not be fetched or parsed (the error is reported with
        ``st.error``).
    """
    try:
        return feedparser.parse(_download_feed(url, timeout))
    except requests.HTTPError as e:
        st.error(f"RSSフィードの取得に失敗しました。ステータスコード: {e.response.status_code}")
        return None
    except Exception as e:
        # UI boundary: any other failure (network, parsing) is shown to the user.
        st.error(f"RSSフィードの取得中にエラーが発生しました: {e}")
        return None


# Expose the download cache's reset so `fetch_rss_feed.clear()` works.
fetch_rss_feed.clear = _download_feed.clear
# Fetch the feed; end this script run if it could not be retrieved.
feed = fetch_rss_feed(rss_url)
if feed is None:
    st.stop()

# Load the translation model selected in the sidebar.
translator = load_translation_model(
    selected_model["name"],
    selected_model["src_lang"],
    selected_model["tgt_lang"],
)

# Show every feed entry with its original and translated title/summary.
for entry in feed.entries:
    # Entries are not guaranteed to carry every field; fall back to "".
    title = entry.get("title", "")
    description = entry.get("description", "")
    link = entry.get("link", "")

    translated_title = translate_text(translator, title)
    translated_description = translate_text(translator, description)

    st.markdown(f"### **タイトル(英語):** {title}")
    st.markdown(f"**タイトル(日本語):** {translated_title}")
    st.markdown("**概要(英語):**")
    st.write(description)
    st.markdown("**概要(日本語):**")
    st.write(translated_description)
    if link:
        st.markdown(f"[🌐 続きを読む]({link})")
    st.markdown("---")

# Button that resets Streamlit's caches.
if st.button("キャッシュをクリア"):
    # Clear through the global cache APIs: they exist whether or not the
    # individual functions are wrapped by a cache decorator. This drops
    # every cached entry in the app, not just the three functions above.
    st.cache_resource.clear()
    st.cache_data.clear()
    st.success("キャッシュをクリアしました。")