TranslativeFeed / app.py
OzoneAsai's picture
Update app.py
2532716 verified
raw
history blame
10.8 kB
import streamlit as st
import feedparser
from transformers import pipeline
import requests
import datetime
# Streamlit page setup: the browser-tab title and the on-page heading share
# the same text, so it is defined once.
PAGE_TITLE = "今日のテクノロジーニュース"
st.set_page_config(page_title=PAGE_TITLE, layout="wide")
st.title("📡 " + PAGE_TITLE)

# URL of the RSS feed whose entries are fetched and translated below.
rss_url = "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml"
# Catalogue of translation models offered in the sidebar.
#
# Each entry becomes a dict with four keys:
#   name        - Hugging Face Hub model id handed to `pipeline(...)`
#   description - free-text label shown next to the name in the select box
#   src_lang    - source-language code passed to the pipeline
#   tgt_lang    - target-language code passed to the pipeline
#
# Language codes are model-family specific.  NLLB checkpoints use FLORES-200
# codes ("eng_Latn", "jpn_Jpan"), while mBART checkpoints use "en_XX"/"ja_XX";
# giving an mBART tokenizer an NLLB code does not select a valid language.
#
# NOTE(review): several listed repositories (CTranslate2 "-ct2" conversions,
# the COMET-Kiwi metric, mREBEL, WSPAlign, MADLAD-400, SMALL-100) do not look
# like checkpoints the plain transformers "translation" pipeline can drive
# with src_lang/tgt_lang -- confirm each before relying on it.  Their entries
# are kept exactly as they were.
_NLLB_EN = "eng_Latn"
_NLLB_JA = "jpn_Jpan"
_MBART_EN = "en_XX"
_MBART_JA = "ja_XX"

# (name, description, src_lang, tgt_lang) -- order is the display order.
_MODEL_ROWS = [
    ("facebook/nllb-200-distilled-600M",
     "Translation • Updated Feb 15 • 322k • 504", _NLLB_EN, _NLLB_JA),
    # mBART-50 expects its own language codes, not the NLLB ones.
    ("facebook/mbart-large-50-many-to-many-mmt",
     "Translation • Updated Sep 29, 2023 • 646k • 278", _MBART_EN, _MBART_JA),
    ("facebook/nllb-200-3.3B",
     "Translation • Updated Feb 12, 2023 • 28.9k • 249", _NLLB_EN, _NLLB_JA),
    ("google/madlad400-10b-mt",
     "Translation • Updated Apr 12 • 1.76k • 84", _NLLB_EN, _NLLB_JA),
    # Presumably an mBART checkpoint (per its name), hence mBART codes;
    # note the direction is Japanese -> English.
    ("ken11/mbart-ja-en",
     "Translation • Updated Oct 13, 2021 • 63 • 3", _MBART_JA, _MBART_EN),
    ("facebook/nllb-200-1.3B",
     "Translation • Updated Feb 12, 2023 • 14.6k • 44", _NLLB_EN, _NLLB_JA),
    ("facebook/nllb-200-distilled-1.3B",
     "Translation • Updated Feb 12, 2023 • 101k • 98", _NLLB_EN, _NLLB_JA),
    ("alirezamsh/small100",
     "Translation • Updated Jul 23 • 1.85k • 60", _NLLB_EN, _NLLB_JA),
    ("Unbabel/wmt22-cometkiwi-da",
     "Translation • Updated Oct 13, 2023 • 1 • 24", _NLLB_EN, _NLLB_JA),
    ("ychenNLP/nllb-200-3.3B-easyproject",
     "Translation • Updated Aug 30, 2023 • 73 • 2", _NLLB_EN, _NLLB_JA),
    ("JustFrederik/sugoi-v3.3-ja-en-ct2-float16",
     "Translation • Updated May 10, 2023 • 2", _NLLB_JA, _NLLB_EN),
    ("JustFrederik/sugoi-v3.3-ja-en-ct2-int8",
     "Translation • Updated May 10, 2023 • 22 • 1", _NLLB_JA, _NLLB_EN),
    ("JustFrederik/sugoi-v4-ja-en-ct2-float16",
     "Translation • Updated May 10, 2023 • 13 • 1", _NLLB_JA, _NLLB_EN),
    ("JustFrederik/sugoi-v4-ja-en-ct2-int8",
     "Translation • Updated May 10, 2023", _NLLB_JA, _NLLB_EN),
    ("JustFrederik/sugoi-v4-ja-en-ct2",
     "Translation • Updated May 10, 2023 • 20 • 1", _NLLB_JA, _NLLB_EN),
    ("JustFrederik/sugoi-v3.3-ja-en-ct2",
     "Translation • Updated May 10, 2023", _NLLB_JA, _NLLB_EN),
    ("JustFrederik/nllb-200-distilled-600M-ct2-int8",
     "Translation • Updated May 15, 2023 • 225", _NLLB_EN, _NLLB_JA),
    ("JustFrederik/nllb-200-distilled-1.3B-ct2-int8",
     "Translation • Updated May 15, 2023 • 74 • 1", _NLLB_EN, _NLLB_JA),
    ("JustFrederik/nllb-200-1.3B-ct2-int8",
     "Translation • Updated May 15, 2023 • 12", _NLLB_EN, _NLLB_JA),
    ("JustFrederik/nllb-200-1.3B-ct2-float16",
     "Translation • Updated May 15, 2023 • 6", _NLLB_EN, _NLLB_JA),
    ("JustFrederik/nllb-200-1.3B-ct2",
     "Translation • Updated May 15, 2023 • 14", _NLLB_EN, _NLLB_JA),
    ("JustFrederik/nllb-200-distilled-1.3B-ct2",
     "Translation • Updated May 15, 2023 • 3", _NLLB_EN, _NLLB_JA),
    ("JustFrederik/nllb-200-distilled-1.3B-ct2-float16",
     "Translation • Updated May 15, 2023 • 7 • 1", _NLLB_EN, _NLLB_JA),
    ("JustFrederik/nllb-200-distilled-600M-ct2",
     "Translation • Updated May 15, 2023 • 4", _NLLB_EN, _NLLB_JA),
    ("JustFrederik/nllb-200-distilled-600M-ct2-float16",
     "Translation • Updated May 15, 2023 • 8", _NLLB_EN, _NLLB_JA),
    ("JustFrederik/nllb-200-3.3B-ct2-float16",
     "Translation • Updated May 15, 2023 • 26 • 3", _NLLB_EN, _NLLB_JA),
    ("Babelscape/mrebel-large",
     "Translation • Updated Jun 21, 2023 • 67.5k • 66", _NLLB_EN, _NLLB_JA),
    ("Babelscape/mrebel-large-32",
     "Translation • Updated Jun 23, 2023 • 97 • 6", _NLLB_EN, _NLLB_JA),
    ("Babelscape/mrebel-base",
     "Translation • Updated Jun 23, 2023 • 66 • 5", _NLLB_EN, _NLLB_JA),
    ("winstxnhdw/nllb-200-distilled-1.3B-ct2-int8",
     "Translation • Updated Aug 3, 2023 • 2.42k • 4", _NLLB_EN, _NLLB_JA),
    ("michaelfeil/ct2fast-nllb-200-distilled-1.3B",
     "Translation • Updated Dec 10, 2023 • 10 • 1", _NLLB_EN, _NLLB_JA),
    ("michaelfeil/ct2fast-nllb-200-3.3B",
     "Translation • Updated Jul 21, 2023 • 36 • 11", _NLLB_EN, _NLLB_JA),
    ("qiyuw/WSPAlign-xlm-base",
     "Translation • Updated Mar 18 • 4", "xlm_Latn", _NLLB_JA),
    # Append further models here as (name, description, src_lang, tgt_lang).
]

models = [
    {
        "name": name,
        "description": description,
        "src_lang": src_lang,
        "tgt_lang": tgt_lang,
    }
    for name, description, src_lang, tgt_lang in _MODEL_ROWS
]
# Sidebar drop-down for choosing the translation model.
def _format_model_option(option):
    """Return the select-box label for one entry of `models`."""
    return f"{option['name']} - {option['description']}"


st.sidebar.header("翻訳モデルの選択")
selected_model = st.sidebar.selectbox(
    "使用する翻訳モデルを選択してください:",
    options=models,
    format_func=_format_model_option,
)
@st.cache_resource
def load_translation_model(model_name, src_lang, tgt_lang):
    """Build the translation pipeline for the selected model.

    The function is wrapped in `st.cache_resource`, so its result is reused
    for repeated calls with the same three arguments.

    Args:
        model_name: Model id passed to `pipeline` as `model`.
        src_lang: Source-language code passed to `pipeline`.
        tgt_lang: Target-language code passed to `pipeline`.

    Returns:
        The object returned by `pipeline("translation", ...)`, or `None` if
        constructing it raised; in that case an error is shown in the app.
    """
    pipeline_kwargs = {
        "model": model_name,
        "src_lang": src_lang,
        "tgt_lang": tgt_lang,
    }
    try:
        loaded = pipeline("translation", **pipeline_kwargs)
    except Exception as e:
        st.error(f"翻訳モデルのロード中にエラーが発生しました: {e}")
        return None
    else:
        return loaded
@st.cache_data
def _translate_cached(_translator, text, model_key):
    """Run the translator on `text`; cached per (text, model_key).

    `_translator` starts with an underscore, so Streamlit leaves it out of
    the cache key.  `model_key` is not used in the body: it exists only so
    that the cache key distinguishes translators from one another.
    Exceptions propagate to the caller, so failed calls are never stored.
    """
    return _translator(text, max_length=500)[0]['translation_text']


def _translator_cache_key(translator):
    """Return a string identifying which model `translator` wraps."""
    config = getattr(getattr(translator, "model", None), "config", None)
    name = getattr(config, "name_or_path", None)
    # Fall back to object identity when the translator exposes no model name.
    return name or f"{type(translator).__name__}@{id(translator)}"


def translate_text(_translator, text):
    """Translate `text` with `_translator`, caching successful results.

    Previously the cache key was `text` alone, so after switching models the
    translations produced by the earlier model were served again, and the
    error placeholder was cached like a real translation.  Results are now
    keyed on the text plus a model identifier, and only successful
    translations are cached.

    Args:
        _translator: Callable translation pipeline, or a falsy value when no
            model could be loaded.
        text: The string to translate.

    Returns:
        The translated string, or "翻訳エラー" when no translator is available
        or the translation raised (an error is shown in the app for the
        latter).
    """
    if not _translator:
        return "翻訳エラー"
    try:
        return _translate_cached(
            _translator, text, _translator_cache_key(_translator)
        )
    except Exception as e:
        st.error(f"翻訳中にエラーが発生しました: {e}")
        return "翻訳エラー"


# Keep `translate_text.clear()` working for callers that reset the cache.
translate_text.clear = _translate_cached.clear
# Upper bound, in seconds, for the feed request so a stalled server cannot
# hang the script indefinitely.
_FEED_TIMEOUT_SECONDS = 10


class _FeedStatusError(Exception):
    """Raised when the feed URL answers with a status code other than 200."""

    def __init__(self, status_code):
        super().__init__(status_code)
        self.status_code = status_code


@st.cache_data(ttl=3600)
def _fetch_rss_feed_cached(url):
    """Download, archive and parse the feed; cached for one hour on success.

    Raises instead of returning `None` so that a failed attempt is not
    stored in the cache and the next run retries.
    """
    response = requests.get(url, timeout=_FEED_TIMEOUT_SECONDS)
    if response.status_code != 200:
        raise _FeedStatusError(response.status_code)
    # Keep a timestamped copy of the raw XML (accumulated as a dataset).
    now = datetime.datetime.now()
    filename = now.strftime("feed_%Y%m%d_%H%M%S.xml")
    with open(filename, 'wb') as f:
        f.write(response.content)
    return feedparser.parse(response.content)


def fetch_rss_feed(url):
    """Fetch and parse the RSS feed at `url`.

    A successful result is cached for one hour.  Failures are reported in
    the app and are not cached; previously a single transient error was
    cached as `None` for the full hour.

    Args:
        url: Address of the RSS feed.

    Returns:
        The parsed feed from `feedparser.parse`, or `None` if the request,
        the archive write, or the parse raised, or if the HTTP status was
        not 200.
    """
    try:
        return _fetch_rss_feed_cached(url)
    except _FeedStatusError as e:
        st.error(f"RSSフィードの取得に失敗しました。ステータスコード: {e.status_code}")
    except Exception as e:
        st.error(f"RSSフィードの取得中にエラーが発生しました: {e}")
    return None


# Keep `fetch_rss_feed.clear()` working for callers that reset the cache.
fetch_rss_feed.clear = _fetch_rss_feed_cached.clear
# Fetch the feed; without it there is nothing to render, so stop the run.
feed = fetch_rss_feed(rss_url)
if feed is None:
    st.stop()

# Load the translation model chosen in the sidebar.
translator = load_translation_model(
    selected_model['name'],
    selected_model['src_lang'],
    selected_model['tgt_lang'],
)

# Show every feed entry with its original and translated title/summary.
for entry in feed.entries:
    # Entries are not guaranteed to carry every field; attribute access
    # would raise on a missing one and abort the whole page, so fall back
    # to empty values instead.
    title = entry.get("title", "")
    description = entry.get("description", "")
    link = entry.get("link")

    # Skip the translator for empty strings; there is nothing to translate.
    translated_title = translate_text(translator, title) if title else ""
    translated_description = (
        translate_text(translator, description) if description else ""
    )

    # Render as Markdown.
    st.markdown(f"### **タイトル(英語):** {title}")
    st.markdown(f"**タイトル(日本語):** {translated_title}")
    st.markdown("**概要(英語):**")
    st.write(description)
    st.markdown("**概要(日本語):**")
    st.write(translated_description)
    if link:
        st.markdown(f"[🌐 続きを読む]({link})")
    st.markdown("---")

# Button that resets the caches of the three cached functions (for the
# model loader, only the entry of the currently selected model).
if st.button("キャッシュをクリア"):
    load_translation_model.clear(
        selected_model['name'],
        selected_model['src_lang'],
        selected_model['tgt_lang'],
    )
    translate_text.clear()
    fetch_rss_feed.clear()
    st.success("キャッシュをクリアしました。")