cmcmaster's picture
Update main.py
9ee0be3 verified
import json
import os
from datetime import datetime, timezone
from fasthtml.common import *
from huggingface_hub import HfApi, hf_hub_download
from starlette.responses import FileResponse
from generate_newsletter import process_new_papers
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger
# Hugging Face Hub configuration. The access token is read from the
# environment (it is None when HF_TOKEN is unset) and every Hub call in this
# file targets the single dataset repository named below.
HF_TOKEN = os.getenv("HF_TOKEN")
DATASET_NAME = "cmcmaster/this_week_in_rheumatology"
# Shared Hub client used by the helper functions and routes in this file.
api = HfApi(token=HF_TOKEN)
# Initialize scheduler
scheduler = BackgroundScheduler()


def _run_newsletter_job():
    """Run newsletter generation with a cutoff date computed at run time.

    APScheduler hands a job's ``kwargs`` to the callable verbatim and does not
    render templates, so a ``{{ ... }}`` expression in ``kwargs`` would reach
    ``process_new_papers`` as literal text. The date is therefore computed
    here, each time the job fires.
    """
    # Local import keeps this block self-contained (``timedelta`` is not in
    # the file's import block).
    from datetime import datetime, timedelta, timezone

    now = datetime.now(timezone.utc)
    # Same arithmetic as the original template expression: step back to the
    # Sunday before the current week (Monday -> 1 day back, Sunday -> 7 days).
    # NOTE(review): the template used an Airflow-style ``execution_date``;
    # "now in UTC" is assumed to be the intended equivalent - confirm.
    cutoff = now - timedelta(days=now.weekday() + 1)
    process_new_papers(end_date=cutoff.strftime("%Y-%m-%d"), test=False)


# Run newsletter generation every 6 hours.
# NOTE(review): an earlier comment described this job as "every Monday at
# 1 AM UTC"; the configured trigger is a 6-hour interval and is kept as is.
# Switch to CronTrigger(day_of_week="mon", hour=1, timezone="UTC") if the
# weekly schedule is what is actually wanted.
scheduler.add_job(_run_newsletter_job,
                  trigger="interval",
                  hours=6,
                  id='generate_newsletter',
                  name='Weekly newsletter generation',
                  replace_existing=True)
# Inline stylesheet handed to the app as a header (see `hdrs` on the FastHTML
# constructor): Georgia serif typography, a centered 800px-wide column,
# bullet-less lists, and button-like `.download-link` anchors used for the
# download links on newsletter pages.
css = Style("""
body {
font-family: Georgia, Times, serif;
line-height: 1.6;
color: #333;
max-width: 800px;
margin: 0 auto;
padding: 20px;
background: #fff;
}
h1, h2 {
color: #2c3e50;
font-family: Georgia, Times, serif;
}
a {
color: #2c3e50;
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
ul {
list-style-type: none;
padding: 0;
}
li {
margin-bottom: 10px;
}
.newsletter-content {
margin-top: 20px;
}
.download-links {
margin: 20px 0;
}
.download-link {
display: inline-block;
padding: 10px 20px;
background-color: #2c3e50;
color: white;
border-radius: 3px;
margin: 0 10px 10px 0;
font-family: Georgia, Times, serif;
}
.download-link:hover {
background-color: #34495e;
text-decoration: none;
}
""")
# Application object. Every page gets the stylesheet plus the Markdown and
# syntax-highlighting header helpers (presumably MarkdownJS is what renders
# the newsletter body placed in the `marked` div - confirm).
app = FastHTML(
    hdrs=(
        css,
        MarkdownJS(),
        HighlightJS(langs=['python', 'javascript', 'html', 'css']),
    )
)
# Application lifecycle hook: startup
@app.on_event("startup")
async def start_scheduler():
    """Start the background scheduler when the application starts up."""
    scheduler.start()
# Application lifecycle hook: shutdown
@app.on_event("shutdown")
async def shutdown_scheduler():
    """Shut the background scheduler down when the application stops."""
    scheduler.shutdown()
def get_newsletter_list():
    """Return the repository paths of all newsletter JSON files.

    Lists every file in the dataset repository, keeps the paths ending in
    ``newsletter.json`` and returns them in descending string order (which is
    newest-first as long as each path starts with a YYYYMMDD folder name).
    """
    repo_files = api.list_repo_files(repo_id=DATASET_NAME, repo_type="dataset")
    matches = list(
        filter(lambda name: name.endswith('newsletter.json'), repo_files))
    matches.sort(reverse=True)
    return matches
def get_newsletter_content(path):
    """Download a newsletter JSON file from the dataset repo and parse it.

    Args:
        path: Path of the file inside the dataset repository
            (callers in this file pass ``"<date>/newsletter.json"``).

    Returns:
        The decoded JSON document.

    Raises:
        Exception: whatever the Hub client raises when the download fails.
        OSError: if the downloaded file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    local_path = api.hf_hub_download(repo_id=DATASET_NAME,
                                     filename=path,
                                     repo_type="dataset")
    # JSON text is UTF-8; do not depend on the platform's default encoding,
    # which would garble or reject non-ASCII characters on some systems.
    with open(local_path, 'r', encoding='utf-8') as f:
        return json.load(f)
def check_format_exists(date: str, format: str) -> bool:
    """Report whether ``<date>/newsletter.<format>`` can be fetched.

    Existence is probed by attempting the download from the dataset
    repository; any exception at all is interpreted as "not available".

    Args:
        date: Folder name of the issue inside the repository.
        format: File extension to look for (e.g. ``"pdf"``).

    Returns:
        True if the download succeeded, False otherwise.
    """
    try:
        target = f"{date}/newsletter.{format}"
        api.hf_hub_download(repo_id=DATASET_NAME,
                            filename=target,
                            repo_type="dataset")
    except Exception:
        return False
    return True
@app.get("/")
def index():
    """Render the landing page with one link per available newsletter."""
    items = []
    for repo_path in get_newsletter_list():
        # The first path segment is the issue folder, parsed as YYYYMMDD.
        folder = repo_path.split('/')[0]
        label = datetime.strptime(folder, '%Y%m%d').strftime('%B %d, %Y')
        items.append(Li(A(label, href=f"/newsletter/{folder}")))
    return Titled("This Week in Rheumatology",
                  H2("Available Newsletters"),
                  Ul(*items))
@app.get("/newsletter/{date}")
def newsletter(date: str):
    """Render a single newsletter issue with its download links.

    Args:
        date: Folder name of the issue in the dataset repository, taken from
            the URL.

    Returns:
        The issue page, or a "Newsletter not found" error page if anything
        fails while loading or rendering the issue.
    """
    path = f"{date}/newsletter.json"
    try:
        issue = get_newsletter_content(path)
        # One button per alternative format that exists for this issue,
        # PDF first, then EPUB.
        buttons = [
            A(label, href=f"/download/{date}/{ext}", cls="download-link")
            for ext, label in (("pdf", "Download PDF"),
                               ("epub", "Download EPUB"))
            if check_format_exists(date, ext)
        ]
        return Titled(
            f"This Week in Rheumatology - {issue['date']}",
            A("Back to Index", href="/"),
            Div(*buttons, cls="download-links"),
            Div(issue['content'], cls="marked"))
    except Exception:
        # Any failure (missing file, bad JSON, missing keys) shows the same page.
        return Titled("Error", H2("Newsletter not found"),
                      P(f"Unable to load newsletter for date: {date}"),
                      A("Back to Index", href="/"))
@app.get("/download/{date}/{format}")
def download_file(date: str, format: str):
    """Serve the PDF or EPUB rendition of a newsletter as a file download.

    Args:
        date: Folder name of the issue in the dataset repository, taken from
            the URL.
        format: Requested file extension, taken from the URL; only ``"pdf"``
            and ``"epub"`` are supported.

    Returns:
        A ``FileResponse`` for the downloaded file, or an error page when the
        format is unsupported or the file cannot be fetched.
    """
    media_types = {
        "pdf": "application/pdf",
        "epub": "application/epub+zip",
    }

    def not_found():
        # Single error page shared by every failure path.
        return Titled("Error", H2(f"{format.upper()} not found"),
                      P(f"Unable to load {format.upper()} for date: {date}"),
                      A("Back to Index", href="/"))

    # Validate the user-supplied format BEFORE touching the Hub, so that an
    # unsupported extension never triggers a download of an arbitrary
    # ``newsletter.<format>`` file from the repository.
    media_type = media_types.get(format)
    if media_type is None:
        return not_found()

    try:
        local_path = api.hf_hub_download(repo_id=DATASET_NAME,
                                         filename=f"{date}/newsletter.{format}",
                                         repo_type="dataset")
        return FileResponse(local_path,
                            media_type=media_type,
                            filename=f"newsletter_{date}.{format}")
    except Exception:
        # Best-effort: any download/response failure shows the error page.
        return not_found()
# Launch the web server. `serve` is not defined in this file, so it comes from
# the `fasthtml.common` star import (presumably it only starts the server when
# this module is run as the main program - confirm).
serve()