Spaces:
Sleeping
Sleeping
import json | |
import os | |
from datetime import datetime, timezone | |
from fasthtml.common import * | |
from huggingface_hub import HfApi, hf_hub_download | |
from starlette.responses import FileResponse | |
from generate_newsletter import process_new_papers | |
from apscheduler.schedulers.background import BackgroundScheduler | |
from apscheduler.triggers.cron import CronTrigger | |
# Hugging Face Hub client setup.
# The access token is read from the environment (None when HF_TOKEN is unset);
# DATASET_NAME is the dataset repository every newsletter file is read from.
HF_TOKEN = os.getenv("HF_TOKEN")
DATASET_NAME = "cmcmaster/this_week_in_rheumatology"
api = HfApi(token=HF_TOKEN)
# Initialize scheduler
scheduler = BackgroundScheduler()


def _run_scheduled_newsletter():
    """Run newsletter generation with ``end_date`` computed at execution time.

    The scheduler hands job kwargs to the callable unchanged, so a templated
    string such as ``"{{ execution_date ... }}"`` would reach
    ``process_new_papers`` as literal text. The date is therefore computed
    here, each time the job fires: the most recent Sunday strictly before
    today (UTC), formatted as ``YYYY-MM-DD``.
    """
    from datetime import timedelta  # local import: only this job needs it

    now = datetime.now(timezone.utc)
    # weekday() is 0 for Monday, so subtracting weekday() + 1 days lands on
    # the previous Sunday (a full week back when today is itself a Sunday).
    end_date = (now - timedelta(days=now.weekday() + 1)).strftime("%Y-%m-%d")
    process_new_papers(end_date=end_date, test=False)


# Run newsletter generation every 6 hours.
# NOTE(review): the job name says "weekly" and CronTrigger is imported but
# unused -- confirm whether a Monday 01:00 UTC cron schedule was intended.
scheduler.add_job(_run_scheduled_newsletter,
                  trigger="interval",
                  hours=6,
                  id='generate_newsletter',
                  name='Weekly newsletter generation',
                  replace_existing=True)
# Site-wide stylesheet: serif typography, a centred 800px column, unstyled
# lists, and button-like download links. Kept as a plain string and wrapped
# in a Style element below so it can be passed to the app as a header.
_PAGE_CSS = """
body {
font-family: Georgia, Times, serif;
line-height: 1.6;
color: #333;
max-width: 800px;
margin: 0 auto;
padding: 20px;
background: #fff;
}
h1, h2 {
color: #2c3e50;
font-family: Georgia, Times, serif;
}
a {
color: #2c3e50;
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
ul {
list-style-type: none;
padding: 0;
}
li {
margin-bottom: 10px;
}
.newsletter-content {
margin-top: 20px;
}
.download-links {
margin: 20px 0;
}
.download-link {
display: inline-block;
padding: 10px 20px;
background-color: #2c3e50;
color: white;
border-radius: 3px;
margin: 0 10px 10px 0;
font-family: Georgia, Times, serif;
}
.download-link:hover {
background-color: #34495e;
text-decoration: none;
}
"""
css = Style(_PAGE_CSS)
# Headers attached to the app: the stylesheet above, Markdown rendering, and
# syntax highlighting for the listed languages.
_page_headers = (
    css,
    MarkdownJS(),
    HighlightJS(langs=['python', 'javascript', 'html', 'css']),
)
app = FastHTML(hdrs=_page_headers)
# Start the scheduler when the app starts
async def start_scheduler():
    """Start the background scheduler unless it is already running.

    NOTE(review): nothing visible in this file registers this coroutine as a
    startup hook -- confirm it is wired to the app's startup event.
    """
    # APScheduler raises if start() is called on a running scheduler, so a
    # repeated startup call is treated as a no-op instead.
    if not scheduler.running:
        scheduler.start()
# Shut down the scheduler when the app stops
async def shutdown_scheduler():
    """Shut down the background scheduler if it is running.

    NOTE(review): nothing visible in this file registers this coroutine as a
    shutdown hook -- confirm it is wired to the app's shutdown event.
    """
    # APScheduler raises if shutdown() is called on a scheduler that was
    # never started (e.g. startup failed), so only stop a running one.
    if scheduler.running:
        scheduler.shutdown()
def get_newsletter_list():
    """Return the repo paths of all newsletter JSON files, reverse-sorted.

    Lists every file in the dataset repository and keeps the paths that end
    in ``newsletter.json``. The result is a list of strings sorted in
    descending order.
    """
    repo_files = api.list_repo_files(repo_id=DATASET_NAME, repo_type="dataset")
    json_paths = filter(lambda name: name.endswith('newsletter.json'),
                        repo_files)
    return sorted(json_paths, reverse=True)
def get_newsletter_content(path):
    """Download a newsletter JSON file from the dataset repo and parse it.

    Args:
        path: Path of the file inside the dataset repository,
            e.g. ``"<date>/newsletter.json"``.

    Returns:
        The decoded JSON document.

    Raises:
        Whatever the Hub download raises when the file cannot be fetched, and
        ``json.JSONDecodeError`` when the file is not valid JSON.
    """
    local_path = api.hf_hub_download(repo_id=DATASET_NAME,
                                     filename=path,
                                     repo_type="dataset")
    # JSON is UTF-8 by specification; do not depend on the platform's
    # default text encoding.
    with open(local_path, 'r', encoding='utf-8') as f:
        return json.load(f)
def check_format_exists(date: str, format: str) -> bool:
    """Check if a specific format exists for a given date.

    Args:
        date: Newsletter directory name inside the dataset repository.
        format: File extension to look for (the file checked is
            ``<date>/newsletter.<format>``).

    Returns:
        True when the file exists in the repository; False when it does not
        or when the lookup fails for any reason.
    """
    try:
        # Ask the Hub whether the file exists instead of downloading the
        # whole PDF/EPUB just to find out.
        return api.file_exists(
            repo_id=DATASET_NAME,
            filename=f"{date}/newsletter.{format}",
            repo_type="dataset"
        )
    except Exception:
        # Best-effort check: a failed lookup (e.g. network error) is reported
        # as "not available" so the page still renders without the link.
        return False
@app.get("/")
def index():
    """Render the landing page: one link per available newsletter.

    Registered at ``/`` because the "Back to Index" links on the other pages
    point there. Each newsletter path is expected to start with a
    ``YYYYMMDD`` directory; that directory name becomes the link target
    (``/newsletter/<dir>``) and is shown as e.g. ``January 05, 2025``.
    """
    links = []
    for path in get_newsletter_list():
        issue = path.split('/')[0]
        try:
            label = datetime.strptime(issue, '%Y%m%d').strftime('%B %d, %Y')
        except ValueError:
            # A file whose first path segment is not a YYYYMMDD date must not
            # take the whole index page down; leave it out of the listing.
            continue
        links.append(Li(A(label, href=f"/newsletter/{issue}")))
    return Titled("This Week in Rheumatology", H2("Available Newsletters"),
                  Ul(*links))
@app.get("/newsletter/{date}")
def newsletter(date: str):
    """Render one newsletter page, with download links for available formats.

    Registered at ``/newsletter/{date}``, the URL shape the index page links
    to.

    Args:
        date: Newsletter directory name inside the dataset repository.

    Returns:
        The newsletter page, or an error page when the newsletter cannot be
        loaded or rendered.
    """
    path = f"{date}/newsletter.json"
    try:
        content = get_newsletter_content(path)
        # Offer a download button only for the formats that actually exist
        # for this date.
        download_links = [
            A(f"Download {fmt.upper()}",
              href=f"/download/{date}/{fmt}",
              cls="download-link")
            for fmt in ("pdf", "epub")
            if check_format_exists(date, fmt)
        ]
        return Titled(
            f"This Week in Rheumatology - {content['date']}",
            A("Back to Index", href="/"),
            Div(*download_links, cls="download-links"),
            # The "marked" class is what the Markdown header script renders.
            Div(content['content'], cls="marked"))
    except Exception:
        # Any failure (missing file, malformed JSON, missing keys) is shown
        # to the visitor as a "not found" page rather than a server error.
        return Titled("Error", H2("Newsletter not found"),
                      P(f"Unable to load newsletter for date: {date}"),
                      A("Back to Index", href="/"))
@app.get("/download/{date}/{format}")
def download_file(date: str, format: str):
    """Serve the PDF or EPUB version of a newsletter as a file download.

    Registered at ``/download/{date}/{format}``, the URL shape used by the
    download links on the newsletter page.

    Args:
        date: Newsletter directory name inside the dataset repository.
        format: Requested format; only ``"pdf"`` and ``"epub"`` are supported.

    Returns:
        A ``FileResponse`` for the downloaded file, or an error page when the
        format is unsupported or the file cannot be fetched.
    """
    media_types = {
        "pdf": "application/pdf",
        "epub": "application/epub+zip",
    }
    try:
        # Reject unsupported formats before touching the network, so an
        # arbitrary "newsletter.<format>" is never fetched from the repo.
        if format not in media_types:
            raise ValueError(f"Unsupported format: {format}")
        local_path = api.hf_hub_download(repo_id=DATASET_NAME,
                                         filename=f"{date}/newsletter.{format}",
                                         repo_type="dataset")
        return FileResponse(local_path,
                            media_type=media_types[format],
                            filename=f"newsletter_{date}.{format}")
    except Exception:
        # Unsupported format and failed download share the same error page.
        return Titled("Error", H2(f"{format.upper()} not found"),
                      P(f"Unable to load {format.upper()} for date: {date}"),
                      A("Back to Index", href="/"))
# Launch the app.
# NOTE(review): serve is not defined in this file; presumably it is the
# server entry point pulled in by `from fasthtml.common import *` -- confirm.
serve()