"""Weekly browser for arXiv articles stored in a Hugging Face dataset repo.

At import time the script logs in to the Hugging Face Hub, loads the
``abstracts``, ``articles`` and ``images`` configurations of
``<user>/zotero-articles``, groups the articles by the Monday of the week in
which they were added, and then serves a FastHTML app with one page per week.
"""

import base64
import os
from collections import defaultdict
from datetime import date, datetime, timedelta
from io import BytesIO

import dotenv
from datasets import load_dataset
from dateutil.parser import parse
from dateutil.tz import tzutc
from fasthtml.common import *
from huggingface_hub import login, whoami

dotenv.load_dotenv()

# Adjacent literals are concatenated so the stylesheet text stays exactly the
# same as before while remaining readable in source.
style = Style(
    " .grid { margin-bottom: 1rem; }"
    " .card { display: flex; flex-direction: column; }"
    " .card img { margin-bottom: 0.5rem; }"
    " .card h5 { margin: 0; font-size: 0.9rem; line-height: 1.2; }"
    " .card a { color: inherit; text-decoration: none; }"
    " .card a:hover { text-decoration: underline; } "
)

app, rt = fast_app(html_style=(style,))

login(token=os.environ.get("HF_TOKEN"))
hf_user = whoami(os.environ.get("HF_TOKEN"))["name"]
HF_REPO_ID = f"{hf_user}/zotero-articles"

abstract_ds = load_dataset(HF_REPO_ID, "abstracts", split="train")
article_ds = load_dataset(HF_REPO_ID, "articles", split="train")
image_ds = load_dataset(HF_REPO_ID, "images", split="train")
# Only rows whose page_number is 1 are kept for the card thumbnails.
image_ds = image_ds.filter(lambda x: x["page_number"] == 1)


def parse_date(date_string):
    """Parse *date_string* and return the corresponding UTC calendar date.

    Falls back to today's date when the value cannot be parsed. ``TypeError``
    (e.g. a missing value) and ``OverflowError`` are handled alongside
    ``ValueError`` so that a single bad record cannot abort start-up.
    """
    try:
        return parse(date_string).astimezone(tzutc()).date()
    except (ValueError, TypeError, OverflowError):
        return date.today()


def get_week_start(date_obj):
    """Return the Monday of the week containing *date_obj*."""
    return date_obj - timedelta(days=date_obj.weekday())


def _index_rows_by_arxiv_id(ds):
    """Map every ``arxiv_id`` in *ds* to the list of row positions holding it."""
    positions = defaultdict(list)
    # Only the id column is accessed here; rows are fetched later on demand.
    for row_number, arxiv_id in enumerate(ds["arxiv_id"]):
        positions[arxiv_id].append(row_number)
    # A plain dict so that later lookups cannot insert empty entries.
    return dict(positions)


# Built once at start-up so each request is a dictionary lookup instead of a
# full scan of all three datasets per article.
_article_rows = _index_rows_by_arxiv_id(article_ds)
_abstract_rows = _index_rows_by_arxiv_id(abstract_ds)
_image_rows = _index_rows_by_arxiv_id(image_ds)

# Monday of the week -> arxiv ids of the articles added during that week.
week2articles = defaultdict(list)
for article in article_ds:
    date_added = parse_date(article["date_added"])
    week_start = get_week_start(date_added)
    week2articles[week_start].append(article["arxiv_id"])

# Most recent week first.
weeks = sorted(week2articles.keys(), reverse=True)


def get_article_details(arxiv_id):
    """Look up the article row plus its abstract and first-page image rows.

    Returns:
        A tuple ``(article, abstract, image)`` where ``article`` is the first
        matching row of ``article_ds`` and ``abstract`` / ``image`` are
        (possibly empty) datasets holding the matching rows.

    Raises:
        IndexError: if no article row carries *arxiv_id*.
    """
    article_positions = _article_rows.get(arxiv_id, [])
    if not article_positions:
        raise IndexError(f"no article with arxiv_id {arxiv_id!r}")
    article = article_ds[article_positions[0]]
    abstract = abstract_ds.select(_abstract_rows.get(arxiv_id, []))
    image = image_ds.select(_image_rows.get(arxiv_id, []))
    return article, abstract, image


def _image_data_url(pil_image):
    """Encode *pil_image* as a base64 ``data:`` URL containing JPEG bytes.

    Assumes the ``image`` column yields PIL images (the value is saved with
    ``format="JPEG"``) - TODO confirm against the dataset schema.
    """
    # JPEG cannot store alpha or palette data; saving such modes raises
    # OSError, so anything outside the directly writable modes is converted.
    if pil_image.mode not in ("RGB", "L", "CMYK"):
        pil_image = pil_image.convert("RGB")
    buffer = BytesIO()
    pil_image.save(buffer, format="JPEG")
    encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return f"data:image/jpeg;base64,{encoded}"


def _article_card(arxiv_id):
    """Build the card (optional thumbnail plus linked title) for one article."""
    # The abstract is fetched by get_article_details but not displayed.
    article, _abstract, image = get_article_details(arxiv_id)
    article_title = (
        article["contents"][0].get("paper_title", "article")
        if article["contents"]
        else "article"
    )
    card_content = [
        H5(
            A(
                article_title,
                href=f"https://arxiv.org/abs/{arxiv_id}",
                target="_blank",
            )
        )
    ]
    if image:
        # The thumbnail goes above the title.
        card_content.insert(
            0,
            Img(
                src=_image_data_url(image[0]["image"]),
                alt="Article image",
                style="max-width: 100%; height: auto; margin-bottom: 15px;",
            ),
        )
    return Card(*card_content, cls="mb-4")


def generate_week_content(current_week):
    """Render navigation, heading and article grid for *current_week*.

    Args:
        current_week: week-start date; must be one of ``weeks``.

    Returns:
        A ``Div`` with ``id="content"`` that the navigation buttons target.

    Raises:
        ValueError: if *current_week* is not present in ``weeks``.
    """
    week_index = weeks.index(current_week)
    # ``weeks`` is sorted newest first, so the previous week sits at the next
    # index and the next week at the preceding one.
    prev_week = weeks[week_index + 1] if week_index < len(weeks) - 1 else None
    next_week = weeks[week_index - 1] if week_index > 0 else None

    nav_buttons = Group(
        Button(
            "← Previous Week",
            hx_get=f"/week/{prev_week}" if prev_week else "#",
            hx_target="#content",
            hx_swap="innerHTML",
            disabled=not prev_week,
        ),
        Button(
            "Next Week →",
            hx_get=f"/week/{next_week}" if next_week else "#",
            hx_target="#content",
            hx_swap="innerHTML",
            disabled=not next_week,
        ),
    )

    articles = week2articles[current_week]
    article_cards = [_article_card(arxiv_id) for arxiv_id in articles]

    grid = Grid(
        *article_cards,
        style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 1rem;",
    )

    week_end = current_week + timedelta(days=6)
    return Div(
        nav_buttons,
        H3(
            f"Week of {current_week.strftime('%B %d')} - {week_end.strftime('%B %d, %Y')} ({len(articles)} articles)"
        ),
        grid,
        nav_buttons,
        id="content",
    )


@rt("/")
def get():
    """Serve the landing page showing the most recent week."""
    if not weeks:
        # No articles were loaded, so there is no "latest week" to index.
        return Titled("AnswerAI Zotero Weekly", Div("No articles found.", id="content"))
    return Titled("AnswerAI Zotero Weekly", generate_week_content(weeks[0]))


@rt("/week/{date}")
def get(date: str):
    """Serve the content fragment for the week starting on *date* (YYYY-MM-DD).

    The parameter name matches the ``{date}`` route placeholder; inside this
    function it shadows the imported ``datetime.date`` class.
    """
    # Request boundary: any failure (malformed date, unknown week, rendering
    # error) is reported inside the page rather than raised.
    try:
        current_week = datetime.strptime(date, "%Y-%m-%d").date()
        return generate_week_content(current_week)
    except Exception as e:
        return Div(f"Error displaying articles: {str(e)}")


serve()