Spaces:

rbiswasfc
/

zotero-refresh-pipeline

Sleeping

+import base64
+import os
+from collections import defaultdict
+from datetime import date, datetime, timedelta
+from io import BytesIO
+import dotenv
+from datasets import load_dataset
+from dateutil.parser import parse
+from dateutil.tz import tzutc
+from fasthtml.common import *
+from huggingface_hub import login, whoami
+# Load environment variables (HF_TOKEN is read below) from a .env file via python-dotenv.
+dotenv.load_dotenv()
+# Extra CSS for the article grid and cards; passed to fast_app through html_style.
+style = Style("""
+                .grid { margin-bottom: 1rem; }
+                .card { display: flex; flex-direction: column; }
+                .card img { margin-bottom: 0.5rem; }
+                .card h5 { margin: 0; font-size: 0.9rem; line-height: 1.2; }
+                .card a { color: inherit; text-decoration: none; }
+                .card a:hover { text-decoration: underline; }
+            """)
+app, rt = fast_app(html_style=(style,))
+# Authenticate with the Hugging Face Hub; the dataset repo name is derived from the account name.
+login(token=os.environ.get("HF_TOKEN"))
+hf_user = whoami(os.environ.get("HF_TOKEN"))["name"]
+HF_REPO_ID = f"{hf_user}/zotero-articles"
+# The three dataset configurations are loaded once, at import time.
+abstract_ds = load_dataset(HF_REPO_ID, "abstracts", split="train")
+article_ds = load_dataset(HF_REPO_ID, "articles", split="train")
+image_ds = load_dataset(HF_REPO_ID, "images", split="train")
+# Keep only the image rows whose page_number is 1.
+image_ds = image_ds.filter(lambda x: x["page_number"] == 1)
+def parse_date(date_string):
+    """Convert a timestamp string into a UTC calendar date.
+
+    Args:
+        date_string: Date/time text to hand to ``dateutil.parser.parse``.
+
+    Returns:
+        The ``datetime.date`` of the parsed moment after conversion to UTC.
+        When the value cannot be parsed, today's date is returned instead
+        (best-effort fallback, so callers always receive a date).
+    """
+    try:
+        # NOTE: a naive datetime is treated as system-local time by astimezone().
+        return parse(date_string).astimezone(tzutc()).date()
+    except (ValueError, TypeError, OverflowError):
+        # ValueError: unparseable text; TypeError: None or other non-string
+        # input; OverflowError: numeric fields too large for the platform.
+        return date.today()
+def get_week_start(date_obj):
+    """Return the Monday of the week that contains ``date_obj``.
+
+    ``weekday()`` numbers Monday as 0, so stepping back that many days lands
+    on the first day of the week.
+    """
+    days_since_monday = date_obj.weekday()
+    return date_obj - timedelta(days=days_since_monday)
+# Group arXiv ids by the Monday that starts the week in which each article was added.
+week2articles = defaultdict(list)
+for article in article_ds:
+    date_added = parse_date(article["date_added"])
+    week_start = get_week_start(date_added)
+    week2articles[week_start].append(article["arxiv_id"])
+# Week-start dates sorted newest first, so index 0 is the most recent week.
+weeks = sorted(week2articles.keys(), reverse=True)
+def _rows_for_arxiv_id(ds, arxiv_id):
+    """Return the rows of ``ds`` whose ``arxiv_id`` column equals ``arxiv_id``."""
+    # input_columns hands only the id column to the predicate instead of the
+    # whole row, so the other columns (e.g. image payloads) do not have to be
+    # materialised for every row just to compare ids.
+    return ds.filter(lambda value: value == arxiv_id, input_columns=["arxiv_id"])
+def get_article_details(arxiv_id):
+    """Look up an article row plus its matching abstract and image rows.
+
+    Args:
+        arxiv_id: Identifier matched against the ``arxiv_id`` column.
+
+    Returns:
+        ``(article, abstract, image)``: the first matching row of
+        ``article_ds`` and the filtered (possibly empty) subsets of
+        ``abstract_ds`` and ``image_ds``.
+
+    Raises:
+        IndexError: If ``article_ds`` contains no row for ``arxiv_id``.
+    """
+    matches = _rows_for_arxiv_id(article_ds, arxiv_id)
+    if len(matches) == 0:
+        # Same exception type as indexing the empty result, with a usable message.
+        raise IndexError(f"No article found for arxiv_id {arxiv_id!r}")
+    abstract = _rows_for_arxiv_id(abstract_ds, arxiv_id)
+    image = _rows_for_arxiv_id(image_ds, arxiv_id)
+    return matches[0], abstract, image
+def _nav_button(label, target_week):
+    """Build one week-navigation button; it is disabled when ``target_week`` is None."""
+    return Button(
+        label,
+        hx_get=f"/week/{target_week}" if target_week else "#",
+        hx_target="#content",
+        # The response is itself a <div id="content">, so the old element is
+        # replaced; innerHTML would nest a second #content inside the first.
+        hx_swap="outerHTML",
+        disabled=not target_week,
+    )
+def _image_data_url(pil_image):
+    """Encode a PIL image as a base64 ``data:image/jpeg`` URL."""
+    # The JPEG writer rejects alpha/palette modes (e.g. RGBA, P) with OSError;
+    # only those are converted, images that already saved fine are untouched.
+    if pil_image.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
+        pil_image = pil_image.convert("RGB")
+    buffer = BytesIO()
+    pil_image.save(buffer, format="JPEG")
+    encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
+    return f"data:image/jpeg;base64,{encoded}"
+def generate_week_content(current_week):
+    """Render the navigation, heading and article grid for one week.
+
+    Args:
+        current_week: Week-start ``datetime.date``; must be an entry of ``weeks``.
+
+    Returns:
+        A ``Div`` with ``id="content"`` holding the navigation buttons (top
+        and bottom), a heading with the date range and article count, and a
+        three-column grid of article cards.
+
+    Raises:
+        ValueError: If ``current_week`` is not a known week start.
+    """
+    try:
+        week_index = weeks.index(current_week)
+    except ValueError:
+        raise ValueError(f"No articles recorded for the week starting {current_week}") from None
+    # ``weeks`` is sorted newest first, so the previous (older) week is at the next index.
+    prev_week = weeks[week_index + 1] if week_index < len(weeks) - 1 else None
+    next_week = weeks[week_index - 1] if week_index > 0 else None
+    nav_buttons = Group(
+        _nav_button("← Previous Week", prev_week),
+        _nav_button("Next Week →", next_week),
+    )
+    articles = week2articles[current_week]
+    article_cards = []
+    for arxiv_id in articles:
+        article, _abstract, image = get_article_details(arxiv_id)
+        contents = article["contents"]
+        article_title = contents[0].get("paper_title", "article") if contents else "article"
+        card_content = [H5(A(article_title, href=f"https://arxiv.org/abs/{arxiv_id}", target="_blank"))]
+        if image:
+            # The image goes after the title (the list holds only the title here).
+            card_content.append(
+                Img(
+                    src=_image_data_url(image[0]["image"]),
+                    alt="Article image",
+                    style="max-width: 100%; height: auto; margin-bottom: 15px;",
+                )
+            )
+        article_cards.append(Card(*card_content, cls="mb-4"))
+    grid = Grid(*article_cards, style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 1rem;")
+    week_end = current_week + timedelta(days=6)
+    return Div(
+        nav_buttons,
+        H3(f"Week of {current_week.strftime('%B %d')} - {week_end.strftime('%B %d, %Y')} ({len(articles)} articles)"),
+        grid,
+        nav_buttons,
+        id="content",
+    )
+@rt("/")
+def get():
+    """Render the landing page showing the most recent week's articles."""
+    if not weeks:
+        # No article rows were loaded, so there is no week to show;
+        # indexing weeks[0] would raise IndexError.
+        return Titled("AnswerAI Zotero Weekly", Div("No articles available yet.", id="content"))
+    return Titled("AnswerAI Zotero Weekly", generate_week_content(weeks[0]))
+@rt("/week/{date}")
+def get(date: str):
+    """Serve the content fragment for the week named by the ``date`` path segment.
+
+    ``date`` must be formatted as ``YYYY-MM-DD``. Any failure while parsing it
+    or building the page is returned to the client as a ``Div`` carrying the
+    error text instead of propagating.
+    """
+    try:
+        parsed = datetime.strptime(date, "%Y-%m-%d")
+        content = generate_week_content(parsed.date())
+    except Exception as exc:
+        return Div(f"Error displaying articles: {str(exc)}")
+    return content
+# Start the app server; `serve` presumably comes from the fasthtml.common star import (confirm).
+serve()

app.py CHANGED Viewed

@@ -1,53 +1,134 @@
 import os
 import dotenv
-import pandas as pd
 from datasets import load_dataset
 from fasthtml.common import *
 dotenv.load_dotenv()
-app, rt = fast_app()
-from huggingface_hub import login, whoami
 login(token=os.environ.get("HF_TOKEN"))
 hf_user = whoami(os.environ.get("HF_TOKEN"))["name"]
-HF_REPO_ID = f"{hf_user}/zotero-answer-ai-articles"
-@rt("/")
-async def get():
-    """Dataset viewer home page"""
-    return Titled(
-        "Zotero Dataset Viewer",
-        Form(
-            Input(id="dataset_subset", placeholder="Enter dataset subset"),
-            Button("Load Dataset"),
-            hx_post="/load_dataset",
-            hx_target="#dataset_content",
         ),
-        Div(id="dataset_content"),
     )
-@rt("/load_dataset")
-async def post(dataset_subset: str):
-    """Load and display dataset"""
-    try:
-        dataset = load_dataset(HF_REPO_ID, dataset_subset, split="train")
-        df = pd.DataFrame(dataset[:10])  # Load first 10 rows
-        table = df.to_html(classes="table", index=False)
-        return Div(
-            H2(f"Dataset: {HF_REPO_ID}, Subset: {dataset_subset}"),
-            P(f"Number of rows: {len(dataset)}"),
-            P(f"Columns: {', '.join(df.columns)}"),
-            Div(NotStr(table), cls="table-responsive"),
-        )
     except Exception as e:
-        return Div(f"Error loading dataset: {str(e)}")
 serve()

+import base64
 import os
+from collections import defaultdict
+from datetime import date, datetime, timedelta
+from io import BytesIO
 import dotenv
 from datasets import load_dataset
+from dateutil.parser import parse
+from dateutil.tz import tzutc
 from fasthtml.common import *
+from huggingface_hub import login, whoami
+# Load environment variables (HF_TOKEN is read below) from a .env file via python-dotenv.
 dotenv.load_dotenv()
+# Extra CSS for the article grid and cards; passed to fast_app through html_style.
+style = Style("""
+                .grid { margin-bottom: 1rem; }
+                .card { display: flex; flex-direction: column; }
+                .card img { margin-bottom: 0.5rem; }
+                .card h5 { margin: 0; font-size: 0.9rem; line-height: 1.2; }
+                .card a { color: inherit; text-decoration: none; }
+                .card a:hover { text-decoration: underline; }
+            """)
+app, rt = fast_app(html_style=(style,))
+# Authenticate with the Hugging Face Hub; the dataset repo name is derived from the account name.
 login(token=os.environ.get("HF_TOKEN"))
 hf_user = whoami(os.environ.get("HF_TOKEN"))["name"]
+HF_REPO_ID = f"{hf_user}/zotero-articles"
+# The three dataset configurations are loaded once, at import time.
+abstract_ds = load_dataset(HF_REPO_ID, "abstracts", split="train")
+article_ds = load_dataset(HF_REPO_ID, "articles", split="train")
+image_ds = load_dataset(HF_REPO_ID, "images", split="train")
+# Keep only the image rows whose page_number is 1.
+image_ds = image_ds.filter(lambda x: x["page_number"] == 1)
+def parse_date(date_string):
+    """Convert a timestamp string into a UTC calendar date.
+
+    Args:
+        date_string: Date/time text to hand to ``dateutil.parser.parse``.
+
+    Returns:
+        The ``datetime.date`` of the parsed moment after conversion to UTC.
+        When the value cannot be parsed, today's date is returned instead
+        (best-effort fallback, so callers always receive a date).
+    """
+    try:
+        # NOTE: a naive datetime is treated as system-local time by astimezone().
+        return parse(date_string).astimezone(tzutc()).date()
+    except (ValueError, TypeError, OverflowError):
+        # ValueError: unparseable text; TypeError: None or other non-string
+        # input; OverflowError: numeric fields too large for the platform.
+        return date.today()
+def get_week_start(date_obj):
+    """Return the Monday of the week that contains ``date_obj``.
+
+    ``weekday()`` numbers Monday as 0, so stepping back that many days lands
+    on the first day of the week.
+    """
+    days_since_monday = date_obj.weekday()
+    return date_obj - timedelta(days=days_since_monday)
+# Group arXiv ids by the Monday that starts the week in which each article was added.
+week2articles = defaultdict(list)
+for article in article_ds:
+    date_added = parse_date(article["date_added"])
+    week_start = get_week_start(date_added)
+    week2articles[week_start].append(article["arxiv_id"])
+# Week-start dates sorted newest first, so index 0 is the most recent week.
+weeks = sorted(week2articles.keys(), reverse=True)
+def _rows_for_arxiv_id(ds, arxiv_id):
+    """Return the rows of ``ds`` whose ``arxiv_id`` column equals ``arxiv_id``."""
+    # input_columns hands only the id column to the predicate instead of the
+    # whole row, so the other columns (e.g. image payloads) do not have to be
+    # materialised for every row just to compare ids.
+    return ds.filter(lambda value: value == arxiv_id, input_columns=["arxiv_id"])
+def get_article_details(arxiv_id):
+    """Look up an article row plus its matching abstract and image rows.
+
+    Args:
+        arxiv_id: Identifier matched against the ``arxiv_id`` column.
+
+    Returns:
+        ``(article, abstract, image)``: the first matching row of
+        ``article_ds`` and the filtered (possibly empty) subsets of
+        ``abstract_ds`` and ``image_ds``.
+
+    Raises:
+        IndexError: If ``article_ds`` contains no row for ``arxiv_id``.
+    """
+    matches = _rows_for_arxiv_id(article_ds, arxiv_id)
+    if len(matches) == 0:
+        # Same exception type as indexing the empty result, with a usable message.
+        raise IndexError(f"No article found for arxiv_id {arxiv_id!r}")
+    abstract = _rows_for_arxiv_id(abstract_ds, arxiv_id)
+    image = _rows_for_arxiv_id(image_ds, arxiv_id)
+    return matches[0], abstract, image
+def _nav_button(label, target_week):
+    """Build one week-navigation button; it is disabled when ``target_week`` is None."""
+    return Button(
+        label,
+        hx_get=f"/week/{target_week}" if target_week else "#",
+        hx_target="#content",
+        # The response is itself a <div id="content">, so the old element is
+        # replaced; innerHTML would nest a second #content inside the first.
+        hx_swap="outerHTML",
+        disabled=not target_week,
+    )
+def _image_data_url(pil_image):
+    """Encode a PIL image as a base64 ``data:image/jpeg`` URL."""
+    # The JPEG writer rejects alpha/palette modes (e.g. RGBA, P) with OSError;
+    # only those are converted, images that already saved fine are untouched.
+    if pil_image.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
+        pil_image = pil_image.convert("RGB")
+    buffer = BytesIO()
+    pil_image.save(buffer, format="JPEG")
+    encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
+    return f"data:image/jpeg;base64,{encoded}"
+def generate_week_content(current_week):
+    """Render the navigation, heading and article grid for one week.
+
+    Args:
+        current_week: Week-start ``datetime.date``; must be an entry of ``weeks``.
+
+    Returns:
+        A ``Div`` with ``id="content"`` holding the navigation buttons (top
+        and bottom), a heading with the date range and article count, and a
+        three-column grid of article cards.
+
+    Raises:
+        ValueError: If ``current_week`` is not a known week start.
+    """
+    try:
+        week_index = weeks.index(current_week)
+    except ValueError:
+        raise ValueError(f"No articles recorded for the week starting {current_week}") from None
+    # ``weeks`` is sorted newest first, so the previous (older) week is at the next index.
+    prev_week = weeks[week_index + 1] if week_index < len(weeks) - 1 else None
+    next_week = weeks[week_index - 1] if week_index > 0 else None
+    nav_buttons = Group(
+        _nav_button("← Previous Week", prev_week),
+        _nav_button("Next Week →", next_week),
+    )
+    articles = week2articles[current_week]
+    article_cards = []
+    for arxiv_id in articles:
+        article, _abstract, image = get_article_details(arxiv_id)
+        contents = article["contents"]
+        article_title = contents[0].get("paper_title", "article") if contents else "article"
+        card_content = [H5(A(article_title, href=f"https://arxiv.org/abs/{arxiv_id}", target="_blank"))]
+        if image:
+            # The image is placed ahead of the title link on the card.
+            card_content.insert(
+                0,
+                Img(
+                    src=_image_data_url(image[0]["image"]),
+                    alt="Article image",
+                    style="max-width: 100%; height: auto; margin-bottom: 15px;",
+                ),
+            )
+        article_cards.append(Card(*card_content, cls="mb-4"))
+    grid = Grid(*article_cards, style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 1rem;")
+    week_end = current_week + timedelta(days=6)
+    return Div(
+        nav_buttons,
+        H3(f"Week of {current_week.strftime('%B %d')} - {week_end.strftime('%B %d, %Y')} ({len(articles)} articles)"),
+        grid,
+        nav_buttons,
+        id="content",
+    )
+@rt("/")
+def get():
+    """Render the landing page showing the most recent week's articles."""
+    if not weeks:
+        # No article rows were loaded, so there is no week to show;
+        # indexing weeks[0] would raise IndexError.
+        return Titled("AnswerAI Zotero Weekly", Div("No articles available yet.", id="content"))
+    return Titled("AnswerAI Zotero Weekly", generate_week_content(weeks[0]))
+@rt("/week/{date}")
+def get(date: str):
+    """Serve the content fragment for the week named by the ``date`` path segment.
+
+    ``date`` must be formatted as ``YYYY-MM-DD``. Any failure while parsing it
+    or building the page is returned to the client as a ``Div`` carrying the
+    error text instead of propagating.
+    """
+    try:
+        parsed = datetime.strptime(date, "%Y-%m-%d")
+        content = generate_week_content(parsed.date())
+    except Exception as exc:
+        return Div(f"Error displaying articles: {str(exc)}")
+    return content
+# Start the app server; `serve` presumably comes from the fasthtml.common star import (confirm).
 serve()