rbiswasfc committed on
Commit
739e275
‒
1 Parent(s): 3aba82e
Files changed (5) hide show
  1. .gitignore +2 -1
  2. app.py +0 -6
  3. app_old.py +154 -0
  4. main.py +1 -1
  5. profile_app.py +27 -0
.gitignore CHANGED
@@ -3,4 +3,5 @@
3
  data
4
  .ipynb_checkpoints
5
  __pycache__
6
- .sesskey
 
 
3
  data
4
  .ipynb_checkpoints
5
  __pycache__
6
+ .sesskey
7
+ *.prof
app.py CHANGED
@@ -59,12 +59,6 @@ arxiv2article = {article["arxiv_id"]: article for article in article_ds}
59
  arxiv2abstract = {abstract["arxiv_id"]: abstract for abstract in abstract_ds}
60
  arxiv2image = {image["arxiv_id"]: image for image in image_ds}
61
 
62
- # def get_article_details(arxiv_id):
63
- # article = article_ds.filter(lambda x: x["arxiv_id"] == arxiv_id)[0]
64
- # abstract = abstract_ds.filter(lambda x: x["arxiv_id"] == arxiv_id)
65
- # image = image_ds.filter(lambda x: x["arxiv_id"] == arxiv_id)
66
- # return article, abstract, image
67
-
68
 
69
  def get_article_details(arxiv_id):
70
  article = arxiv2article.get(arxiv_id, {})
 
59
  arxiv2abstract = {abstract["arxiv_id"]: abstract for abstract in abstract_ds}
60
  arxiv2image = {image["arxiv_id"]: image for image in image_ds}
61
 
 
 
 
 
 
 
62
 
63
  def get_article_details(arxiv_id):
64
  article = arxiv2article.get(arxiv_id, {})
app_old.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import os
3
+ from collections import defaultdict
4
+ from datetime import date, datetime, timedelta
5
+ from io import BytesIO
6
+
7
+ import dotenv
8
+ from datasets import load_dataset
9
+ from dateutil.parser import parse
10
+ from dateutil.tz import tzutc
11
+ from fasthtml.common import *
12
+ from huggingface_hub import login, whoami
13
+
14
+ dotenv.load_dotenv()
15
+
16
+ style = Style("""
17
+ .grid { margin-bottom: 1rem; }
18
+ .card { display: flex; flex-direction: column; }
19
+ .card img { margin-bottom: 0.5rem; }
20
+ .card h5 { margin: 0; font-size: 0.9rem; line-height: 1.2; }
21
+ .card a { color: inherit; text-decoration: none; }
22
+ .card a:hover { text-decoration: underline; }
23
+ """)
24
+
25
+ app, rt = fast_app(html_style=(style,))
26
+
27
+ login(token=os.environ.get("HF_TOKEN"))
28
+
29
+ hf_user = whoami(os.environ.get("HF_TOKEN"))["name"]
30
+ HF_REPO_ID_TXT = f"{hf_user}/zotero-answer-ai-texts"
31
+ HF_REPO_ID_IMG = f"{hf_user}/zotero-answer-ai-images"
32
+
33
+ abstract_ds = load_dataset(HF_REPO_ID_TXT, "abstracts", split="train")
34
+ article_ds = load_dataset(HF_REPO_ID_TXT, "articles", split="train")
35
+
36
+ image_ds = load_dataset(HF_REPO_ID_IMG, "images_first_page", split="train")
37
+
38
+
39
+ def parse_date(date_string):
40
+ try:
41
+ return parse(date_string).astimezone(tzutc()).date()
42
+ except ValueError:
43
+ return date.today()
44
+
45
+
46
+ def get_week_start(date_obj):
47
+ return date_obj - timedelta(days=date_obj.weekday())
48
+
49
+
50
+ week2articles = defaultdict(list)
51
+ for article in article_ds:
52
+ date_added = parse_date(article["date_added"])
53
+ week_start = get_week_start(date_added)
54
+ week2articles[week_start].append(article["arxiv_id"])
55
+
56
+ weeks = sorted(week2articles.keys(), reverse=True)
57
+
58
+ arxiv2article = {article["arxiv_id"]: article for article in article_ds}
59
+ arxiv2abstract = {abstract["arxiv_id"]: abstract for abstract in abstract_ds}
60
+ arxiv2image = {image["arxiv_id"]: image for image in image_ds}
61
+
62
+
63
+ def get_article_details(arxiv_id):
64
+ article = arxiv2article.get(arxiv_id, {})
65
+ abstract = arxiv2abstract.get(arxiv_id, {})
66
+ image = arxiv2image.get(arxiv_id, {})
67
+ return article, abstract, image
68
+
69
+
70
+ def generate_week_content(current_week):
71
+ week_index = weeks.index(current_week)
72
+ prev_week = weeks[week_index + 1] if week_index < len(weeks) - 1 else None
73
+ next_week = weeks[week_index - 1] if week_index > 0 else None
74
+
75
+ nav_buttons = Group(
76
+ Button(
77
+ "← Previous Week",
78
+ hx_get=f"/week/{prev_week}" if prev_week else "#",
79
+ hx_target="#content",
80
+ hx_swap="innerHTML",
81
+ disabled=not prev_week,
82
+ ),
83
+ Button(
84
+ "Next Week →",
85
+ hx_get=f"/week/{next_week}" if next_week else "#",
86
+ hx_target="#content",
87
+ hx_swap="innerHTML",
88
+ disabled=not next_week,
89
+ ),
90
+ )
91
+
92
+ articles = week2articles[current_week]
93
+ article_cards = []
94
+ for arxiv_id in articles:
95
+ article, abstract, image = get_article_details(arxiv_id)
96
+ article_title = article["contents"][0].get("paper_title", "article") if article["contents"] else "article"
97
+
98
+ card_content = [
99
+ H5(
100
+ A(
101
+ article_title,
102
+ href=f"https://arxiv.org/abs/{arxiv_id}",
103
+ target="_blank",
104
+ )
105
+ )
106
+ ]
107
+
108
+ if image:
109
+ pil_image = image["image"] # image[0]["image"]
110
+ img_byte_arr = BytesIO()
111
+ pil_image.save(img_byte_arr, format="JPEG")
112
+ img_byte_arr = img_byte_arr.getvalue()
113
+ image_url = f"data:image/jpeg;base64,{base64.b64encode(img_byte_arr).decode('utf-8')}"
114
+ card_content.insert(
115
+ 0,
116
+ Img(
117
+ src=image_url,
118
+ alt="Article image",
119
+ style="max-width: 100%; height: auto; margin-bottom: 15px;",
120
+ ),
121
+ )
122
+
123
+ article_cards.append(Card(*card_content, cls="mb-4"))
124
+
125
+ grid = Grid(
126
+ *article_cards,
127
+ style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 1rem;",
128
+ )
129
+
130
+ week_end = current_week + timedelta(days=6)
131
+ return Div(
132
+ nav_buttons,
133
+ H3(f"Week of {current_week.strftime('%B %d')} - {week_end.strftime('%B %d, %Y')} ({len(articles)} articles)"),
134
+ grid,
135
+ nav_buttons,
136
+ id="content",
137
+ )
138
+
139
+
140
+ @rt("/")
141
+ def get():
142
+ return Titled("AnswerAI Zotero Weekly", generate_week_content(weeks[0]))
143
+
144
+
145
+ @rt("/week/{date}")
146
+ def get(date: str):
147
+ try:
148
+ current_week = datetime.strptime(date, "%Y-%m-%d").date()
149
+ return generate_week_content(current_week)
150
+ except Exception as e:
151
+ return Div(f"Error displaying articles: {str(e)}")
152
+
153
+
154
+ serve()
main.py CHANGED
@@ -598,7 +598,7 @@ def schedule_periodic_task():
598
  """
599
  Schedule the main task to run at the user-defined frequency
600
  """
601
- main() # run once initially
602
 
603
  frequency = "daily" # TODO: env
604
  if frequency == "hourly":
 
598
  """
599
  Schedule the main task to run at the user-defined frequency
600
  """
601
+ # main() # run once initially
602
 
603
  frequency = "daily" # TODO: env
604
  if frequency == "hourly":
profile_app.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cProfile
2
+
3
+ from fasthtml.common import *
4
+ from starlette.testclient import TestClient
5
+
6
+ # Import your FastHTML app and weeks
7
+ from app import app, weeks
8
+
9
+
10
+ def profile_app():
11
+ """Profile the FastHTML app!"""
12
+ client = TestClient(app)
13
+
14
+ # Test home page
15
+ client.get("/")
16
+
17
+ # Test up to 10 weeks (or fewer if fewer than 10 weeks are available)
18
+ for week in weeks[: min(10, len(weeks))]:
19
+ client.get(f"/week/{week}")
20
+
21
+
22
+ if __name__ == "__main__":
23
+ # Run the profiling
24
+ cProfile.run("profile_app()", "profile_output.prof")
25
+
26
+ # python profile_app.py
27
+ # snakeviz profile_output.prof