rbiswasfc committed on
Commit
748a8f9
·
1 Parent(s): c43dfe6
Files changed (4) hide show
  1. .gitignore +2 -1
  2. .sesskey +1 -0
  3. app copy.py +134 -0
  4. app.py +110 -29
.gitignore CHANGED
@@ -1,4 +1,5 @@
1
  .env
2
  *.json
3
  data
4
- .ipynb_checkpoints
 
 
1
  .env
2
  *.json
3
  data
4
+ .ipynb_checkpoints
5
+ __pycache__
.sesskey ADDED
@@ -0,0 +1 @@
 
 
1
+ 98e6ddfe-5bf8-4c5e-b4ec-860ba87f4d78
app copy.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import os
3
+ from collections import defaultdict
4
+ from datetime import date, datetime, timedelta
5
+ from io import BytesIO
6
+
7
+ import dotenv
8
+ from datasets import load_dataset
9
+ from dateutil.parser import parse
10
+ from dateutil.tz import tzutc
11
+ from fasthtml.common import *
12
+ from huggingface_hub import login, whoami
13
+
14
+ dotenv.load_dotenv()
15
+
16
+ style = Style("""
17
+ .grid { margin-bottom: 1rem; }
18
+ .card { display: flex; flex-direction: column; }
19
+ .card img { margin-bottom: 0.5rem; }
20
+ .card h5 { margin: 0; font-size: 0.9rem; line-height: 1.2; }
21
+ .card a { color: inherit; text-decoration: none; }
22
+ .card a:hover { text-decoration: underline; }
23
+ """)
24
+
25
+ app, rt = fast_app(html_style=(style,))
26
+
27
+ login(token=os.environ.get("HF_TOKEN"))
28
+
29
+ hf_user = whoami(os.environ.get("HF_TOKEN"))["name"]
30
+ HF_REPO_ID = f"{hf_user}/zotero-articles"
31
+
32
+ abstract_ds = load_dataset(HF_REPO_ID, "abstracts", split="train")
33
+ article_ds = load_dataset(HF_REPO_ID, "articles", split="train")
34
+
35
+ image_ds = load_dataset(HF_REPO_ID, "images", split="train")
36
+ image_ds = image_ds.filter(lambda x: x["page_number"] == 1)
37
+
38
+
39
+ def parse_date(date_string):
40
+ try:
41
+ return parse(date_string).astimezone(tzutc()).date()
42
+ except ValueError:
43
+ return date.today()
44
+
45
+
46
+ def get_week_start(date_obj):
47
+ return date_obj - timedelta(days=date_obj.weekday())
48
+
49
+
50
+ week2articles = defaultdict(list)
51
+ for article in article_ds:
52
+ date_added = parse_date(article["date_added"])
53
+ week_start = get_week_start(date_added)
54
+ week2articles[week_start].append(article["arxiv_id"])
55
+
56
+ weeks = sorted(week2articles.keys(), reverse=True)
57
+
58
+
59
+ def get_article_details(arxiv_id):
60
+ article = article_ds.filter(lambda x: x["arxiv_id"] == arxiv_id)[0]
61
+ abstract = abstract_ds.filter(lambda x: x["arxiv_id"] == arxiv_id)
62
+ image = image_ds.filter(lambda x: x["arxiv_id"] == arxiv_id)
63
+ return article, abstract, image
64
+
65
+
66
+ def generate_week_content(current_week):
67
+ week_index = weeks.index(current_week)
68
+ prev_week = weeks[week_index + 1] if week_index < len(weeks) - 1 else None
69
+ next_week = weeks[week_index - 1] if week_index > 0 else None
70
+
71
+ nav_buttons = Group(
72
+ Button(
73
+ "← Previous Week",
74
+ hx_get=f"/week/{prev_week}" if prev_week else "#",
75
+ hx_target="#content",
76
+ hx_swap="innerHTML",
77
+ disabled=not prev_week,
78
+ ),
79
+ Button(
80
+ "Next Week →",
81
+ hx_get=f"/week/{next_week}" if next_week else "#",
82
+ hx_target="#content",
83
+ hx_swap="innerHTML",
84
+ disabled=not next_week,
85
+ ),
86
+ )
87
+
88
+ articles = week2articles[current_week]
89
+ article_cards = []
90
+ for arxiv_id in articles:
91
+ article, abstract, image = get_article_details(arxiv_id)
92
+ article_title = article["contents"][0].get("paper_title", "article") if article["contents"] else "article"
93
+
94
+ card_content = [H5(A(article_title, href=f"https://arxiv.org/abs/{arxiv_id}", target="_blank"))]
95
+
96
+ if image:
97
+ pil_image = image[0]["image"]
98
+ img_byte_arr = BytesIO()
99
+ pil_image.save(img_byte_arr, format="JPEG")
100
+ img_byte_arr = img_byte_arr.getvalue()
101
+ image_url = f"data:image/jpeg;base64,{base64.b64encode(img_byte_arr).decode('utf-8')}"
102
+ card_content.insert(
103
+ 1, Img(src=image_url, alt="Article image", style="max-width: 100%; height: auto; margin-bottom: 15px;")
104
+ )
105
+
106
+ article_cards.append(Card(*card_content, cls="mb-4"))
107
+
108
+ grid = Grid(*article_cards, style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 1rem;")
109
+
110
+ week_end = current_week + timedelta(days=6)
111
+ return Div(
112
+ nav_buttons,
113
+ H3(f"Week of {current_week.strftime('%B %d')} - {week_end.strftime('%B %d, %Y')} ({len(articles)} articles)"),
114
+ grid,
115
+ nav_buttons,
116
+ id="content",
117
+ )
118
+
119
+
120
+ @rt("/")
121
+ def get():
122
+ return Titled("AnswerAI Zotero Weekly", generate_week_content(weeks[0]))
123
+
124
+
125
+ @rt("/week/{date}")
126
+ def get(date: str):
127
+ try:
128
+ current_week = datetime.strptime(date, "%Y-%m-%d").date()
129
+ return generate_week_content(current_week)
130
+ except Exception as e:
131
+ return Div(f"Error displaying articles: {str(e)}")
132
+
133
+
134
+ serve()
app.py CHANGED
@@ -1,53 +1,134 @@
 
1
  import os
 
 
 
2
 
3
  import dotenv
4
- import pandas as pd
5
  from datasets import load_dataset
 
 
6
  from fasthtml.common import *
 
7
 
8
  dotenv.load_dotenv()
9
 
10
- app, rt = fast_app()
11
- from huggingface_hub import login, whoami
 
 
 
 
 
 
 
 
12
 
13
  login(token=os.environ.get("HF_TOKEN"))
14
 
15
  hf_user = whoami(os.environ.get("HF_TOKEN"))["name"]
16
- HF_REPO_ID = f"{hf_user}/zotero-answer-ai-articles"
17
 
 
 
18
 
19
- @rt("/")
20
- async def get():
21
- """Dataset viewer home page"""
22
- return Titled(
23
- "Zotero Dataset Viewer",
24
- Form(
25
- Input(id="dataset_subset", placeholder="Enter dataset subset"),
26
- Button("Load Dataset"),
27
- hx_post="/load_dataset",
28
- hx_target="#dataset_content",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  ),
30
- Div(id="dataset_content"),
31
  )
32
 
 
 
 
 
 
33
 
34
- @rt("/load_dataset")
35
- async def post(dataset_subset: str):
36
- """Load and display dataset"""
37
- try:
38
- dataset = load_dataset(HF_REPO_ID, dataset_subset, split="train")
39
- df = pd.DataFrame(dataset[:10]) # Load first 10 rows
40
- table = df.to_html(classes="table", index=False)
 
 
 
 
 
 
41
 
42
- return Div(
43
- H2(f"Dataset: {HF_REPO_ID}, Subset: {dataset_subset}"),
44
- P(f"Number of rows: {len(dataset)}"),
45
- P(f"Columns: {', '.join(df.columns)}"),
46
- Div(NotStr(table), cls="table-responsive"),
47
- )
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  except Exception as e:
50
- return Div(f"Error loading dataset: {str(e)}")
51
 
52
 
53
  serve()
 
1
+ import base64
2
  import os
3
+ from collections import defaultdict
4
+ from datetime import date, datetime, timedelta
5
+ from io import BytesIO
6
 
7
  import dotenv
 
8
  from datasets import load_dataset
9
+ from dateutil.parser import parse
10
+ from dateutil.tz import tzutc
11
  from fasthtml.common import *
12
+ from huggingface_hub import login, whoami
13
 
14
  dotenv.load_dotenv()
15
 
16
+ style = Style("""
17
+ .grid { margin-bottom: 1rem; }
18
+ .card { display: flex; flex-direction: column; }
19
+ .card img { margin-bottom: 0.5rem; }
20
+ .card h5 { margin: 0; font-size: 0.9rem; line-height: 1.2; }
21
+ .card a { color: inherit; text-decoration: none; }
22
+ .card a:hover { text-decoration: underline; }
23
+ """)
24
+
25
+ app, rt = fast_app(html_style=(style,))
26
 
27
  login(token=os.environ.get("HF_TOKEN"))
28
 
29
  hf_user = whoami(os.environ.get("HF_TOKEN"))["name"]
30
+ HF_REPO_ID = f"{hf_user}/zotero-articles"
31
 
32
+ abstract_ds = load_dataset(HF_REPO_ID, "abstracts", split="train")
33
+ article_ds = load_dataset(HF_REPO_ID, "articles", split="train")
34
 
35
+ image_ds = load_dataset(HF_REPO_ID, "images", split="train")
36
+ image_ds = image_ds.filter(lambda x: x["page_number"] == 1)
37
+
38
+
39
+ def parse_date(date_string):
40
+ try:
41
+ return parse(date_string).astimezone(tzutc()).date()
42
+ except ValueError:
43
+ return date.today()
44
+
45
+
46
+ def get_week_start(date_obj):
47
+ return date_obj - timedelta(days=date_obj.weekday())
48
+
49
+
50
+ week2articles = defaultdict(list)
51
+ for article in article_ds:
52
+ date_added = parse_date(article["date_added"])
53
+ week_start = get_week_start(date_added)
54
+ week2articles[week_start].append(article["arxiv_id"])
55
+
56
+ weeks = sorted(week2articles.keys(), reverse=True)
57
+
58
+
59
+ def get_article_details(arxiv_id):
60
+ article = article_ds.filter(lambda x: x["arxiv_id"] == arxiv_id)[0]
61
+ abstract = abstract_ds.filter(lambda x: x["arxiv_id"] == arxiv_id)
62
+ image = image_ds.filter(lambda x: x["arxiv_id"] == arxiv_id)
63
+ return article, abstract, image
64
+
65
+
66
+ def generate_week_content(current_week):
67
+ week_index = weeks.index(current_week)
68
+ prev_week = weeks[week_index + 1] if week_index < len(weeks) - 1 else None
69
+ next_week = weeks[week_index - 1] if week_index > 0 else None
70
+
71
+ nav_buttons = Group(
72
+ Button(
73
+ "← Previous Week",
74
+ hx_get=f"/week/{prev_week}" if prev_week else "#",
75
+ hx_target="#content",
76
+ hx_swap="innerHTML",
77
+ disabled=not prev_week,
78
+ ),
79
+ Button(
80
+ "Next Week →",
81
+ hx_get=f"/week/{next_week}" if next_week else "#",
82
+ hx_target="#content",
83
+ hx_swap="innerHTML",
84
+ disabled=not next_week,
85
  ),
 
86
  )
87
 
88
+ articles = week2articles[current_week]
89
+ article_cards = []
90
+ for arxiv_id in articles:
91
+ article, abstract, image = get_article_details(arxiv_id)
92
+ article_title = article["contents"][0].get("paper_title", "article") if article["contents"] else "article"
93
 
94
+ card_content = [H5(A(article_title, href=f"https://arxiv.org/abs/{arxiv_id}", target="_blank"))]
95
+
96
+ if image:
97
+ pil_image = image[0]["image"]
98
+ img_byte_arr = BytesIO()
99
+ pil_image.save(img_byte_arr, format="JPEG")
100
+ img_byte_arr = img_byte_arr.getvalue()
101
+ image_url = f"data:image/jpeg;base64,{base64.b64encode(img_byte_arr).decode('utf-8')}"
102
+ card_content.insert(
103
+ 0, Img(src=image_url, alt="Article image", style="max-width: 100%; height: auto; margin-bottom: 15px;")
104
+ )
105
+
106
+ article_cards.append(Card(*card_content, cls="mb-4"))
107
 
108
+ grid = Grid(*article_cards, style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 1rem;")
 
 
 
 
 
109
 
110
+ week_end = current_week + timedelta(days=6)
111
+ return Div(
112
+ nav_buttons,
113
+ H3(f"Week of {current_week.strftime('%B %d')} - {week_end.strftime('%B %d, %Y')} ({len(articles)} articles)"),
114
+ grid,
115
+ nav_buttons,
116
+ id="content",
117
+ )
118
+
119
+
120
+ @rt("/")
121
+ def get():
122
+ return Titled("AnswerAI Zotero Weekly", generate_week_content(weeks[0]))
123
+
124
+
125
+ @rt("/week/{date}")
126
+ def get(date: str):
127
+ try:
128
+ current_week = datetime.strptime(date, "%Y-%m-%d").date()
129
+ return generate_week_content(current_week)
130
  except Exception as e:
131
+ return Div(f"Error displaying articles: {str(e)}")
132
 
133
 
134
  serve()