Spaces:
Running
Running
background scheduler
Browse files- app.py +47 -15
- requirements.txt +2 -1
- update.py +41 -7
app.py
CHANGED
@@ -1,14 +1,18 @@
|
|
1 |
import json
|
2 |
import os
|
3 |
from datetime import datetime, timezone, timedelta
|
|
|
4 |
|
5 |
import meilisearch
|
6 |
from fasthtml.common import *
|
7 |
from markdown import markdown
|
8 |
from dotenv import load_dotenv
|
|
|
|
|
|
|
9 |
|
10 |
from constants import MeilisearchIndexFields
|
11 |
-
from update import process_webhook
|
12 |
|
13 |
loaded = load_dotenv("./.env", override=True)
|
14 |
print("Loaded .env file:", loaded)
|
@@ -19,7 +23,21 @@ ms_client = meilisearch.Client(MS_URL, MS_SEARCH_KEY)
|
|
19 |
|
20 |
css_content = open("styles.css").read()
|
21 |
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
|
25 |
md_exts = "codehilite", "smarty", "extra", "sane_lists"
|
@@ -29,7 +47,8 @@ def Markdown(s, exts=md_exts, **kw):
|
|
29 |
return Div(NotStr(markdown(s, extensions=exts)), **kw)
|
30 |
|
31 |
|
32 |
-
scroll_script = Script(
|
|
|
33 |
document.addEventListener('DOMContentLoaded', function() {
|
34 |
var scrollButton = document.getElementById('scroll-top-btn');
|
35 |
|
@@ -46,7 +65,8 @@ document.addEventListener('DOMContentLoaded', function() {
|
|
46 |
document.documentElement.scrollTop = 0; // For Chrome, Firefox, IE and Opera
|
47 |
};
|
48 |
});
|
49 |
-
"""
|
|
|
50 |
|
51 |
|
52 |
def date_range_inputs(start_date, end_date):
|
@@ -80,7 +100,7 @@ def search_form(start_date, end_date):
|
|
80 |
|
81 |
|
82 |
def iso_to_unix_timestamp(iso_string):
|
83 |
-
dt =
|
84 |
return int(dt.timestamp())
|
85 |
|
86 |
|
@@ -94,7 +114,10 @@ def make_query(query, start_date, end_date, page=1, limit=10):
|
|
94 |
twenty_three_hours_59_minutes_59_seconds_in_seconds = (23 * 60 + 59) * 60 + 59
|
95 |
|
96 |
after_timestamp = iso_to_unix_timestamp(start_date)
|
97 |
-
before_timestamp =
|
|
|
|
|
|
|
98 |
|
99 |
options = {
|
100 |
"limit": limit,
|
@@ -102,13 +125,18 @@ def make_query(query, start_date, end_date, page=1, limit=10):
|
|
102 |
"filter": f"{MeilisearchIndexFields.UPDATED_AT.value} >= {after_timestamp} AND {MeilisearchIndexFields.UPDATED_AT.value} < {before_timestamp}",
|
103 |
"attributesToCrop": [MeilisearchIndexFields.CONTENT.value],
|
104 |
"cropLength": 30,
|
105 |
-
"attributesToHighlight": [
|
|
|
|
|
|
|
106 |
"highlightPreTag": '<span class="highlight">',
|
107 |
"highlightPostTag": "</span>",
|
|
|
108 |
}
|
109 |
|
110 |
-
|
111 |
-
|
|
|
112 |
|
113 |
|
114 |
def search_results(query, start_date, end_date, page=1):
|
@@ -136,7 +164,9 @@ def make_card(result):
|
|
136 |
result = result["_formatted"]
|
137 |
|
138 |
url = result[MeilisearchIndexFields.URL.value]
|
139 |
-
date = unix_timestamp_to_nice_format(
|
|
|
|
|
140 |
|
141 |
return Div(
|
142 |
Div(
|
@@ -156,7 +186,7 @@ def make_pagination(current_page, total_hits, limit=10):
|
|
156 |
|
157 |
if current_page > 1:
|
158 |
children.append(
|
159 |
-
|
160 |
"Previous",
|
161 |
hx_post=f"/search?page={current_page-1}",
|
162 |
hx_target="#search-results",
|
@@ -178,8 +208,10 @@ def make_pagination(current_page, total_hits, limit=10):
|
|
178 |
|
179 |
return Div(*children, cls="pagination")
|
180 |
|
181 |
-
|
182 |
-
|
|
|
|
|
183 |
style="""
|
184 |
position: fixed;
|
185 |
bottom: 20px;
|
@@ -191,9 +223,10 @@ scroll_button = Button("Scroll to Top",
|
|
191 |
border-radius: 5px;
|
192 |
padding: 10px 15px;
|
193 |
cursor: pointer;
|
194 |
-
"""
|
195 |
)
|
196 |
|
|
|
197 |
@rt("/")
|
198 |
def get():
|
199 |
end_date = datetime.now()
|
@@ -217,7 +250,6 @@ def post(query: str, start_date: str, end_date: str, page: int = 1):
|
|
217 |
|
218 |
@app.post("/webhook")
|
219 |
async def hf_webhook(request):
|
220 |
-
|
221 |
return await process_webhook(request)
|
222 |
|
223 |
|
|
|
1 |
import json
|
2 |
import os
|
3 |
from datetime import datetime, timezone, timedelta
|
4 |
+
from dateutil import parser as dateparser
|
5 |
|
6 |
import meilisearch
|
7 |
from fasthtml.common import *
|
8 |
from markdown import markdown
|
9 |
from dotenv import load_dotenv
|
10 |
+
from apscheduler.schedulers.background import BackgroundScheduler
|
11 |
+
from apscheduler.triggers.cron import CronTrigger
|
12 |
+
from contextlib import asynccontextmanager
|
13 |
|
14 |
from constants import MeilisearchIndexFields
|
15 |
+
from update import process_webhook, update_webhooks
|
16 |
|
17 |
loaded = load_dotenv("./.env", override=True)
|
18 |
print("Loaded .env file:", loaded)
|
|
|
23 |
|
24 |
css_content = open("styles.css").read()
|
25 |
|
26 |
+
|
27 |
+
@asynccontextmanager
async def lifespan(app):
    """FastHTML app lifespan hook: run a background cron job while the app is up.

    Setup starts an APScheduler ``BackgroundScheduler`` that calls
    ``update_webhooks`` on a cron schedule; cleanup stops it on shutdown.
    """
    # Setup
    scheduler = BackgroundScheduler()
    # "0 */3 * * *" = at minute 0 of every 3rd hour.
    scheduler.add_job(update_webhooks, CronTrigger.from_crontab("0 */3 * * *"))
    scheduler.start()

    # Hand control back to the app for its whole lifetime.
    yield

    # Cleanup
    scheduler.shutdown()
|
38 |
+
|
39 |
+
|
40 |
+
app, rt = fast_app(hdrs=(Style(css_content),), lifespan=lifespan)
|
41 |
|
42 |
|
43 |
md_exts = "codehilite", "smarty", "extra", "sane_lists"
|
|
|
47 |
return Div(NotStr(markdown(s, extensions=exts)), **kw)
|
48 |
|
49 |
|
50 |
+
scroll_script = Script(
|
51 |
+
"""
|
52 |
document.addEventListener('DOMContentLoaded', function() {
|
53 |
var scrollButton = document.getElementById('scroll-top-btn');
|
54 |
|
|
|
65 |
document.documentElement.scrollTop = 0; // For Chrome, Firefox, IE and Opera
|
66 |
};
|
67 |
});
|
68 |
+
"""
|
69 |
+
)
|
70 |
|
71 |
|
72 |
def date_range_inputs(start_date, end_date):
|
|
|
100 |
|
101 |
|
102 |
def iso_to_unix_timestamp(iso_string):
|
103 |
+
dt = dateparser.isoparse(iso_string)
|
104 |
return int(dt.timestamp())
|
105 |
|
106 |
|
|
|
114 |
twenty_three_hours_59_minutes_59_seconds_in_seconds = (23 * 60 + 59) * 60 + 59
|
115 |
|
116 |
after_timestamp = iso_to_unix_timestamp(start_date)
|
117 |
+
before_timestamp = (
|
118 |
+
iso_to_unix_timestamp(end_date)
|
119 |
+
+ twenty_three_hours_59_minutes_59_seconds_in_seconds
|
120 |
+
)
|
121 |
|
122 |
options = {
|
123 |
"limit": limit,
|
|
|
125 |
"filter": f"{MeilisearchIndexFields.UPDATED_AT.value} >= {after_timestamp} AND {MeilisearchIndexFields.UPDATED_AT.value} < {before_timestamp}",
|
126 |
"attributesToCrop": [MeilisearchIndexFields.CONTENT.value],
|
127 |
"cropLength": 30,
|
128 |
+
"attributesToHighlight": [
|
129 |
+
MeilisearchIndexFields.CONTENT.value,
|
130 |
+
MeilisearchIndexFields.TITLE.value,
|
131 |
+
],
|
132 |
"highlightPreTag": '<span class="highlight">',
|
133 |
"highlightPostTag": "</span>",
|
134 |
+
"distinct": MeilisearchIndexFields.URL.value,
|
135 |
}
|
136 |
|
137 |
+
return ms_client.index(MeilisearchIndexFields.INDEX_NAME.value).search(
|
138 |
+
query=query, opt_params=options
|
139 |
+
)
|
140 |
|
141 |
|
142 |
def search_results(query, start_date, end_date, page=1):
|
|
|
164 |
result = result["_formatted"]
|
165 |
|
166 |
url = result[MeilisearchIndexFields.URL.value]
|
167 |
+
date = unix_timestamp_to_nice_format(
|
168 |
+
int(result[MeilisearchIndexFields.UPDATED_AT.value])
|
169 |
+
)
|
170 |
|
171 |
return Div(
|
172 |
Div(
|
|
|
186 |
|
187 |
if current_page > 1:
|
188 |
children.append(
|
189 |
+
Button(
|
190 |
"Previous",
|
191 |
hx_post=f"/search?page={current_page-1}",
|
192 |
hx_target="#search-results",
|
|
|
208 |
|
209 |
return Div(*children, cls="pagination")
|
210 |
|
211 |
+
|
212 |
+
scroll_button = Button(
|
213 |
+
"Scroll to Top",
|
214 |
+
id="scroll-top-btn",
|
215 |
style="""
|
216 |
position: fixed;
|
217 |
bottom: 20px;
|
|
|
223 |
border-radius: 5px;
|
224 |
padding: 10px 15px;
|
225 |
cursor: pointer;
|
226 |
+
""",
|
227 |
)
|
228 |
|
229 |
+
|
230 |
@rt("/")
|
231 |
def get():
|
232 |
end_date = datetime.now()
|
|
|
250 |
|
251 |
@app.post("/webhook")
async def hf_webhook(request):
    """Receive Hugging Face Hub webhook POSTs and delegate to update.process_webhook."""
    return await process_webhook(request)
|
254 |
|
255 |
|
requirements.txt
CHANGED
@@ -5,4 +5,5 @@ fasthtml-hf
|
|
5 |
markdown
|
6 |
meilisearch
|
7 |
huggingface_hub
|
8 |
-
requests
|
|
|
|
5 |
markdown
|
6 |
meilisearch
|
7 |
huggingface_hub
|
8 |
+
requests
|
9 |
+
apscheduler
|
update.py
CHANGED
@@ -171,28 +171,62 @@ def update_discussion_status(payload):
|
|
171 |
print("Update request:", update_request)
|
172 |
|
173 |
|
|
|
|
|
|
|
|
|
|
|
174 |
|
175 |
def update_webhooks():
|
176 |
"""
|
177 |
-
|
178 |
"""
|
|
|
|
|
179 |
|
180 |
existing_webhooks = api.list_webhooks()
|
181 |
|
182 |
webhook_url = os.environ["HF_WEBHOOK_URL"]
|
183 |
|
184 |
-
|
185 |
|
186 |
-
if len(
|
187 |
print("More than one webhook found")
|
188 |
-
print(
|
189 |
print("updating the first one")
|
190 |
|
191 |
-
id2update =
|
|
|
|
|
|
|
|
|
|
|
192 |
|
193 |
# get trending models
|
194 |
|
195 |
-
trending_models = api.list_models(sort="likes7d", direction=-1, limit=
|
196 |
|
197 |
to_add = []
|
198 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
print("Update request:", update_request)
|
172 |
|
173 |
|
174 |
+
def is_user(user_or_org):
    """Return True if *user_or_org* is a user account on the Hugging Face Hub.

    The Hub's users-only overview endpoint answers 200 for a user and a
    non-200 status for anything else (e.g. an organization).
    """
    api_url = f"https://huggingface.co/api/users/{user_or_org}/overview"
    # Timeout added so a stalled connection cannot hang the background
    # scheduler job forever (requests has no default timeout).
    response = requests.get(api_url, timeout=10)
    return response.status_code == 200
|
178 |
+
|
179 |
|
180 |
def update_webhooks():
    """
    Update the old webhook every so often with trending models.

    Looks up the existing Hub webhook whose URL matches HF_WEBHOOK_URL,
    collects the names it already watches, then extends its watch list with
    the owners of the currently trending models before pushing the update
    back via ``api.update_webhook``.
    """

    print("Updating webhook")

    existing_webhooks = api.list_webhooks()

    webhook_url = os.environ["HF_WEBHOOK_URL"]

    # Match by exact URL; NOTE(review): if no webhook matches, the
    # webhook2update[0] accesses below raise IndexError — confirm a matching
    # webhook is always pre-created.
    webhook2update = [x for x in existing_webhooks if x.url == webhook_url]

    if len(webhook2update) > 1:
        print("More than one webhook found")
        print(webhook2update)
        print("updating the first one")

    id2update = webhook2update[0].id

    # Map of already-watched name -> type, used only for membership tests.
    watch_dict = {}

    for ww in webhook2update[0].watched:
        watch_dict[ww.name] = ww.type

    # get trending models

    trending_models = api.list_models(sort="likes7d", direction=-1, limit=1000)

    to_add = []

    for m in trending_models:
        # m.id is "owner/model"; dedup against watched entries by owner.
        org_or_user = m.id.split("/")[0]
        if org_or_user in watch_dict:
            continue
        # NOTE(review): the dedup key is the owner but the appended name is
        # the full model id, so several models from one new owner all get
        # added, each typed "user"/"org" — verify this matches the Hub
        # webhook "watched" schema and the intended behavior.
        if is_user(org_or_user):
            to_add.append({"name": m.id, "type": "user"})
        else:
            to_add.append({"name": m.id, "type": "org"})

    new_watched = webhook2update[0].watched + to_add

    print("There are now", len(new_watched), "items in the watched list")

    api.update_webhook(
        id=id2update,
        url=webhook_url,
        watched=new_watched,
        domains=["discussion"],
        secret=WEBHOOK_SECRET,
    )
|
231 |
+
|
232 |
+
|