Commit
•
fceefe7
1
Parent(s):
075c34d
Removed health and added webhook
Browse files
app.py
CHANGED
@@ -1,22 +1,28 @@
|
|
1 |
import os
|
2 |
from pathlib import Path
|
3 |
-
from datetime import datetime
|
4 |
|
5 |
import gradio as gr
|
6 |
from bs4 import BeautifulSoup
|
|
|
7 |
from rich.console import Console
|
8 |
from rich.syntax import Syntax
|
9 |
|
|
|
|
|
10 |
proj_dir = Path(__name__).parent
|
11 |
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
|
16 |
-
|
17 |
-
if
|
18 |
raise gr.Error("FREQUENCY environment variable must be 'daily' or 'hourly'")
|
19 |
|
|
|
|
|
|
|
|
|
20 |
|
21 |
def log_file_to_html_string():
|
22 |
log_file = "mylog.log"
|
@@ -63,8 +69,8 @@ pre, code {
|
|
63 |
|
64 |
intro_md = f"""
|
65 |
# Reddit Dataset Creator
|
66 |
-
This is a reddit dataset creator which builds and updates [{
|
67 |
-
which pulls from [/r/{
|
68 |
|
69 |
As shown in the below diagram this space pulls data from reddit via [PRAW](https://praw.readthedocs.io/en/stable/), processes it, and puts it in a corresponding dataset.
|
70 |
"""
|
@@ -98,22 +104,15 @@ log files. I use gradio for `app` and map that to the open port of huggingface s
|
|
98 |
|
99 |
The only communication between `app` and `main` is the log file.
|
100 |
"""
|
101 |
-
def health(text):
|
102 |
-
# Get the current date and time
|
103 |
-
current_time = datetime.now()
|
104 |
-
|
105 |
-
# Print it in the format YYYY-MM-DD HH:MM:SS
|
106 |
-
print(current_time.strftime("%Y-%m-%d %H:%M:%S"))
|
107 |
-
return "Healthy"
|
108 |
|
109 |
-
with gr.Blocks() as
|
110 |
with gr.Tab("Application"):
|
111 |
gr.Markdown(intro_md)
|
112 |
gr.Image(proj_dir / 'media' / 'reddit_scraper.drawio.png')
|
113 |
gr.Markdown("# Logs")
|
114 |
output = gr.HTML(log_file_to_html_string, every=1)
|
115 |
-
|
116 |
-
|
117 |
() => {
|
118 |
document.body.classList.toggle('dark');
|
119 |
document.querySelector('gradio-app').style.backgroundColor = 'var(--color-background-primary)'
|
@@ -130,7 +129,15 @@ with gr.Blocks() as demo:
|
|
130 |
with gr.Column():
|
131 |
output_text = gr.Textbox(label="Output Text")
|
132 |
|
133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
|
135 |
if __name__ == '__main__':
|
136 |
-
|
|
|
|
1 |
import os
|
2 |
from pathlib import Path
|
|
|
3 |
|
4 |
import gradio as gr
|
5 |
from bs4 import BeautifulSoup
|
6 |
+
from huggingface_hub import WebhookPayload, WebhooksServer
|
7 |
from rich.console import Console
|
8 |
from rich.syntax import Syntax
|
9 |
|
10 |
+
from utilities.my_logger import setup_logger
|
11 |
+
|
12 |
# Anchor on this file's own location. `Path(__name__)` is only the module name
# (e.g. "__main__"), so its parent is always "." and bundled assets such as
# media/reddit_scraper.drawio.png would resolve only when the process happened
# to start in the project directory.
proj_dir = Path(__file__).resolve().parent
|
13 |
|
14 |
+
SUBREDDIT = os.environ["SUBREDDIT"]
|
15 |
+
USERNAME = os.environ["USERNAME"]
|
16 |
+
DATASET_NAME = f"{USERNAME}/dataset-creator-reddit-{SUBREDDIT}"
|
17 |
|
18 |
+
FREQUENCY = os.environ.get("FREQUENCY", '').lower()
|
19 |
+
if FREQUENCY not in ["daily", "hourly"]:
|
20 |
raise gr.Error("FREQUENCY environment variable must be 'daily' or 'hourly'")
|
21 |
|
22 |
+
SECRET = os.getenv("HF_WEBHOOK_SECRET")
|
23 |
+
|
24 |
+
logger = setup_logger(__name__)
|
25 |
+
|
26 |
|
27 |
def log_file_to_html_string():
|
28 |
log_file = "mylog.log"
|
|
|
69 |
|
70 |
intro_md = f"""
|
71 |
# Reddit Dataset Creator
|
72 |
+
This is a reddit dataset creator which builds and updates [{DATASET_NAME}](https://huggingface.co/datasets/{DATASET_NAME})
|
73 |
+
which pulls from [/r/{SUBREDDIT}](http://www.reddit.com/r/{SUBREDDIT}). Check the dataset for more details.
|
74 |
|
75 |
As shown in the below diagram this space pulls data from reddit via [PRAW](https://praw.readthedocs.io/en/stable/), processes it, and puts it in a corresponding dataset.
|
76 |
"""
|
|
|
104 |
|
105 |
The only communication between `app` and `main` is the log file.
|
106 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
|
108 |
+
with gr.Blocks() as ui:
|
109 |
with gr.Tab("Application"):
|
110 |
gr.Markdown(intro_md)
|
111 |
gr.Image(proj_dir / 'media' / 'reddit_scraper.drawio.png')
|
112 |
gr.Markdown("# Logs")
|
113 |
output = gr.HTML(log_file_to_html_string, every=1)
|
114 |
+
ui.load(None,
|
115 |
+
_js="""
|
116 |
() => {
|
117 |
document.body.classList.toggle('dark');
|
118 |
document.querySelector('gradio-app').style.backgroundColor = 'var(--color-background-primary)'
|
|
|
129 |
with gr.Column():
|
130 |
output_text = gr.Textbox(label="Output Text")
|
131 |
|
132 |
+
app = WebhooksServer(ui=ui, webhook_secret=SECRET)
|
133 |
+
|
134 |
+
|
135 |
+
@app.add_webhook("/community")
async def community(payload: WebhookPayload):
    """Log repo-scoped webhook events delivered to the ``/community`` endpoint.

    Events whose scope does not start with ``"repo"`` are ignored. The handler
    has no explicit return value.
    """
    is_repo_event = payload.event.scope.startswith("repo")
    if not is_repo_event:
        return
    # NOTE(review): the message names DATASET_NAME unconditionally; the payload's
    # own repo is not inspected here -- confirm the webhook is scoped to that dataset.
    logger.info(f"Webhook received from {DATASET_NAME} indicating a repo {payload.event.action}")
|
139 |
+
|
140 |
|
141 |
if __name__ == '__main__':
|
142 |
+
app.run()
|
143 |
+
# ui.queue().launch(server_name="0.0.0.0", show_error=True, server_port=7860)
|