kkastr
committed on
Commit
•
f6c60e6
1
Parent(s):
dc005c4
branch for huggingface spaces
Browse files
- README.md +13 -1
- app.py +5 -14
- download_model.py +0 -8
README.md
CHANGED
@@ -1,4 +1,16 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
## TODO
|
4 |
|
|
|
1 |
+
---
|
2 |
+
title: Summit
|
3 |
+
emoji: 📊
|
4 |
+
colorFrom: green
|
5 |
+
colorTo: blue
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 3.19.1
|
8 |
+
python_version: 3.11.0
|
9 |
+
app_file: app.py
|
10 |
+
pinned: false
|
11 |
+
---
|
12 |
+
|
13 |
+
## Reddit Thread Summarizer (Gradio)
|
14 |
|
15 |
## TODO
|
16 |
|
app.py
CHANGED
@@ -3,7 +3,6 @@ import re
|
|
3 |
import sys
|
4 |
import nltk
|
5 |
import praw
|
6 |
-
import tomllib
|
7 |
import gradio as gr
|
8 |
import pandas as pd
|
9 |
import praw.exceptions
|
@@ -64,13 +63,11 @@ def getComments(url, debug=False):
|
|
64 |
df = pd.read_csv("./debug_comments.csv")
|
65 |
return df
|
66 |
|
67 |
-
|
|
|
|
|
68 |
|
69 |
-
reddit = praw.Reddit(
|
70 |
-
client_id=api_keys['client_id'] ,
|
71 |
-
client_secret=api_keys['client_secret'] ,
|
72 |
-
user_agent=api_keys['user_agent']
|
73 |
-
)
|
74 |
|
75 |
try:
|
76 |
submission = reddit.submission(url=url)
|
@@ -127,7 +124,7 @@ def summarizer(url: str) -> str:
|
|
127 |
|
128 |
lst_summaries = []
|
129 |
|
130 |
-
nlp = pipeline('summarization', model="
|
131 |
|
132 |
for grp in chunked_df:
|
133 |
# treating a group of comments as one block of text
|
@@ -146,12 +143,6 @@ def summarizer(url: str) -> str:
|
|
146 |
|
147 |
|
148 |
if __name__ == "__main__":
|
149 |
-
if not os.path.isfile('./api_params.toml'):
|
150 |
-
print("""
|
151 |
-
Could not find api params config file in directory.
|
152 |
-
Please create api_params.toml by following the instructions in the README.
|
153 |
-
""")
|
154 |
-
sys.exit(1)
|
155 |
|
156 |
with gr.Blocks(css=".gradio-container {max-width: 900px !important; width: 100%}") as demo:
|
157 |
submission_url = gr.Textbox(label='Post URL')
|
|
|
3 |
import sys
|
4 |
import nltk
|
5 |
import praw
|
|
|
6 |
import gradio as gr
|
7 |
import pandas as pd
|
8 |
import praw.exceptions
|
|
|
63 |
df = pd.read_csv("./debug_comments.csv")
|
64 |
return df
|
65 |
|
66 |
+
client_id = os.environ['REDDIT_CLIENT_ID']
|
67 |
+
client_secret = os.environ['REDDIT_CLIENT_SECRET']
|
68 |
+
user_agent = os.environ['REDDIT_USER_AGENT']
|
69 |
|
70 |
+
reddit = praw.Reddit(client_id=client_id, client_secret=client_secret, user_agent=user_agent)
|
|
|
|
|
|
|
|
|
71 |
|
72 |
try:
|
73 |
submission = reddit.submission(url=url)
|
|
|
124 |
|
125 |
lst_summaries = []
|
126 |
|
127 |
+
nlp = pipeline('summarization', model="sshleifer/distilbart-cnn-12-6")
|
128 |
|
129 |
for grp in chunked_df:
|
130 |
# treating a group of comments as one block of text
|
|
|
143 |
|
144 |
|
145 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
|
147 |
with gr.Blocks(css=".gradio-container {max-width: 900px !important; width: 100%}") as demo:
|
148 |
submission_url = gr.Textbox(label='Post URL')
|
download_model.py
DELETED
@@ -1,8 +0,0 @@
|
|
1 |
-
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
2 |
-
|
3 |
-
tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")
|
4 |
-
|
5 |
-
model = AutoModelForSeq2SeqLM.from_pretrained("sshleifer/distilbart-cnn-12-6")
|
6 |
-
|
7 |
-
tokenizer.save_pretrained("./model")
|
8 |
-
model.save_pretrained("./model")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|