Spaces:
Runtime error
Runtime error
aus10powell
committed on
Commit
·
b20b18b
1
Parent(s):
8897995
Update app.py
Browse files
app.py
CHANGED
@@ -8,7 +8,8 @@ import json
|
|
8 |
import logging
|
9 |
import sys
|
10 |
import spacy
|
11 |
-
|
|
|
12 |
import pandas as pd
|
13 |
import numpy as np
|
14 |
import os
|
@@ -22,17 +23,21 @@ from fastapi.staticfiles import StaticFiles
|
|
22 |
from fastapi.templating import Jinja2Templates
|
23 |
|
24 |
from rouge_score import rouge_scorer
|
|
|
25 |
import scripts.sentiment as sentiment
|
26 |
import scripts.twitter_scraper as ts
|
27 |
from scripts import sentiment
|
28 |
from scripts.summarization import bert_summarization
|
29 |
from scripts.twitter_scraper import get_latest_account_tweets
|
30 |
-
from scripts import
|
|
|
31 |
import scripts.utils as utils
|
|
|
32 |
from scripts import generative
|
33 |
import nltk
|
34 |
|
35 |
logging.basicConfig(level=logging.INFO)
|
|
|
36 |
|
37 |
app = FastAPI()
|
38 |
templates = Jinja2Templates(directory="templates")
|
@@ -82,35 +87,34 @@ async def get_accounts() -> List[dict]:
|
|
82 |
|
83 |
@app.get("/tweets/{username}")
|
84 |
def get_tweets_username(username: str) -> dict:
|
85 |
-
# if username in username_list:
|
86 |
-
# query = f"from:{username} since:{start_date} until:{end_date}"
|
87 |
-
# return ts.get_tweets(query=query)
|
88 |
-
# else:
|
89 |
-
# return {"detail": "Account not in scope of project."}
|
90 |
-
|
91 |
-
# Method 1: Using Tweepy method
|
92 |
-
# df_tweets = get_latest_account_tweets(username)
|
93 |
-
|
94 |
# Method 2: Use Snscrape
|
95 |
df_tweets = ts.get_tweets(handle=username)
|
96 |
|
97 |
if isinstance(df_tweets, pd.DataFrame):
|
98 |
-
print(df_tweets.head(2))
|
99 |
-
print(df_tweets.shape)
|
100 |
df_tweets = df_tweets[["handle", "created_at","retweet_count","view_count","like_count", "full_text"]]
|
101 |
-
df_tweets["created_at"] = df_tweets["created_at"].dt.strftime(
|
102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
df_tweets_html = df_tweets.to_html(classes="center", index=False, escape=False)
|
104 |
-
df_tweets.to_html(open(
|
105 |
df_tweets_data = df_tweets.to_dict(orient="records")
|
106 |
-
|
107 |
-
response_data = {
|
108 |
-
"html": df_tweets_html,
|
109 |
-
"data": df_tweets_data
|
110 |
-
}
|
111 |
|
112 |
return JSONResponse(content=response_data, status_code=200)
|
113 |
-
# return HTMLResponse(content=df_tweets_html, status_code=200)
|
114 |
else:
|
115 |
print("Error: Failed to retrieve tweets.")
|
116 |
return df_tweets
|
@@ -214,6 +218,7 @@ async def get_sentiment(username: str) -> Dict[str, Dict[str, float]]:
|
|
214 |
}
|
215 |
|
216 |
|
|
|
217 |
@app.post("/api/generate")
|
218 |
async def generate_text(request: Request):
|
219 |
"""Generate text from a prompt.
|
@@ -269,10 +274,9 @@ async def generate_summary(request: Request):
|
|
269 |
|
270 |
print("*" * 50)
|
271 |
data = await request.json()
|
272 |
-
print(
|
273 |
# Get the list of text
|
274 |
-
tweets = [t[
|
275 |
-
|
276 |
|
277 |
# Concatenate tweets into a single string
|
278 |
text = " .".join(tweets)
|
@@ -281,35 +285,25 @@ async def generate_summary(request: Request):
|
|
281 |
nlp.add_pipe("sentencizer")
|
282 |
|
283 |
sentences = nlp(text).sents
|
284 |
-
|
285 |
-
# phrases = Phrases(
|
286 |
-
# sentences, min_count=1, threshold=1, connector_words=ENGLISH_CONNECTOR_WORDS
|
287 |
-
# )
|
288 |
-
# first_sentence = next(iter(sentences))
|
289 |
-
# first_sentence
|
290 |
sentences = list(sentences)
|
291 |
-
# # Shuffle the list
|
292 |
-
# random.shuffle(sentences)
|
293 |
-
# Option 1
|
294 |
-
# sampled_tweets = random.sample(tweets, int(0.1 * len(tweets)))
|
295 |
|
296 |
# Option 2
|
297 |
sampled_sentences = random.sample(sentences, int(0.1 * len(sentences)))
|
298 |
-
|
299 |
sampled_sentences = [sentiment.tweet_cleaner(s.text) for s in sampled_sentences]
|
300 |
|
301 |
# Join the strings into one text blob
|
302 |
tweet_blob = " ".join(sampled_sentences)
|
303 |
|
304 |
# Generate the summary
|
305 |
-
summary = bert_summarization(
|
306 |
-
|
307 |
-
)
|
308 |
-
print("Summary:",summary)
|
309 |
# Return the summary
|
310 |
return {"tweets_summary": summary}
|
311 |
|
312 |
|
|
|
313 |
@app.get("/examples1")
|
314 |
async def read_examples():
|
315 |
with open("templates/charts/handle_sentiment_breakdown.html") as f:
|
@@ -322,3 +316,9 @@ async def read_examples():
|
|
322 |
with open("templates/charts/handle_sentiment_timesteps.html") as f:
|
323 |
html = f.read()
|
324 |
return HTMLResponse(content=html)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
import logging
|
9 |
import sys
|
10 |
import spacy
|
11 |
+
|
12 |
+
# sys.setrecursionlimit(20000)
|
13 |
import pandas as pd
|
14 |
import numpy as np
|
15 |
import os
|
|
|
23 |
from fastapi.templating import Jinja2Templates
|
24 |
|
25 |
from rouge_score import rouge_scorer
|
26 |
+
# Scripts
|
27 |
import scripts.sentiment as sentiment
|
28 |
import scripts.twitter_scraper as ts
|
29 |
from scripts import sentiment
|
30 |
from scripts.summarization import bert_summarization
|
31 |
from scripts.twitter_scraper import get_latest_account_tweets
|
32 |
+
from scripts.sentiment import twitter_sentiment_api_score
|
33 |
+
from scripts import twitter_scraper as ts
|
34 |
import scripts.utils as utils
|
35 |
+
from scripts import translation
|
36 |
from scripts import generative
|
37 |
import nltk
|
38 |
|
39 |
logging.basicConfig(level=logging.INFO)
|
40 |
+
pd.set_option('display.max_colwidth', 20)
|
41 |
|
42 |
app = FastAPI()
|
43 |
templates = Jinja2Templates(directory="templates")
|
|
|
87 |
|
88 |
@app.get("/tweets/{username}")
|
89 |
def get_tweets_username(username: str) -> dict:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
# Method 2: Use Snscrape
|
91 |
df_tweets = ts.get_tweets(handle=username)
|
92 |
|
93 |
if isinstance(df_tweets, pd.DataFrame):
|
|
|
|
|
94 |
df_tweets = df_tweets[["handle", "created_at","retweet_count","view_count","like_count", "full_text"]]
|
95 |
+
df_tweets["created_at"] = df_tweets["created_at"].dt.strftime(
|
96 |
+
"%Y-%m-%d %H:%M:%S"
|
97 |
+
)
|
98 |
+
df_tweets = df_tweets.sort_values("created_at", ascending=False)
|
99 |
+
|
100 |
+
# Additional processing
|
101 |
+
logging.info("Running sentiment on tweets")
|
102 |
+
sentiments = twitter_sentiment_api_score(
|
103 |
+
df_tweets['full_text'].to_list(), use_api=False
|
104 |
+
)
|
105 |
+
df_tweets["sentiment"] = [s['argmax'] for s in sentiments]
|
106 |
+
if username == "alikarimi_ak8":
|
107 |
+
p = translation.PersianTextProcessor()
|
108 |
+
df_tweets['full_text_translated'] = df_tweets["full_text"].apply(lambda c: p.translate_text(persian_text = c))
|
109 |
+
|
110 |
+
|
111 |
df_tweets_html = df_tweets.to_html(classes="center", index=False, escape=False)
|
112 |
+
df_tweets.to_html(open("df_tweets_html.html", "w"))
|
113 |
df_tweets_data = df_tweets.to_dict(orient="records")
|
114 |
+
|
115 |
+
response_data = {"html": df_tweets_html, "data": df_tweets_data}
|
|
|
|
|
|
|
116 |
|
117 |
return JSONResponse(content=response_data, status_code=200)
|
|
|
118 |
else:
|
119 |
print("Error: Failed to retrieve tweets.")
|
120 |
return df_tweets
|
|
|
218 |
}
|
219 |
|
220 |
|
221 |
+
## APIs: Primarily called by the index page
|
222 |
@app.post("/api/generate")
|
223 |
async def generate_text(request: Request):
|
224 |
"""Generate text from a prompt.
|
|
|
274 |
|
275 |
print("*" * 50)
|
276 |
data = await request.json()
|
277 |
+
print("data", data["tweetsData"])
|
278 |
# Get the list of text
|
279 |
+
tweets = [t["full_text"] for t in data["tweetsData"]]
|
|
|
280 |
|
281 |
# Concatenate tweets into a single string
|
282 |
text = " .".join(tweets)
|
|
|
285 |
nlp.add_pipe("sentencizer")
|
286 |
|
287 |
sentences = nlp(text).sents
|
288 |
+
|
|
|
|
|
|
|
|
|
|
|
289 |
sentences = list(sentences)
|
|
|
|
|
|
|
|
|
290 |
|
291 |
# Option 2
|
292 |
sampled_sentences = random.sample(sentences, int(0.1 * len(sentences)))
|
293 |
+
|
294 |
sampled_sentences = [sentiment.tweet_cleaner(s.text) for s in sampled_sentences]
|
295 |
|
296 |
# Join the strings into one text blob
|
297 |
tweet_blob = " ".join(sampled_sentences)
|
298 |
|
299 |
# Generate the summary
|
300 |
+
summary = bert_summarization(tweet_blob)
|
301 |
+
print("Summary:", summary)
|
|
|
|
|
302 |
# Return the summary
|
303 |
return {"tweets_summary": summary}
|
304 |
|
305 |
|
306 |
+
## Historical Tweets pages
|
307 |
@app.get("/examples1")
|
308 |
async def read_examples():
|
309 |
with open("templates/charts/handle_sentiment_breakdown.html") as f:
|
|
|
316 |
with open("templates/charts/handle_sentiment_timesteps.html") as f:
|
317 |
html = f.read()
|
318 |
return HTMLResponse(content=html)
|
319 |
+
|
320 |
+
|
321 |
+
# uvicorn --workers=2 app:app
|
322 |
+
if __name__ == "__main__":
|
323 |
+
# uvicorn.run(app, host="0.0.0.0", port=8000)
|
324 |
+
uvicorn.run("app:app", host="127.0.0.1", port=5050, reload=True)
|