sfmajors committed
Commit d563a73
Parent: 9bbed42

Fixing referencing

TSLASentimentAnalyzer/.ipynb_checkpoints/app-checkpoint.py ADDED
@@ -0,0 +1,68 @@
+ import matplotlib.pyplot as plt
+ import pandas as pd
+ import streamlit as st
+ from loguru import logger
+ from transformers import pipeline
+
+ from config import settings
+ from scraper import RedditScraper
+
+ reddit = RedditScraper()
+ st.title("$TSLA Market Sentiment Analyzer using r/TSLA Subreddit")
+
+
+ def load_data(number, scraping_option):
+     st.write("loading new data")
+     comments = []
+     for submission in scraping_option(number):
+         # Scrape each submission's comment tree once and reuse it for logging.
+         submission_comments = reddit.get_comment_forest(submission.comments)
+         comments.extend(submission_comments)
+         logger.debug(
+             "{}: {} comments, {} scraped",
+             submission.title,
+             submission.num_comments,
+             len(submission_comments),
+         )
+     return pd.DataFrame(comments)
+
+
+ def select_scrap_type(option):
+     if option == "Hot":
+         st.write("Selected Hot submissions")
+         return reddit.get_hot
+     if option == "Rising":
+         st.write("Selected rising submissions")
+         return reddit.get_rising
+     if option == "New":
+         st.write("Selected new submissions")
+         return reddit.get_new
+
+
+ st.info(
+     "Option has been deactivated: the subreddit is not very active, so every mode returned the same submissions"
+ )
+ select = st.selectbox("choose option", ["Hot", "Rising", "New"], disabled=True)
+
+ number = st.number_input("Insert a number", step=1, max_value=30, min_value=3)
+
+ sentiment_pipeline = pipeline("sentiment-analysis", settings.model_path)
+
+ data = load_data(number, select_scrap_type("Hot"))
+
+ if st.button("Analyze"):
+     results = sentiment_pipeline(list(data["comment"]))
+     data["label"] = [res["label"] for res in results]
+     data["sentiment_score"] = [res["score"] for res in results]
+     label_counts = data.groupby("label").count()
+     st.write(label_counts)
+     # Derive pie-chart labels from the data instead of hard-coding
+     # ("Negative", "Positive"), so the chart matches whatever labels the model emits.
+     sizes = list(label_counts["comment"])
+     labels = list(label_counts.index)
+     fig1, ax1 = plt.subplots()
+     ax1.pie(sizes, labels=labels, autopct="%1.1f%%", shadow=True, startangle=90)
+     ax1.axis("equal")
+     st.pyplot(fig1)
+
+ st.write(data)
TSLASentimentAnalyzer/.ipynb_checkpoints/classifier-checkpoint.py ADDED
@@ -0,0 +1,7 @@
+ from transformers import pipeline
+
+
+ def predict(data, custom_model: str = "finiteautomata/bertweet-base-sentiment-analysis"):
+     # Pass the requested checkpoint to the pipeline; it was previously ignored.
+     sentiment_pipeline = pipeline("sentiment-analysis", model=custom_model)
+     return sentiment_pipeline(data)
TSLASentimentAnalyzer/.ipynb_checkpoints/config-checkpoint.py ADDED
@@ -0,0 +1,15 @@
+ from pydantic import BaseSettings
+
+
+ class Settings(BaseSettings):
+     reddit_api_client_id: str
+     reddit_api_client_secret: str
+     stock_data_api_key: str
+     reddit_api_user_agent: str = "USERAGENT"
+     model_path: str = "fourthbrain-demo/model_trained_by_me2"
+
+     class Config:
+         env_file = ".env"  # field names map to env vars with no prefix
+
+
+ settings = Settings()
TSLASentimentAnalyzer/.ipynb_checkpoints/scraper-checkpoint.py ADDED
@@ -0,0 +1,60 @@
+ import praw
+ from loguru import logger
+ from praw.models import MoreComments
+
+ from config import settings
+
+
+ class RedditScraper:
+     def __init__(self, subreddit: str = "TSLA"):
+         reddit = praw.Reddit(
+             client_id=settings.reddit_api_client_id,
+             client_secret=settings.reddit_api_client_secret,
+             user_agent=settings.reddit_api_user_agent,
+         )
+         self.subreddit = reddit.subreddit(subreddit)
+
+     def get_hot(self, posts: int = 10):
+         return self.subreddit.hot(limit=posts)
+
+     def get_new(self, posts: int = 10):
+         return self.subreddit.new(limit=posts)
+
+     def get_rising(self, posts: int = 10):
+         return self.subreddit.rising(limit=posts)
+
+     def get_top(self, posts: int = 10):
+         return self.subreddit.top(limit=posts)
+
+     def get_top_comments(self, submission, threshold: int = 5):
+         return [
+             comment.body
+             for comment in submission.comments
+             if comment.score >= threshold
+         ]
+
+     def get_comment_forest(self, comment_forest):
+         # Flatten a PRAW CommentForest (or MoreComments stub) into a list of dicts.
+         all_comments = []
+         if isinstance(comment_forest, MoreComments):
+             comments_list = comment_forest.comments()
+         else:
+             comments_list = comment_forest.list()
+         logger.debug("{}: {} comments", comment_forest, len(comments_list))
+         for comment in comments_list:
+             if isinstance(comment, MoreComments):
+                 # Expand "load more comments" stubs recursively and collect
+                 # their comments instead of only logging them.
+                 logger.info("more comments")
+                 all_comments.extend(self.get_comment_forest(comment))
+                 continue
+             all_comments.append(
+                 {
+                     "comment": comment.body,
+                     "title": comment.submission.title,
+                     "id": comment.id,
+                     "created_at": int(comment.created_utc),
+                     "score": comment.score,
+                 }
+             )
+         return all_comments
TSLASentimentAnalyzer/README.md ADDED
@@ -0,0 +1,66 @@
+ ---
+ title: Demo
+ emoji: 📉
+ colorFrom: gray
+ colorTo: green
+ sdk: streamlit
+ sdk_version: 1.10.0
+ app_file: app.py
+ pinned: false
+ license: apache-2.0
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+
+ # quick start
+
+ 1. Install the requirements with `pip install -r requirements.txt`
+ 2. Create a `.env` file in the root directory and add the following variables (a minimal example follows this list):
+    `REDDIT_API_CLIENT_ID`: the client ID of your Reddit app
+    `REDDIT_API_CLIENT_SECRET`: the client secret of your Reddit app
+    Follow this tutorial to generate them: <https://www.jcchouinard.com/get-reddit-api-credentials-with-praw/>
+
+ 3. Run the Streamlit app with `streamlit run app.py`
+
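+ A minimal `.env` might look like this (the values are placeholders); note that `config.py` also requires `STOCK_DATA_API_KEY`:
+
+ ```
+ REDDIT_API_CLIENT_ID=your_client_id
+ REDDIT_API_CLIENT_SECRET=your_client_secret
+ STOCK_DATA_API_KEY=your_stock_api_key
+ ```
+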
+ # scraping
+
+ The app uses the `praw` library to scrape submissions from Reddit. A class named `scraper.RedditScraper` implements and abstracts that feature.
+
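+ As a rough sketch (assuming valid credentials in `.env`), scraping the five current "hot" submissions and flattening their comment trees looks like this:
+
+ ```python
+ from scraper import RedditScraper
+
+ reddit = RedditScraper(subreddit="TSLA")  # wraps praw.Reddit under the hood
+ for submission in reddit.get_hot(posts=5):
+     # get_comment_forest returns one dict per comment: body, title, id, created_at, score
+     comments = reddit.get_comment_forest(submission.comments)
+     print(submission.title, len(comments))
+ ```
+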
+ # sentiment analysis model
+
+ The model used in the application is a fine-tuned BERT-based model, trained on labeled data scraped from the r/TSLA subreddit with a script built on the scraping module `scraper.RedditScraper`.
+
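+ A minimal sketch of querying the model directly (using the default `model_path` from `config.py`; the example inputs are made up):
+
+ ```python
+ from transformers import pipeline
+
+ sentiment = pipeline("sentiment-analysis", "fourthbrain-demo/model_trained_by_me2")
+ # Each result is a dict with a label and a confidence score; the exact
+ # label strings depend on the model checkpoint.
+ print(sentiment(["TSLA to the moon", "I am selling everything"]))
+ ```
+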
+ The data is available at <https://huggingface.co/datasets/fourthbrain-demo/reddit-comments-demo>, in two versions (used later with DVC), and split into train/test datasets.
+
TSLASentimentAnalyzer/__pycache__/app.cpython-310.pyc ADDED
Binary file (2.37 kB)
 
TSLASentimentAnalyzer/__pycache__/classifier.cpython-310.pyc ADDED
Binary file (535 Bytes)
 
TSLASentimentAnalyzer/__pycache__/config.cpython-310.pyc ADDED
Binary file (999 Bytes)
 
TSLASentimentAnalyzer/__pycache__/scraper.cpython-310.pyc ADDED
Binary file (2.47 kB)
 
TSLASentimentAnalyzer/app.py ADDED
@@ -0,0 +1,68 @@
+ import matplotlib.pyplot as plt
+ import pandas as pd
+ import streamlit as st
+ from loguru import logger
+ from transformers import pipeline
+
+ from config import settings
+ from scraper import RedditScraper
+
+ reddit = RedditScraper()
+ st.title("$TSLA Market Sentiment Analyzer using r/TSLA Subreddit")
+
+
+ def load_data(number, scraping_option):
+     st.write("loading new data")
+     comments = []
+     for submission in scraping_option(number):
+         # Scrape each submission's comment tree once and reuse it for logging.
+         submission_comments = reddit.get_comment_forest(submission.comments)
+         comments.extend(submission_comments)
+         logger.debug(
+             "{}: {} comments, {} scraped",
+             submission.title,
+             submission.num_comments,
+             len(submission_comments),
+         )
+     return pd.DataFrame(comments)
+
+
+ def select_scrap_type(option):
+     if option == "Hot":
+         st.write("Selected Hot submissions")
+         return reddit.get_hot
+     if option == "Rising":
+         st.write("Selected rising submissions")
+         return reddit.get_rising
+     if option == "New":
+         st.write("Selected new submissions")
+         return reddit.get_new
+
+
+ st.info(
+     "Option has been deactivated: the subreddit is not very active, so every mode returned the same submissions"
+ )
+ select = st.selectbox("choose option", ["Hot", "Rising", "New"], disabled=True)
+
+ number = st.number_input("Insert a number", step=1, max_value=30, min_value=3)
+
+ sentiment_pipeline = pipeline("sentiment-analysis", settings.model_path)
+
+ data = load_data(number, select_scrap_type("Hot"))
+
+ if st.button("Analyze"):
+     results = sentiment_pipeline(list(data["comment"]))
+     data["label"] = [res["label"] for res in results]
+     data["sentiment_score"] = [res["score"] for res in results]
+     label_counts = data.groupby("label").count()
+     st.write(label_counts)
+     # Derive pie-chart labels from the data instead of hard-coding
+     # ("Negative", "Positive"), so the chart matches whatever labels the model emits.
+     sizes = list(label_counts["comment"])
+     labels = list(label_counts.index)
+     fig1, ax1 = plt.subplots()
+     ax1.pie(sizes, labels=labels, autopct="%1.1f%%", shadow=True, startangle=90)
+     ax1.axis("equal")
+     st.pyplot(fig1)
+
+ st.write(data)
TSLASentimentAnalyzer/classifier.py ADDED
@@ -0,0 +1,7 @@
+ from transformers import pipeline
+
+
+ def predict(data, custom_model: str = "finiteautomata/bertweet-base-sentiment-analysis"):
+     # Pass the requested checkpoint to the pipeline; it was previously ignored.
+     sentiment_pipeline = pipeline("sentiment-analysis", model=custom_model)
+     return sentiment_pipeline(data)
TSLASentimentAnalyzer/config.py ADDED
@@ -0,0 +1,15 @@
+ from pydantic import BaseSettings
+
+
+ class Settings(BaseSettings):
+     reddit_api_client_id: str
+     reddit_api_client_secret: str
+     stock_data_api_key: str
+     reddit_api_user_agent: str = "USERAGENT"
+     model_path: str = "fourthbrain-demo/model_trained_by_me2"
+
+     class Config:
+         env_file = ".env"  # field names map to env vars with no prefix
+
+
+ settings = Settings()
TSLASentimentAnalyzer/scraper.py ADDED
@@ -0,0 +1,60 @@
+ import praw
+ from loguru import logger
+ from praw.models import MoreComments
+
+ from config import settings
+
+
+ class RedditScraper:
+     def __init__(self, subreddit: str = "TSLA"):
+         reddit = praw.Reddit(
+             client_id=settings.reddit_api_client_id,
+             client_secret=settings.reddit_api_client_secret,
+             user_agent=settings.reddit_api_user_agent,
+         )
+         self.subreddit = reddit.subreddit(subreddit)
+
+     def get_hot(self, posts: int = 10):
+         return self.subreddit.hot(limit=posts)
+
+     def get_new(self, posts: int = 10):
+         return self.subreddit.new(limit=posts)
+
+     def get_rising(self, posts: int = 10):
+         return self.subreddit.rising(limit=posts)
+
+     def get_top(self, posts: int = 10):
+         return self.subreddit.top(limit=posts)
+
+     def get_top_comments(self, submission, threshold: int = 5):
+         return [
+             comment.body
+             for comment in submission.comments
+             if comment.score >= threshold
+         ]
+
+     def get_comment_forest(self, comment_forest):
+         # Flatten a PRAW CommentForest (or MoreComments stub) into a list of dicts.
+         all_comments = []
+         if isinstance(comment_forest, MoreComments):
+             comments_list = comment_forest.comments()
+         else:
+             comments_list = comment_forest.list()
+         logger.debug("{}: {} comments", comment_forest, len(comments_list))
+         for comment in comments_list:
+             if isinstance(comment, MoreComments):
+                 # Expand "load more comments" stubs recursively and collect
+                 # their comments instead of only logging them.
+                 logger.info("more comments")
+                 all_comments.extend(self.get_comment_forest(comment))
+                 continue
+             all_comments.append(
+                 {
+                     "comment": comment.body,
+                     "title": comment.submission.title,
+                     "id": comment.id,
+                     "created_at": int(comment.created_utc),
+                     "score": comment.score,
+                 }
+             )
+         return all_comments
TSLASentimentAnalyzer/sentiment_data.csv ADDED
@@ -0,0 +1,10 @@
+ ,timestamp,counter,close,volume,sentiment_score,close_lag1,perc_change_close,sentiment_score_lag1,perc_change_sentiment,sentiment_SMA3mo
+ 1,2022-07-09,625,0.0,,0.9639244723320007,752.28998,-1.0,0.980573832988739,-0.016979201460018443,0.0
+ 2,2022-07-10,324,0.0,,0.9888325300481584,0.0,0.0,0.9639244723320007,0.0258402586832329,0.9777769451229661
+ 3,2022-07-11,121,703.03003,33080400,0.9755014126951044,0.0,inf,0.9888325300481584,-0.01348167353718104,0.9760861383584212
+ 4,2022-07-12,9,699.21002,29310300,0.9687565366427103,703.03003,-0.005433637024011655,0.9755014126951044,-0.006914265796662843,0.9776968264619911
+ 5,2022-07-13,196,711.12,32651500,0.991240360907146,699.21002,0.017033480155218626,0.9687565366427103,0.023208952315671386,0.9784994367483201
+ 6,2022-07-14,100,714.94,26185800,0.9773943841457366,711.12,0.005371807852401916,0.991240360907146,-0.01396833432885843,0.9791304272318643
+ 7,2022-07-15,49,0.0,,0.9558297651154655,714.94,-1.0,0.9773943841457366,-0.022063375214826014,0.9748215033894493
+ 8,2022-07-16,64,0.0,,0.9682549461722374,0.0,0.0,0.9558297651154655,0.012999366111256171,0.9671596984778131
+ 9,2022-07-17,121,0.0,,0.9894618229432539,0.0,0.0,0.9682549461722374,0.02190216208536067,0.9711821780769855
app.py CHANGED
@@ -13,7 +13,7 @@ import time
  from plotly.subplots import make_subplots
 
  # Read CSV file into pandas and extract timestamp data
- dfSentiment = pd.read_csv("../TSLASentimentAnalyzer/sentiment_data.csv")
+ dfSentiment = pd.read_csv("./TSLASentimentAnalyzer/sentiment_data.csv")
  dfSentiment['timestamp'] = [datetime.strptime(dt, '%Y-%m-%d') for dt in dfSentiment['timestamp'].tolist()]
 
  # Multi-select columns to build chart