david-meltzer committed on
Commit
5fa49b4
·
1 Parent(s): d54d7ad

initial commit

Browse files
Files changed (3) hide show
  1. app.py +66 -0
  2. requirements.txt +3 -0
  3. setup.py +24 -0
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import requests
3
+ import streamlit as st
4
+ import json
5
# Request timeout (seconds) so a stalled endpoint cannot hang the page forever.
_REQUEST_TIMEOUT = 30

# The endpoints answer with labels "LABEL_0" ... "LABEL_5"; map them to ratings.
_LABEL_TO_RATING = {f'LABEL_{i}': i for i in range(6)}


def _query_model(url, payload, headers):
    """POST a review to one model endpoint and return ``(rating, score)``.

    Args:
        url: Endpoint of the deployed model.
        payload: JSON-encoded request body of the form ``{"inputs": <text>}``.
        headers: HTTP headers carrying the API key.

    Returns:
        A tuple of the integer rating mapped from the first returned label
        and the score reported alongside it.

    Raises:
        requests.RequestException: On a network failure, a timeout or a
            non-2xx HTTP status.
        KeyError, IndexError, TypeError, ValueError: If the response body is
            not a non-empty list whose first item has a known ``label`` and
            a ``score``.
    """
    response = requests.post(url,
                             data=payload,
                             headers=headers,
                             timeout=_REQUEST_TIMEOUT)
    # Surface HTTP errors explicitly instead of failing later on a
    # response body that does not have the expected shape.
    response.raise_for_status()
    top = response.json()[0]
    return _LABEL_TO_RATING[top['label']], top['score']


def main():
    """Render the Streamlit page and show each model's predicted rating.

    The page displays a description, reads a review from a text area and
    sends it to every configured model endpoint.  For each model the
    predicted rating and its score are written to the page; if a request
    fails, an error is shown for that model and the remaining models are
    still queried.
    """
    st.title("Sentiment Analysis for Goodreads Book Reviews")

    st.write(
        "This application lets you perform sentiment analysis on book reviews. "
        "Simply input a review into the text below and the application will give two predictions for what the "
        "rating is on a scale of 0-5. The models will also produce the score they assigned their prediction. The score is "
        "between 0 and 1 and quantifies the confidence the model has in its prediction."
        "\n\n"
        "Specifically, we consider two pre-trained models, [BERT-tiny](https://huggingface.co/dhmeltzer/bert-tiny-goodreads-wandb) and [DistilBERT](https://huggingface.co/dhmeltzer/distilbert-goodreads-wandb) "
        "which have been fine-tuned on a dataset of Goodreads book "
        "reviews, see [here](https://www.kaggle.com/competitions/goodreads-books-reviews-290312/data) for the original dataset. "
        "These models are deployed on AWS and are accessed using a REST API. To deploy the models we used a combination of AWS Sagemaker, Lambda, and API Gateway."
        "\n\n"
        "To read more about this project and specifically how we cleaned the data and trained the models, see the following GitHub [repository](https://github.com/david-meltzer/Goodreads-Sentiment-Analysis)."
    )

    # API key sent to the model endpoints in the ``x-api-key`` header.
    AWS_key = st.secrets['AWS-key']

    # Display name -> endpoint URL of each deployed model.
    checkpoints = {
        'DistilBERT': 'https://85a720iwy2.execute-api.us-east-1.amazonaws.com/add_apis/distilbert-goodreads',
        'BERT-tiny': 'https://055dugvmzl.execute-api.us-east-1.amazonaws.com/beta/',
    }

    # Review input, pre-filled with an example review.
    user_input = st.text_area(
        "Search box",
        "I loved the Lord of the Rings trilogy. "
        "It is a classic and beautifully written story and J.R.R. Tolkein really made Middle-Earth come to life. "
        "My favorite part of the book though was when the hobbits met Tom Bombadil, it's too bad he was not in the movies.",
    )

    if not user_input:
        return

    # Headers and body are identical for every model, so build them once.
    headers = {'x-api-key': AWS_key}
    input_data = json.dumps({'inputs': user_input})

    for model_name, URL in checkpoints.items():
        try:
            label, score = _query_model(URL, input_data, headers)
        except (requests.RequestException,
                KeyError, IndexError, TypeError, ValueError) as err:
            # One failing endpoint should not take the whole page down.
            st.error(f"**{model_name}**: could not retrieve a prediction ({err}).")
            st.write("-"*20)
            continue

        st.write(f"**Model Name**: {model_name}")
        st.write(f"**Predicted Review**: {label}")
        st.write(f"**Confidence**: {score}")
        st.write("-"*20)


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ folium
2
+ streamlit==1.14.0
3
+ altair<5
setup.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from setuptools import setup
2
+
3
# Package metadata collected in one mapping and unpacked into ``setup()``.
common_kwargs = {
    "version": "0.1.0",
    "license": "MIT",
    "author": "David Meltzer",
    "author_email": "davidhmeltzer@gmail.com",
    # Trove classifiers describing the package.
    "classifiers": [
        "Intended Audience :: Developers",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: MIT License",
        "Natural Language :: English",
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 3.9",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
    "python_requires": ">=3.9",
    "include_package_data": False,
}

setup(name="sentiment_goodreads", **common_kwargs)