---
license: cc-by-3.0
datasets:
- stanfordnlp/imdb
language:
- en
metrics:
- accuracy
tags:
- sentiment
- small
- silly
- RETON
---

Model structure (each stage feeds into the next):

1. Input Text
2. Custom Tokenizer (Text -> Tokens)
3. Padding (Equal Length Sequences)
4. Embedding Layer (Tokens -> Dense Vectors)
5. LSTM Layer (Capture Sequential Patterns)
6. Attention Mechanism (Focus on Important Tokens)
7. Dense Layer (Fully Connected)
8. Softmax Activation (Output: Positive or Negative Sentiment)

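Based on the list above and the loading code below, the attention block appears to be the only custom piece; everything else maps onto standard Keras layers. Below is a minimal sketch of how a model with this structure could be assembled. The embedding size, LSTM width, dense width, and output layout are illustrative assumptions, not the actual RETO-SENTIMENT hyperparameters.

```python
import tensorflow as tf

MAX_VOCAB_SIZE = 10000
MAX_SEQUENCE_LENGTH = 200
EMBEDDING_DIM = 128  # assumed, not the trained model's value
LSTM_UNITS = 64      # assumed, not the trained model's value


class AttentionLayer(tf.keras.layers.Layer):
    """Softmax over timesteps, then a weighted sum (same layer as in the test script below)."""

    def call(self, inputs):
        # inputs: (batch, timesteps, features) coming out of the LSTM
        attention_weights = tf.nn.softmax(tf.reduce_sum(inputs, axis=-1), axis=-1)
        attention_weights = tf.expand_dims(attention_weights, -1)
        return tf.reduce_sum(inputs * attention_weights, axis=1)


def build_model():
    inputs = tf.keras.Input(shape=(MAX_SEQUENCE_LENGTH,))
    x = tf.keras.layers.Embedding(MAX_VOCAB_SIZE, EMBEDDING_DIM)(inputs)  # tokens -> dense vectors
    x = tf.keras.layers.LSTM(LSTM_UNITS, return_sequences=True)(x)        # sequential patterns
    x = AttentionLayer()(x)                                               # focus on important tokens
    x = tf.keras.layers.Dense(32, activation="relu")(x)                   # fully connected
    outputs = tf.keras.layers.Dense(2, activation="softmax")(x)           # positive / negative
    return tf.keras.Model(inputs, outputs)
```

Training details (optimizer, loss, epochs) are not documented here, so treat this as a structural reference only.
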
It achieved 95.71% accuracy on the IMDB dataset listed above.

To test it, use something like this:
```python
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.datasets import imdb

MAX_VOCAB_SIZE = 10000
MAX_SEQUENCE_LENGTH = 200


class AttentionLayer(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)

    def call(self, inputs):
        # inputs: (batch, timesteps, features) from the LSTM
        attention_weights = tf.nn.softmax(tf.reduce_sum(inputs, axis=-1), axis=-1)
        attention_weights = tf.expand_dims(attention_weights, -1)
        weighted_sum = inputs * attention_weights
        return tf.reduce_sum(weighted_sum, axis=1)


# The custom layer must be defined before loading the model that uses it.
model = tf.keras.models.load_model('RETO-SENTIMENT.h5', custom_objects={'AttentionLayer': AttentionLayer})

word_index = imdb.get_word_index()
reverse_word_index = {value: key for key, value in word_index.items()}  # not used below, handy for decoding sequences back to words


def preprocess_text(input_text):
    tokens = [word_index.get(word.lower(), 2) for word in input_text.split()]  # 2 is the unknown-word index
    padded_sequence = pad_sequences([tokens], maxlen=MAX_SEQUENCE_LENGTH)
    return padded_sequence


def predict_sentiment(input_text):
    preprocessed_text = preprocess_text(input_text)
    prediction = model.predict(preprocessed_text)
    sentiment = "Positive" if prediction[0][0] > 0.5 else "Negative"
    confidence = prediction[0][0] if sentiment == "Positive" else 1 - prediction[0][0]
    return sentiment, confidence


if __name__ == "__main__":
    test_sentences = [
        # add sentences here
    ]

    for sentence in test_sentences:
        sentiment, confidence = predict_sentiment(sentence)
        print(f"Input: {sentence}")
        print(f"Predicted Sentiment: {sentiment} (Confidence: {confidence:.2f})\n")
```
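
To sanity-check the reported 95.71%, an evaluation pass over the IMDB test split could be appended to the script above; it reuses `model`, `MAX_VOCAB_SIZE`, and `MAX_SEQUENCE_LENGTH`. This is only a sketch: it assumes the model was trained on sequences indexed the way `imdb.load_data` indexes them by default, and that the saved file still carries its compiled loss and metric, so the number may not reproduce exactly if those assumptions do not hold.

```python
# Hypothetical accuracy check, appended after the test script above.
# Assumes imdb.load_data's default token indexing matches the training data
# and that the loaded model retains its compiled loss/metric.
(_, _), (x_test, y_test) = imdb.load_data(num_words=MAX_VOCAB_SIZE)
x_test = pad_sequences(x_test, maxlen=MAX_SEQUENCE_LENGTH)

loss, accuracy = model.evaluate(x_test, y_test, verbose=0)
print(f"IMDB test accuracy: {accuracy:.4f}")
```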