Spaces:

gefedya
/

YSDA_transfprmers_nlp_ops

Runtime error

File size: 2,626 Bytes

53dcf47
 
 
13d8cd8
5ac7ad7
53dcf47
 
 
 
 
2b96aaf
53dcf47
 
 
 
 
 
 
74ce976
53dcf47
5928531
5249038
74ce976
53dcf47
 
 
 
 
bf4c3e8
 
 
53dcf47
4efd8f8
cf7bd74
53dcf47
bf4c3e8
 
53dcf47
 
 
 
 
 
 
 
 
 
13d8cd8
53dcf47
 
74ce976
2fb3f62
53dcf47

import streamlit as st
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModel, Trainer, TrainingArguments, LineByLineTextDataset
# import json
import torch


# allow_output_mutation=True: by default st.cache hashes the cached return
# value on every call to detect mutation, which is very slow (or fails) for a
# large torch module. The model is only used for forward passes in this file,
# so that check is skipped.
@st.cache(allow_output_mutation=True)
def get_model():
    """Build the sentiment classifier and load the fine-tuned weights.

    The model is instantiated from the
    "siebert/sentiment-roberta-large-english" checkpoint with two labels,
    then its parameters are overwritten with the state dict stored in
    ``model_cached_2.pth``.

    Returns:
        The sequence-classification model with the loaded weights.
    """
    model = AutoModelForSequenceClassification.from_pretrained(
        "siebert/sentiment-roberta-large-english", num_labels=2
    )
    # map_location remaps the saved tensors onto the CPU while loading.
    state_dict = torch.load('model_cached_2.pth', map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)
    return model

# allow_output_mutation=True: skip st.cache's per-call hashing of the cached
# return value; the tokenizer object is only used for encoding in this file.
@st.cache(allow_output_mutation=True)
def get_tokenizer():
    """Load the tokenizer of the "siebert/sentiment-roberta-large-english" checkpoint.

    Returns:
        The tokenizer instance produced by ``AutoTokenizer.from_pretrained``.
    """
    return AutoTokenizer.from_pretrained("siebert/sentiment-roberta-large-english")

def make_prediction(to_analyze):
    """Run the cached classifier on a single piece of text.

    Args:
        to_analyze: The text to classify; it is encoded as a batch of one.

    Returns:
        The model's output object for that batch; the caller reads
        ``.logits`` from it.
    """
    model = get_model()
    tokenizer = get_tokenizer()
    # truncation=True caps the encoded sequence at the tokenizer's maximum
    # length, so an over-long input is cut instead of being fed to the model
    # at full length.
    encoded = tokenizer([to_analyze], truncation=True, return_tensors='pt')
    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        return model(**encoded)
    


# Page title followed by a one-line description of the demo.
st.header("Sentiment analysis on twitter datasets")
st.markdown("Here is a sentiment model further trained on a slice of a twitter dataset")

# Alternative banner image, currently disabled:
# st.markdown("""
# <img width=700px src='https://imagez.tmz.com/image/73/4by3/2020/10/05/735aaee2f6b9464ca220e62ef797dab0_md.jpg'>
# """, unsafe_allow_html=True)

# Banner image embedded as raw HTML, which is why unsafe_allow_html is set.
banner_html = """
<img width=700px 
src='https://static.boredpanda.com/blog/wp-content/uploads/2017/05/celebrities-mean-tweets-reactions-309-592ebf04f173c__700.jpg'>"""
st.markdown(banner_html, unsafe_allow_html=True)


intro_message = "Try typing something here! \n You will see how much better our model is compared to the base model! No kidding"
# `text` is bound to whatever st.markdown returns (not to user input);
# nothing in the visible code reads it afterwards.
text = st.markdown(intro_message)

### Loading and tokenizing data
# data = load_dataset("carblacac/twitter-sentiment-analysis")
# tokenizer = AutoTokenizer.from_pretrained("siebert/sentiment-roberta-large-english")
# dataset = data.map(lambda xs: tokenizer(xs["text"], truncation=True, padding='max_length'))
# dataset = dataset.rename_column("feeling", "labels")

# Input form: a single text field plus a submit button. `button` holds the
# value returned by the submit button and gates the prediction below.
with st.form(key='input_form'):
    to_analyze = st.text_input(label='Input text to be analyzed')
    button = st.form_submit_button(label='Analyze')

if button:
    # Whitespace-only input carries nothing to classify, so it is reported as
    # an empty request. The text itself is passed on unmodified.
    if to_analyze.strip():
        pred = make_prediction(to_analyze)
        # Index 0 is reported as "Negative", any other index as "Positive".
        predicted_class = torch.argmax(pred.logits).item()
        st.markdown("Negative" if predicted_class == 0 else "Positive")
    else:
        st.markdown("Empty request. Please resubmit")

# classifier = pipeline('sentiment-analysis', model="distilbert-base-uncased-finetuned-sst-2-english")
# raw_predictions = classifier(text)
# this is the huggingface.transformers code you already know -- it can be replaced with anything from fairseq to catboost