Spaces:
Runtime error
Runtime error
File size: 6,568 Bytes
5ba5a89 37d5811 5ba5a89 2229429 5ba5a89 eca5e0e 567bb6c 37d5811 567bb6c 37d5811 5ba5a89 eca5e0e 5ba5a89 37d5811 5ba5a89 37d5811 5ba5a89 37d5811 5ba5a89 37d5811 5ba5a89 37d5811 5ba5a89 567bb6c 5ba5a89 eca5e0e 5ba5a89 37d5811 3da8840 323db28 2229429 3a29a54 5ba5a89 eca5e0e 06245e7 eca5e0e 06245e7 323db28 eca5e0e 323db28 eca5e0e 323db28 5ba5a89 323db28 5ba5a89 43d8590 5ba5a89 06245e7 2229429 43d8590 eca5e0e 3a29a54 5ba5a89 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
from transformers import pipeline
import torch
import streamlit as st
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import os
import re
import pandas as pd
def translate_text_blob(text):
    """Translate *text* from Portuguese to English through TextBlob.

    Args:
        text: Source string; it is handed to TextBlob with ``from_lang="pt"``.

    Returns:
        The translation converted to a plain ``str``.
    """
    return str(TextBlob(text).translate(from_lang="pt", to="en"))
def sentiment_vader(text):
    """Label the overall sentiment of *text* from VADER's compound score.

    Args:
        text: The string passed to ``polarity_scores``.

    Returns:
        ``"POSITIVE"`` when the compound score is at least 0.05,
        ``"NEGATIVE"`` when it is at most -0.05, and ``"NEUTRAL"`` otherwise.
    """
    # Only the compound score drives the label; the neg/neu/pos components
    # of the result are not needed.
    compound = SentimentIntensityAnalyzer().polarity_scores(text)["compound"]
    if compound >= 0.05:
        return "POSITIVE"
    if compound <= -0.05:
        return "NEGATIVE"
    return "NEUTRAL"
def classify_by_company(text, path=None):
    """Infer a sector label from company names that occur in *text*.

    Each regular file in the companies directory is read as a list of
    company names, one per line. The file's base name (extension removed)
    is used as the sector label. Files are visited in sorted order and the
    first one containing a name that occurs in *text* wins.

    Args:
        text: Text to scan. Matching is a case-sensitive substring test, so
            callers should normalise case to match the files.
        path: Directory holding the per-sector files. When omitted, the
            original root-level "Companies" location is used if it exists;
            otherwise a "Companies" folder beside this script is assumed
            (NOTE(review): confirm the deployed layout).

    Returns:
        ``"<sector> - Infered by company name in text"`` for the first
        matching file, or ``""`` when no company name is found.

    Raises:
        FileNotFoundError: If the companies directory does not exist.
    """
    if path is None:
        path = r"\Companies"
        if not os.path.isdir(path):
            # The backslash-rooted path only resolves on a Windows drive
            # root; fall back to a directory shipped next to this file.
            path = os.path.join(
                os.path.dirname(os.path.abspath(__file__)), "Companies"
            )

    # Sorted so the winning file is deterministic when several match.
    for filename in sorted(os.listdir(path)):
        full_path = os.path.join(path, filename)
        if not os.path.isfile(full_path):
            continue
        with open(full_path, "r") as f:
            # strip() removes the newline without chopping a real character
            # off a final line that lacks one.
            names = [line.strip() for line in f]
        # Plain substring test: names are literals, not regex, and blank
        # lines must not act as a match-everything alternative.
        if any(name and name in text for name in names):
            sector = os.path.splitext(filename)[0]
            return sector + " - Infered by company name in text"
    return ""
def load_models(parameters_list):
    """Translate one text, score its sentiment and classify its sector.

    Args:
        parameters_list: Indexable with four entries:
            ``[0]`` translation model key (a key of ``translation_map``),
            ``[1]`` sentiment model key (a key of ``sentiment_map``),
            ``[2]`` zero-shot model key (a key of ``zeroshot_map``),
            ``[3]`` the text to process.

    Returns:
        A tuple ``(translation, sentiment_label, classification)``. The
        classification string ends with a note saying whether it was
        inferred from a company name or by the zero-shot model.

    Raises:
        KeyError: If a model key is missing from the lookup table that is
            consulted for it.
    """
    # Translation PT to EN
    translation_map = {
        "TextBlob": "TextBlob",
        "M2M100": "facebook/m2m100_418M",
        "OPUS": "Helsinki-NLP/opus-mt-mul-en",
        "T5": "unicamp-dl/translation-pt-en-t5",
        # NOTE(review): the checkpoint name says "en-pt"; confirm it really
        # translates Portuguese to English.
        "mBART": "Narrativa/mbart-large-50-finetuned-opus-en-pt-translation",
    }
    # Sentiment Analysis
    sentiment_map = {
        "VADER": "VADER",
        "FinBERT": "ProsusAI/finbert",
        "DistilBERT": "distilbert-base-uncased-finetuned-sst-2-english",
        "BERT": "nlptown/bert-base-multilingual-uncased-sentiment",
    }
    # Zeroshot Classification
    zeroshot_map = {
        "RoBERTa": "joeddav/xlm-roberta-large-xnli",
        "mDeBERTa": "MoritzLaurer/mDeBERTa-v3-base-mnli-xnli",
        "DistilroBERTa": "cross-encoder/nli-distilroberta-base",
    }
    candidate_labels = [
        "Industrial Goods",
        "Communications",
        "Cyclic Consumption",
        "Non-cyclical Consumption",
        "Financial",
        "Basic Materials",
        "Oil, Gas and Biofuels",
        "Health",
        "Information Technology",
        "Public utility",
    ]

    # First GPU when CUDA is available, otherwise -1 (CPU).
    device_num = 0 if torch.cuda.is_available() else -1

    # Step 1: translation.
    translator_key = parameters_list[0]
    if translator_key != "TextBlob":
        translator_checkpoint = translation_map[translator_key]
        translator = pipeline(
            "translation_pt_to_en",
            model=translator_checkpoint,
            tokenizer=translator_checkpoint,
            device=device_num,
        )
        out_translation = translator(parameters_list[3])[0]["translation_text"]
    else:
        out_translation = translate_text_blob(parameters_list[3])

    # Step 2: sentiment, computed on the translated text.
    sentiment_key = parameters_list[1]
    if sentiment_key != "VADER":
        sentiment_checkpoint = sentiment_map[sentiment_key]
        scorer = pipeline(
            "sentiment-analysis",
            model=sentiment_checkpoint,
            tokenizer=sentiment_checkpoint,
            device=device_num,
        )
        out_sentiment = scorer(out_translation)[0]["label"].upper()
    else:
        out_sentiment = sentiment_vader(out_translation)

    # Step 3: sector. A company-name hit in the upper-cased original text
    # takes precedence; the zero-shot model is only built when there is none.
    out_classification = classify_by_company(parameters_list[3].upper())
    if not out_classification:
        zeroshot_checkpoint = zeroshot_map[parameters_list[2]]
        classifier = pipeline(
            "zero-shot-classification",
            model=zeroshot_checkpoint,
            tokenizer=zeroshot_checkpoint,
            device=device_num,
        )
        top_label = classifier(out_translation, candidate_labels)["labels"][0]
        out_classification = top_label + " - Infered by {}".format(parameters_list[2])

    return out_translation, out_sentiment, out_classification
# Dataset displayed in the "dataset" section. The original backslash-rooted
# path only resolves on a Windows drive root, so when it is absent fall back
# to a "Data" folder beside this script.
# NOTE(review): the fallback layout is an assumption; confirm where the CSV
# lives in the deployed repository.
_data_path = r"\Data\Hugging Face_DF.csv"
if not os.path.isfile(_data_path):
    _data_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "Data", "Hugging Face_DF.csv"
    )
df = pd.read_csv(_data_path)

# Page layout: containers and column pairs are created up front and filled
# in by the ``with`` blocks below.
header = st.container()
model = st.container()
model_1, model_2 = st.columns(2)
dataset = st.container()
analysis = st.container()
analysis_1, analysis_2 = st.columns(2)

with header:
    st.title("IC 2022 Classificação de Dados Financeiros")
    st.write("Lorem ipsum dolor sit amet, consectetur adipiscing elit. Praesent sapien tortor, suscipit quis ornare ut, laoreet vitae nisi. Mauris quis consectetur risus, non blandit mauris. Sed ut odio tempor, ullamcorper leo eu, mollis eros.")

with model:
    st.header("Modelo para Tradução e Classificação!")
    # Left column: model pickers, input text and the submit button.
    with model_1:
        translation_pt_to_en = st.selectbox('Qual modelo você deseja usar para tradução?', ('TextBlob', 'M2M100', 'OPUS', 'T5', 'mBART'))
        sentiment_analysis = st.selectbox('Qual modelo você deseja usar para análise de sentimento?', ('VADER', 'FinBERT', 'DistilBERT', 'BERT'))
        zero_shot_classification = st.selectbox('Qual modelo você deseja usar para classificação?', ('RoBERTa', 'mDeBERTa', 'DistilroBERTa'))
        text = st.text_input(label="Coloque seu texto sobre mercado financeiro em português!", value=r"As ações da Raia Drogasil subiram em 98% desde o último bimestre, segundo as avaliações da revista!")
        submit = st.button('Gerar análises!')
    # Right column: results, produced only after the button is pressed.
    with model_2:
        if submit:
            with st.spinner('Wait for it...'):
                # Order matters: load_models reads these by position.
                parameters = [translation_pt_to_en, sentiment_analysis, zero_shot_classification, text]
                outputs = load_models(parameters)
            st.write("Translation..................................................................: \n {} \n \n".format(outputs[0]))
            st.write("Sentiment...................................................................: \n {} \n \n".format(outputs[1]))
            st.write("Classification...............................................................: \n {} \n \n".format(outputs[2]))

with dataset:
    st.header("Dados utilizados no projeto!")
    st.write("Os dados blablablabla")
    st.dataframe(df)
    st.subheader("Descrição das colunas:")
    # TODO: the five entries below are identical placeholder text.
    st.write("\t*- Texts:* Coluna que mostra os textos financeiros")
    st.write("\t*- Texts:* Coluna que mostra os textos financeiros")
    st.write("\t*- Texts:* Coluna que mostra os textos financeiros")
    st.write("\t*- Texts:* Coluna que mostra os textos financeiros")
    st.write("\t*- Texts:* Coluna que mostra os textos financeiros")

with analysis:
    st.header("Visualização dos dados utilizados através de WordClouds")
    # Placeholder: no content is rendered in this column yet.
    with analysis_1:
        pass
|