wavesoumen's picture
init
0170d5c verified
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, T5ForConditionalGeneration, T5Tokenizer
import nltk
import torch
from textblob import TextBlob
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
summary_model_name = 'utrobinmv/t5_summary_en_ru_zh_base_2048'


# Streamlit re-executes this script from the top on every widget interaction.
# Wrapping the downloads and model loads in a cached loader makes them run
# once per server process instead of once per rerun.
@st.cache_resource
def _load_resources():
    """Fetch the required NLTK data and load both models with their tokenizers.

    Returns:
        A tuple ``(tag_tokenizer, tag_model, summary_model, summary_tokenizer)``.
    """
    # NLTK data used by the tokenizers, the POS tagger and the stop-word list.
    for package in ('punkt', 'averaged_perceptron_tagger', 'stopwords'):
        nltk.download(package)

    tag_model_name = "fabiochiu/t5-base-tag-generation"
    return (
        AutoTokenizer.from_pretrained(tag_model_name),
        AutoModelForSeq2SeqLM.from_pretrained(tag_model_name),
        T5ForConditionalGeneration.from_pretrained(summary_model_name),
        T5Tokenizer.from_pretrained(summary_model_name),
    )


# Load models and tokenizers
tag_tokenizer, tag_model, summary_model, summary_tokenizer = _load_resources()
# Function to generate tags
def generate_tags(text):
    """Generate topic tags for *text* with the tag-generation model.

    Args:
        text: The post to tag. The tokenized input is truncated to 256 tokens.

    Returns:
        A list of unique, non-empty tag strings, in the order they appear in
        the decoded model output.
    """
    with torch.no_grad():
        inputs = tag_tokenizer(text, max_length=256, truncation=True, return_tensors="pt")
        output = tag_model.generate(
            **inputs,
            num_beams=8,
            do_sample=True,
            min_length=10,
            max_length=64,
            num_return_sequences=1,
        )
    decoded_output = tag_tokenizer.batch_decode(output, skip_special_tokens=True)[0]

    # Split on the bare comma and strip each piece so that "a,b" and "a , b"
    # are handled as well as "a, b"; drop empty pieces.
    candidates = (piece.strip() for piece in decoded_output.split(","))
    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # displayed tags do not shuffle between runs the way list(set(...)) does.
    return list(dict.fromkeys(tag for tag in candidates if tag))
# Function to generate summaries
def generate_summary(text, prefix):
    """Summarize *text* with the summary model.

    Args:
        text: The text to summarize.
        prefix: Task prefix prepended verbatim to *text* before tokenization
            (the caller passes e.g. ``'summary: '`` or ``'summary brief: '``).

    Returns:
        The first decoded sequence produced by the model, with special
        tokens removed.
    """
    # The tokenizer returns a full encoding (not just input ids); it is
    # unpacked into generate() as keyword arguments.
    encoded = summary_tokenizer(prefix + text, return_tensors="pt")
    output_ids = summary_model.generate(**encoded)
    decoded = summary_tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    return decoded[0]
# Function to extract keywords and generate hashtags
def extract_keywords(content):
    """Collect the noun tokens of *content*.

    The text is lower-cased, split into sentences, and each sentence is
    tokenized and POS-tagged with NLTK. Every token whose tag starts with
    ``'NN'`` is kept.

    Args:
        content: The text to scan.

    Returns:
        A list of lower-cased tokens in reading order; repeated words are
        kept as repeated entries.
    """
    nouns = []
    for sent in nltk.sent_tokenize(content.lower()):
        tagged = nltk.pos_tag(nltk.word_tokenize(sent))
        nouns.extend(token for token, pos in tagged if pos.startswith('NN'))
    return nouns
def generate_hashtags(content, max_hashtags=10):
    """Build hashtags from the keywords found in *content*.

    Args:
        content: Text to mine for keywords (via ``extract_keywords``).
        max_hashtags: Upper bound on the number of hashtags returned.

    Returns:
        A list of unique ``#keyword`` strings, each at most 20 characters
        long including the ``#``, in order of first appearance.
    """
    # A dict is used as an ordered set: a word mentioned several times in the
    # post must not yield the same hashtag repeatedly and use up the
    # max_hashtags budget.
    unique_hashtags = {}
    for keyword in extract_keywords(content):
        # Skip tokens with no letter or digit at all (stray symbols that the
        # tagger labelled as nouns); "#%" is not a usable hashtag.
        if not any(ch.isalnum() for ch in keyword):
            continue
        hashtag = "#" + keyword
        if len(hashtag) <= 20:
            unique_hashtags[hashtag] = None
    return list(unique_hashtags)[:max_hashtags]
# Function to extract point of view
def extract_point_of_view(text):
    """Classify the overall tone of *text* from its TextBlob polarity.

    Args:
        text: The text to analyse; it is converted with ``str()`` first.

    Returns:
        ``"Positive"`` when the polarity is above 0.5, ``"Negative"`` when it
        is below -0.5, and ``"Neutral"`` otherwise.
    """
    # Analyse the text as written. Filtering NLTK's English stop words first
    # would delete "no"/"not" and intensifiers such as "very", which the
    # sentiment analysis relies on -- "not good" would be scored as "good".
    polarity = TextBlob(str(text)).sentiment.polarity

    if polarity > 0.5:
        return "Positive"
    if polarity < -0.5:
        return "Negative"
    return "Neutral"
# Streamlit application
st.title("LinkedIn Post Analysis AI")
text = st.text_area("Enter the LinkedIn Post:")

if st.button("Analyze:"):
    # Treat whitespace-only input like empty input so a blank post is not
    # sent through the models.
    if text and text.strip():
        # Generate tags
        tags = generate_tags(text)
        st.subheader("The Most Tracked KeyWords:")
        st.write(tags)

        # Generate summaries (same model, two different task prefixes)
        summary1 = generate_summary(text, 'summary: ')
        summary2 = generate_summary(text, 'summary brief: ')
        st.subheader("Summary Title 1:")
        st.write(summary1)
        st.subheader("Summary Title 2:")
        st.write(summary2)

        # Generate hashtags
        hashtags = generate_hashtags(text)
        st.subheader("Generated Hashtags for the Post")
        st.write(hashtags)

        # Extract point of view
        point_of_view = extract_point_of_view(text)
        st.subheader("Tone of the Post:")
        st.write(point_of_view)
    else:
        st.warning("Please enter text to analyze.")