File size: 4,415 Bytes
1db46cf
 
 
 
 
ce083b1
 
1db46cf
 
 
 
 
5ad2dc4
1db46cf
 
f85a256
ce083b1
75d538e
1db46cf
 
 
f85a256
ce083b1
1db46cf
 
 
 
 
 
 
ce083b1
1db46cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d4179ef
 
ce083b1
 
 
 
 
 
c22fc44
ce083b1
 
 
 
 
1db46cf
 
 
 
 
 
 
 
 
 
f85a256
 
1db46cf
d4c506e
f85a256
d4c506e
 
f85a256
 
d4c506e
 
 
d4179ef
1db46cf
 
 
 
 
 
ce083b1
c22fc44
ae10a2a
 
d4179ef
ce083b1
1db46cf
f85a256
 
1db46cf
 
f85a256
1db46cf
 
 
f85a256
 
1db46cf
f85a256
ae10a2a
1db46cf
 
d4c506e
ae10a2a
 
ffaf614
 
ce083b1
ae10a2a
ce083b1
d4179ef
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import streamlit as st
import json
import requests
import time
from newspaper import Article
import nltk
# Fetch the NLTK 'punkt' data package when the script starts.
# NOTE(review): presumably needed by the article.nlp() call in the
# summarize step further down — confirm against the newspaper docs.
nltk.download('punkt')

# Page header: a narrow column holding the icon beside a wide column
# holding the title (width ratio 0.32 : 2).
c1, c2 = st.columns([0.32, 2])
c1.image("images/newspaper.png", width=85)
c2.title("Website Article Summarize")

# Tagline and author credit are rendered outside the columns, below them.
st.markdown(
    "**Generate summaries of articles from websites using abstractive "
    "summarization with Language Model and Library NewsPaper.**"
)
st.caption("Created by Bayhaqy.")

# Sidebar content: app description, API-key instructions and usage note.
with st.sidebar:
    st.subheader("About the app")
    # The pegasus link carries the same owner prefix ("thonyyy/") as the
    # inference endpoint used for that model, so it resolves to the model page.
    st.info(
        "This app uses optional 🤗HuggingFace's Model "
        "[facebook/bart-large-cnn](https://huggingface.co/facebook/bart-large-cnn) "
        "or [pegasus_indonesian_base-finetune]"
        "(https://huggingface.co/thonyyy/pegasus_indonesian_base-finetune) "
        "model and Library NewsPaper."
    )
    st.write("\n\n")

    # Step-by-step instructions for obtaining an API token.
    st.markdown("**Get a free API key from HuggingFace:**")
    st.markdown("* Create a [free account](https://huggingface.co/join) or [login](https://huggingface.co/login)")
    st.markdown("* Go to **Settings** and then **Access Tokens**")
    st.markdown("* Create a new Token (select 'read' role)")
    st.markdown("* Paste your API key in the text box")
    st.divider()

    st.write("Please make sure you choose the correct model and is not behind a paywall.")
    st.write("\n\n")
    st.divider()

# Inputs: the article URL, pre-filled with the scheme prefix.
st.subheader("Enter the URL of the article you want to summarize")
default_url = "https://"
url = st.text_input("URL:", value=default_url)

# Request headers for the fetch step: a desktop-browser User-Agent string.
headers_ = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/89.0.4389.82 Safari/537.36"
    ),
}

fetch_button = st.button("Fetch article")

# Fetch step: check that the URL is reachable, then download, parse and
# display the article so the user can review it before summarizing.
if fetch_button:
    article_url = url

    try:
        # Probe the URL with the browser-like headers. The context manager
        # closes the session's pooled connections once the probe is done.
        with requests.Session() as session:
            response_ = session.get(article_url, headers=headers_, timeout=10)

        if response_.status_code == 200:
            # The spinner wraps the real work (download + parse) rather than
            # an artificial delay.
            with st.spinner('Fetching your article...'):
                article = Article(url)
                article.download()
                article.parse()

            title = article.title
            text = article.text

            # Report success only after the article was actually parsed.
            st.success('Your article is ready for summarization!')

            st.divider()
            st.subheader("Real Article")
            st.markdown(f"Your article: **{title}**")
            st.markdown(f"**{text}**")
            st.divider()
        else:
            st.error(
                "Error occurred while fetching article "
                f"(HTTP {response_.status_code})."
            )

    except Exception as e:
        # UI boundary: surface any network/parsing failure to the user
        # instead of letting the script crash.
        st.error(f"Error occurred while fetching article: {e}")

# HuggingFace API key (masked input) and the bearer-token header built from it.
API_KEY = st.text_input("Enter your HuggingFace API key", type="password")
headers = {"Authorization": "Bearer {}".format(API_KEY)}

# Model picker: the chosen label decides which inference endpoint is called.
model_choices = ["bart-large-cnn", "pegasus_indonesian_base-finetune"]
selected_api_url = st.selectbox("Select Model", options=model_choices)

# Any selection other than "bart-large-cnn" maps to the pegasus endpoint.
API_URL = (
    "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
    if selected_api_url == "bart-large-cnn"
    else "https://api-inference.huggingface.co/models/thonyyy/pegasus_indonesian_base-finetune"
)

submit_button = st.button("Submit to Summarize")

# Download and parse the article, then summarize it with both the selected
# HuggingFace model and newspaper's built-in summarizer.
if submit_button:

    # HuggingFace API request function summary
    def query_sum(payload):
        """POST *payload* as JSON to the selected endpoint and return the decoded JSON body.

        Raises:
            requests.RequestException: on connection failure or timeout.
            ValueError: if the response body is not valid JSON.
        """
        # A timeout keeps the app from hanging forever on a stalled request.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
        return response.json()

    try:
        article = Article(url)
        article.download()
        article.parse()
        article.nlp()
    except Exception as e:
        # UI boundary: a bad URL or a failed download/parse is reported to
        # the user instead of crashing the script.
        st.error(f"Error occurred while fetching article: {e}")
    else:
        title = article.title
        text = article.text
        summ = article.summary

        summary = None
        if not text.strip():
            st.error("No article text could be extracted from this URL.")
        else:
            with st.spinner('Doing some AI magic, please wait...'):
                try:
                    # Query the API Summary
                    output_sum = query_sum({"inputs": text})
                except (requests.RequestException, ValueError) as e:
                    output_sum = {"error": str(e)}

            # A successful response is a non-empty list whose first item
            # holds 'summary_text'; anything else is treated as an error
            # payload and shown to the user instead of being indexed.
            first = output_sum[0] if isinstance(output_sum, list) and output_sum else None
            if isinstance(first, dict) and "summary_text" in first:
                summary = first['summary_text'].replace('<n>', " ")
            else:
                detail = output_sum.get("error", output_sum) if isinstance(output_sum, dict) else output_sum
                st.error(f"The summarization API did not return a summary: {detail}")

        # Display the results
        if summary is not None:
            st.divider()
            st.subheader("Summary AI")
            st.markdown(f"Your article: **{title}**")
            st.markdown(f"**{summary}**")

        st.divider()
        st.subheader("Summary Library NewsPaper")
        st.markdown(f"Your article: **{title}**")
        st.markdown(f"**{summ}**")

        st.divider()
        st.subheader("Real Article")
        st.markdown(f"Your article: **{title}**")
        st.markdown(f"**{text}**")