Spaces:
Sleeping
Sleeping
first commit
Browse files- Dockerfile +14 -0
- requirements.txt +11 -0
- static/graph.png +0 -0
- stopwords.txt +758 -0
- summary.py +128 -0
- templates/index.html +37 -0
- templates/result.html +17 -0
- templates/summary.html +18 -0
- templates/teks.html +32 -0
Dockerfile
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.9
|
2 |
+
|
3 |
+
RUN useradd -m -u 1000 user
|
4 |
+
USER user
|
5 |
+
ENV PATH="/home/user/.local/bin:$PATH"
|
6 |
+
|
7 |
+
WORKDIR /app
|
8 |
+
|
9 |
+
COPY --chown=user ./requirements.txt requirements.txt
|
10 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
11 |
+
|
12 |
+
COPY --chown=user . /app
|
13 |
+
|
14 |
+
# The Flask application object is defined in summary.py. `flask run` only
# auto-discovers app.py / wsgi.py, so the module must be named with --app.
CMD [ "python", "-m" , "flask", "--app", "summary", "run", "--host=0.0.0.0", "--port", "7860"]
|
requirements.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Flask==3.1.0
|
2 |
+
requests==2.32.3
|
3 |
+
networkx==3.4.2
|
4 |
+
matplotlib==3.9.2
|
5 |
+
beautifulsoup4==4.12.3
|
6 |
+
pandas==2.2.3
|
7 |
+
regex
|
8 |
+
Sastrawi==1.0.1
|
9 |
+
nltk==3.9.1
|
10 |
+
scikit-learn==1.5.2
|
11 |
+
pickle4
|
static/graph.png
ADDED
stopwords.txt
ADDED
@@ -0,0 +1,758 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ada
|
2 |
+
adalah
|
3 |
+
adanya
|
4 |
+
adapun
|
5 |
+
agak
|
6 |
+
agaknya
|
7 |
+
agar
|
8 |
+
akan
|
9 |
+
akankah
|
10 |
+
akhir
|
11 |
+
akhiri
|
12 |
+
akhirnya
|
13 |
+
aku
|
14 |
+
akulah
|
15 |
+
amat
|
16 |
+
amatlah
|
17 |
+
anda
|
18 |
+
andalah
|
19 |
+
antar
|
20 |
+
antara
|
21 |
+
antaranya
|
22 |
+
apa
|
23 |
+
apaan
|
24 |
+
apabila
|
25 |
+
apakah
|
26 |
+
apalagi
|
27 |
+
apatah
|
28 |
+
artinya
|
29 |
+
asal
|
30 |
+
asalkan
|
31 |
+
atas
|
32 |
+
atau
|
33 |
+
ataukah
|
34 |
+
ataupun
|
35 |
+
awal
|
36 |
+
awalnya
|
37 |
+
bagai
|
38 |
+
bagaikan
|
39 |
+
bagaimana
|
40 |
+
bagaimanakah
|
41 |
+
bagaimanapun
|
42 |
+
bagi
|
43 |
+
bagian
|
44 |
+
bahkan
|
45 |
+
bahwa
|
46 |
+
bahwasanya
|
47 |
+
baik
|
48 |
+
bakal
|
49 |
+
bakalan
|
50 |
+
balik
|
51 |
+
banyak
|
52 |
+
bapak
|
53 |
+
baru
|
54 |
+
bawah
|
55 |
+
beberapa
|
56 |
+
begini
|
57 |
+
beginian
|
58 |
+
beginikah
|
59 |
+
beginilah
|
60 |
+
begitu
|
61 |
+
begitukah
|
62 |
+
begitulah
|
63 |
+
begitupun
|
64 |
+
bekerja
|
65 |
+
belakang
|
66 |
+
belakangan
|
67 |
+
belum
|
68 |
+
belumlah
|
69 |
+
benar
|
70 |
+
benarkah
|
71 |
+
benarlah
|
72 |
+
berada
|
73 |
+
berakhir
|
74 |
+
berakhirlah
|
75 |
+
berakhirnya
|
76 |
+
berapa
|
77 |
+
berapakah
|
78 |
+
berapalah
|
79 |
+
berapapun
|
80 |
+
berarti
|
81 |
+
berawal
|
82 |
+
berbagai
|
83 |
+
berdatangan
|
84 |
+
beri
|
85 |
+
berikan
|
86 |
+
berikut
|
87 |
+
berikutnya
|
88 |
+
berjumlah
|
89 |
+
berkali-kali
|
90 |
+
berkata
|
91 |
+
berkehendak
|
92 |
+
berkeinginan
|
93 |
+
berkenaan
|
94 |
+
berlainan
|
95 |
+
berlalu
|
96 |
+
berlangsung
|
97 |
+
berlebihan
|
98 |
+
bermacam
|
99 |
+
bermacam-macam
|
100 |
+
bermaksud
|
101 |
+
bermula
|
102 |
+
bersama
|
103 |
+
bersama-sama
|
104 |
+
bersiap
|
105 |
+
bersiap-siap
|
106 |
+
bertanya
|
107 |
+
bertanya-tanya
|
108 |
+
berturut
|
109 |
+
berturut-turut
|
110 |
+
bertutur
|
111 |
+
berujar
|
112 |
+
berupa
|
113 |
+
besar
|
114 |
+
betul
|
115 |
+
betulkah
|
116 |
+
biasa
|
117 |
+
biasanya
|
118 |
+
bila
|
119 |
+
bilakah
|
120 |
+
bisa
|
121 |
+
bisakah
|
122 |
+
boleh
|
123 |
+
bolehkah
|
124 |
+
bolehlah
|
125 |
+
buat
|
126 |
+
bukan
|
127 |
+
bukankah
|
128 |
+
bukanlah
|
129 |
+
bukannya
|
130 |
+
bulan
|
131 |
+
bung
|
132 |
+
cara
|
133 |
+
caranya
|
134 |
+
cukup
|
135 |
+
cukupkah
|
136 |
+
cukuplah
|
137 |
+
cuma
|
138 |
+
dahulu
|
139 |
+
dalam
|
140 |
+
dan
|
141 |
+
dapat
|
142 |
+
dari
|
143 |
+
daripada
|
144 |
+
datang
|
145 |
+
dekat
|
146 |
+
demi
|
147 |
+
demikian
|
148 |
+
demikianlah
|
149 |
+
dengan
|
150 |
+
depan
|
151 |
+
di
|
152 |
+
dia
|
153 |
+
diakhiri
|
154 |
+
diakhirinya
|
155 |
+
dialah
|
156 |
+
diantara
|
157 |
+
diantaranya
|
158 |
+
diberi
|
159 |
+
diberikan
|
160 |
+
diberikannya
|
161 |
+
dibuat
|
162 |
+
dibuatnya
|
163 |
+
didapat
|
164 |
+
didatangkan
|
165 |
+
digunakan
|
166 |
+
diibaratkan
|
167 |
+
diibaratkannya
|
168 |
+
diingat
|
169 |
+
diingatkan
|
170 |
+
diinginkan
|
171 |
+
dijawab
|
172 |
+
dijelaskan
|
173 |
+
dijelaskannya
|
174 |
+
dikarenakan
|
175 |
+
dikatakan
|
176 |
+
dikatakannya
|
177 |
+
dikerjakan
|
178 |
+
diketahui
|
179 |
+
diketahuinya
|
180 |
+
dikira
|
181 |
+
dilakukan
|
182 |
+
dilalui
|
183 |
+
dilihat
|
184 |
+
dimaksud
|
185 |
+
dimaksudkan
|
186 |
+
dimaksudkannya
|
187 |
+
dimaksudnya
|
188 |
+
diminta
|
189 |
+
dimintai
|
190 |
+
dimisalkan
|
191 |
+
dimulai
|
192 |
+
dimulailah
|
193 |
+
dimulainya
|
194 |
+
dimungkinkan
|
195 |
+
dini
|
196 |
+
dipastikan
|
197 |
+
diperbuat
|
198 |
+
diperbuatnya
|
199 |
+
dipergunakan
|
200 |
+
diperkirakan
|
201 |
+
diperlihatkan
|
202 |
+
diperlukan
|
203 |
+
diperlukannya
|
204 |
+
dipersoalkan
|
205 |
+
dipertanyakan
|
206 |
+
dipunyai
|
207 |
+
diri
|
208 |
+
dirinya
|
209 |
+
disampaikan
|
210 |
+
disebut
|
211 |
+
disebutkan
|
212 |
+
disebutkannya
|
213 |
+
disini
|
214 |
+
disinilah
|
215 |
+
ditambahkan
|
216 |
+
ditandaskan
|
217 |
+
ditanya
|
218 |
+
ditanyai
|
219 |
+
ditanyakan
|
220 |
+
ditegaskan
|
221 |
+
ditujukan
|
222 |
+
ditunjuk
|
223 |
+
ditunjuki
|
224 |
+
ditunjukkan
|
225 |
+
ditunjukkannya
|
226 |
+
ditunjuknya
|
227 |
+
dituturkan
|
228 |
+
dituturkannya
|
229 |
+
diucapkan
|
230 |
+
diucapkannya
|
231 |
+
diungkapkan
|
232 |
+
dong
|
233 |
+
dua
|
234 |
+
dulu
|
235 |
+
empat
|
236 |
+
enggak
|
237 |
+
enggaknya
|
238 |
+
entah
|
239 |
+
entahlah
|
240 |
+
guna
|
241 |
+
gunakan
|
242 |
+
hal
|
243 |
+
hampir
|
244 |
+
hanya
|
245 |
+
hanyalah
|
246 |
+
hari
|
247 |
+
harus
|
248 |
+
haruslah
|
249 |
+
harusnya
|
250 |
+
hendak
|
251 |
+
hendaklah
|
252 |
+
hendaknya
|
253 |
+
hingga
|
254 |
+
ia
|
255 |
+
ialah
|
256 |
+
ibarat
|
257 |
+
ibaratkan
|
258 |
+
ibaratnya
|
259 |
+
ibu
|
260 |
+
ikut
|
261 |
+
ingat
|
262 |
+
ingat-ingat
|
263 |
+
ingin
|
264 |
+
inginkah
|
265 |
+
inginkan
|
266 |
+
ini
|
267 |
+
inikah
|
268 |
+
inilah
|
269 |
+
itu
|
270 |
+
itukah
|
271 |
+
itulah
|
272 |
+
jadi
|
273 |
+
jadilah
|
274 |
+
jadinya
|
275 |
+
jangan
|
276 |
+
jangankan
|
277 |
+
janganlah
|
278 |
+
jauh
|
279 |
+
jawab
|
280 |
+
jawaban
|
281 |
+
jawabnya
|
282 |
+
jelas
|
283 |
+
jelaskan
|
284 |
+
jelaslah
|
285 |
+
jelasnya
|
286 |
+
jika
|
287 |
+
jikalau
|
288 |
+
juga
|
289 |
+
jumlah
|
290 |
+
jumlahnya
|
291 |
+
justru
|
292 |
+
kala
|
293 |
+
kalau
|
294 |
+
kalaulah
|
295 |
+
kalaupun
|
296 |
+
kalian
|
297 |
+
kami
|
298 |
+
kamilah
|
299 |
+
kamu
|
300 |
+
kamulah
|
301 |
+
kan
|
302 |
+
kapan
|
303 |
+
kapankah
|
304 |
+
kapanpun
|
305 |
+
karena
|
306 |
+
karenanya
|
307 |
+
kasus
|
308 |
+
kata
|
309 |
+
katakan
|
310 |
+
katakanlah
|
311 |
+
katanya
|
312 |
+
ke
|
313 |
+
keadaan
|
314 |
+
kebetulan
|
315 |
+
kecil
|
316 |
+
kedua
|
317 |
+
keduanya
|
318 |
+
keinginan
|
319 |
+
kelamaan
|
320 |
+
kelihatan
|
321 |
+
kelihatannya
|
322 |
+
kelima
|
323 |
+
keluar
|
324 |
+
kembali
|
325 |
+
kemudian
|
326 |
+
kemungkinan
|
327 |
+
kemungkinannya
|
328 |
+
kenapa
|
329 |
+
kepada
|
330 |
+
kepadanya
|
331 |
+
kesampaian
|
332 |
+
keseluruhan
|
333 |
+
keseluruhannya
|
334 |
+
keterlaluan
|
335 |
+
ketika
|
336 |
+
khususnya
|
337 |
+
kini
|
338 |
+
kinilah
|
339 |
+
kira
|
340 |
+
kira-kira
|
341 |
+
kiranya
|
342 |
+
kita
|
343 |
+
kitalah
|
344 |
+
kok
|
345 |
+
kurang
|
346 |
+
lagi
|
347 |
+
lagian
|
348 |
+
lah
|
349 |
+
lain
|
350 |
+
lainnya
|
351 |
+
lalu
|
352 |
+
lama
|
353 |
+
lamanya
|
354 |
+
lanjut
|
355 |
+
lanjutnya
|
356 |
+
lebih
|
357 |
+
lewat
|
358 |
+
lima
|
359 |
+
luar
|
360 |
+
macam
|
361 |
+
maka
|
362 |
+
makanya
|
363 |
+
makin
|
364 |
+
malah
|
365 |
+
malahan
|
366 |
+
mampu
|
367 |
+
mampukah
|
368 |
+
mana
|
369 |
+
manakala
|
370 |
+
manalagi
|
371 |
+
masa
|
372 |
+
masalah
|
373 |
+
masalahnya
|
374 |
+
masih
|
375 |
+
masihkah
|
376 |
+
masing
|
377 |
+
masing-masing
|
378 |
+
mau
|
379 |
+
maupun
|
380 |
+
melainkan
|
381 |
+
melakukan
|
382 |
+
melalui
|
383 |
+
melihat
|
384 |
+
melihatnya
|
385 |
+
memang
|
386 |
+
memastikan
|
387 |
+
memberi
|
388 |
+
memberikan
|
389 |
+
membuat
|
390 |
+
memerlukan
|
391 |
+
memihak
|
392 |
+
meminta
|
393 |
+
memintakan
|
394 |
+
memisalkan
|
395 |
+
memperbuat
|
396 |
+
mempergunakan
|
397 |
+
memperkirakan
|
398 |
+
memperlihatkan
|
399 |
+
mempersiapkan
|
400 |
+
mempersoalkan
|
401 |
+
mempertanyakan
|
402 |
+
mempunyai
|
403 |
+
memulai
|
404 |
+
memungkinkan
|
405 |
+
menaiki
|
406 |
+
menambahkan
|
407 |
+
menandaskan
|
408 |
+
menanti
|
409 |
+
menanti-nanti
|
410 |
+
menantikan
|
411 |
+
menanya
|
412 |
+
menanyai
|
413 |
+
menanyakan
|
414 |
+
mendapat
|
415 |
+
mendapatkan
|
416 |
+
mendatang
|
417 |
+
mendatangi
|
418 |
+
mendatangkan
|
419 |
+
menegaskan
|
420 |
+
mengakhiri
|
421 |
+
mengapa
|
422 |
+
mengatakan
|
423 |
+
mengatakannya
|
424 |
+
mengenai
|
425 |
+
mengerjakan
|
426 |
+
mengetahui
|
427 |
+
menggunakan
|
428 |
+
menghendaki
|
429 |
+
mengibaratkan
|
430 |
+
mengibaratkannya
|
431 |
+
mengingat
|
432 |
+
mengingatkan
|
433 |
+
menginginkan
|
434 |
+
mengira
|
435 |
+
mengucapkan
|
436 |
+
mengucapkannya
|
437 |
+
mengungkapkan
|
438 |
+
menjadi
|
439 |
+
menjawab
|
440 |
+
menjelaskan
|
441 |
+
menuju
|
442 |
+
menunjuk
|
443 |
+
menunjuki
|
444 |
+
menunjukkan
|
445 |
+
menunjuknya
|
446 |
+
menurut
|
447 |
+
menuturkan
|
448 |
+
menyampaikan
|
449 |
+
menyangkut
|
450 |
+
menyatakan
|
451 |
+
menyebutkan
|
452 |
+
menyeluruh
|
453 |
+
menyiapkan
|
454 |
+
merasa
|
455 |
+
mereka
|
456 |
+
merekalah
|
457 |
+
merupakan
|
458 |
+
meski
|
459 |
+
meskipun
|
460 |
+
meyakini
|
461 |
+
meyakinkan
|
462 |
+
minta
|
463 |
+
mirip
|
464 |
+
misal
|
465 |
+
misalkan
|
466 |
+
misalnya
|
467 |
+
mula
|
468 |
+
mulai
|
469 |
+
mulailah
|
470 |
+
mulanya
|
471 |
+
mungkin
|
472 |
+
mungkinkah
|
473 |
+
nah
|
474 |
+
naik
|
475 |
+
namun
|
476 |
+
nanti
|
477 |
+
nantinya
|
478 |
+
nyaris
|
479 |
+
nyatanya
|
480 |
+
oleh
|
481 |
+
olehnya
|
482 |
+
pada
|
483 |
+
padahal
|
484 |
+
padanya
|
485 |
+
pak
|
486 |
+
paling
|
487 |
+
panjang
|
488 |
+
pantas
|
489 |
+
para
|
490 |
+
pasti
|
491 |
+
pastilah
|
492 |
+
penting
|
493 |
+
pentingnya
|
494 |
+
per
|
495 |
+
percuma
|
496 |
+
perlu
|
497 |
+
perlukah
|
498 |
+
perlunya
|
499 |
+
pernah
|
500 |
+
persoalan
|
501 |
+
pertama
|
502 |
+
pertama-tama
|
503 |
+
pertanyaan
|
504 |
+
pertanyakan
|
505 |
+
pihak
|
506 |
+
pihaknya
|
507 |
+
pukul
|
508 |
+
pula
|
509 |
+
pun
|
510 |
+
punya
|
511 |
+
rasa
|
512 |
+
rasanya
|
513 |
+
rata
|
514 |
+
rupanya
|
515 |
+
saat
|
516 |
+
saatnya
|
517 |
+
saja
|
518 |
+
sajalah
|
519 |
+
saling
|
520 |
+
sama
|
521 |
+
sama-sama
|
522 |
+
sambil
|
523 |
+
sampai
|
524 |
+
sampai-sampai
|
525 |
+
sampaikan
|
526 |
+
sana
|
527 |
+
sangat
|
528 |
+
sangatlah
|
529 |
+
satu
|
530 |
+
saya
|
531 |
+
sayalah
|
532 |
+
se
|
533 |
+
sebab
|
534 |
+
sebabnya
|
535 |
+
sebagai
|
536 |
+
sebagaimana
|
537 |
+
sebagainya
|
538 |
+
sebagian
|
539 |
+
sebaik
|
540 |
+
sebaik-baiknya
|
541 |
+
sebaiknya
|
542 |
+
sebaliknya
|
543 |
+
sebanyak
|
544 |
+
sebegini
|
545 |
+
sebegitu
|
546 |
+
sebelum
|
547 |
+
sebelumnya
|
548 |
+
sebenarnya
|
549 |
+
seberapa
|
550 |
+
sebesar
|
551 |
+
sebetulnya
|
552 |
+
sebisanya
|
553 |
+
sebuah
|
554 |
+
sebut
|
555 |
+
sebutlah
|
556 |
+
sebutnya
|
557 |
+
secara
|
558 |
+
secukupnya
|
559 |
+
sedang
|
560 |
+
sedangkan
|
561 |
+
sedemikian
|
562 |
+
sedikit
|
563 |
+
sedikitnya
|
564 |
+
seenaknya
|
565 |
+
segala
|
566 |
+
segalanya
|
567 |
+
segera
|
568 |
+
seharusnya
|
569 |
+
sehingga
|
570 |
+
seingat
|
571 |
+
sejak
|
572 |
+
sejauh
|
573 |
+
sejenak
|
574 |
+
sejumlah
|
575 |
+
sekadar
|
576 |
+
sekadarnya
|
577 |
+
sekali
|
578 |
+
sekali-kali
|
579 |
+
sekalian
|
580 |
+
sekaligus
|
581 |
+
sekalipun
|
582 |
+
sekarang
|
583 |
+
sekarang
|
584 |
+
sekecil
|
585 |
+
seketika
|
586 |
+
sekiranya
|
587 |
+
sekitar
|
588 |
+
sekitarnya
|
589 |
+
sekurang-kurangnya
|
590 |
+
sekurangnya
|
591 |
+
sela
|
592 |
+
selain
|
593 |
+
selaku
|
594 |
+
selalu
|
595 |
+
selama
|
596 |
+
selama-lamanya
|
597 |
+
selamanya
|
598 |
+
selanjutnya
|
599 |
+
seluruh
|
600 |
+
seluruhnya
|
601 |
+
semacam
|
602 |
+
semakin
|
603 |
+
semampu
|
604 |
+
semampunya
|
605 |
+
semasa
|
606 |
+
semasih
|
607 |
+
semata
|
608 |
+
semata-mata
|
609 |
+
semaunya
|
610 |
+
sementara
|
611 |
+
semisal
|
612 |
+
semisalnya
|
613 |
+
sempat
|
614 |
+
semua
|
615 |
+
semuanya
|
616 |
+
semula
|
617 |
+
sendiri
|
618 |
+
sendirian
|
619 |
+
sendirinya
|
620 |
+
seolah
|
621 |
+
seolah-olah
|
622 |
+
seorang
|
623 |
+
sepanjang
|
624 |
+
sepantasnya
|
625 |
+
sepantasnyalah
|
626 |
+
seperlunya
|
627 |
+
seperti
|
628 |
+
sepertinya
|
629 |
+
sepihak
|
630 |
+
sering
|
631 |
+
seringnya
|
632 |
+
serta
|
633 |
+
serupa
|
634 |
+
sesaat
|
635 |
+
sesama
|
636 |
+
sesampai
|
637 |
+
sesegera
|
638 |
+
sesekali
|
639 |
+
seseorang
|
640 |
+
sesuatu
|
641 |
+
sesuatunya
|
642 |
+
sesudah
|
643 |
+
sesudahnya
|
644 |
+
setelah
|
645 |
+
setempat
|
646 |
+
setengah
|
647 |
+
seterusnya
|
648 |
+
setiap
|
649 |
+
setiba
|
650 |
+
setibanya
|
651 |
+
setidak-tidaknya
|
652 |
+
setidaknya
|
653 |
+
setinggi
|
654 |
+
seusai
|
655 |
+
sewaktu
|
656 |
+
siap
|
657 |
+
siapa
|
658 |
+
siapakah
|
659 |
+
siapapun
|
660 |
+
sini
|
661 |
+
sinilah
|
662 |
+
soal
|
663 |
+
soalnya
|
664 |
+
suatu
|
665 |
+
sudah
|
666 |
+
sudahkah
|
667 |
+
sudahlah
|
668 |
+
supaya
|
669 |
+
tadi
|
670 |
+
tadinya
|
671 |
+
tahu
|
672 |
+
tahun
|
673 |
+
tak
|
674 |
+
tambah
|
675 |
+
tambahnya
|
676 |
+
tampak
|
677 |
+
tampaknya
|
678 |
+
tandas
|
679 |
+
tandasnya
|
680 |
+
tanpa
|
681 |
+
tanya
|
682 |
+
tanyakan
|
683 |
+
tanyanya
|
684 |
+
tapi
|
685 |
+
tegas
|
686 |
+
tegasnya
|
687 |
+
telah
|
688 |
+
tempat
|
689 |
+
tengah
|
690 |
+
tentang
|
691 |
+
tentu
|
692 |
+
tentulah
|
693 |
+
tentunya
|
694 |
+
tepat
|
695 |
+
terakhir
|
696 |
+
terasa
|
697 |
+
terbanyak
|
698 |
+
terdahulu
|
699 |
+
terdapat
|
700 |
+
terdiri
|
701 |
+
terhadap
|
702 |
+
terhadapnya
|
703 |
+
teringat
|
704 |
+
teringat-ingat
|
705 |
+
terjadi
|
706 |
+
terjadilah
|
707 |
+
terjadinya
|
708 |
+
terkira
|
709 |
+
terlalu
|
710 |
+
terlebih
|
711 |
+
terlihat
|
712 |
+
termasuk
|
713 |
+
ternyata
|
714 |
+
tersampaikan
|
715 |
+
tersebut
|
716 |
+
tersebutlah
|
717 |
+
tertentu
|
718 |
+
tertuju
|
719 |
+
terus
|
720 |
+
terutama
|
721 |
+
tetap
|
722 |
+
tetapi
|
723 |
+
tiap
|
724 |
+
tiba
|
725 |
+
tiba-tiba
|
726 |
+
tidak
|
727 |
+
tidakkah
|
728 |
+
tidaklah
|
729 |
+
tiga
|
730 |
+
tinggi
|
731 |
+
toh
|
732 |
+
tunjuk
|
733 |
+
turut
|
734 |
+
tutur
|
735 |
+
tuturnya
|
736 |
+
ucap
|
737 |
+
ucapnya
|
738 |
+
ujar
|
739 |
+
ujarnya
|
740 |
+
umum
|
741 |
+
umumnya
|
742 |
+
ungkap
|
743 |
+
ungkapnya
|
744 |
+
untuk
|
745 |
+
usah
|
746 |
+
usai
|
747 |
+
waduh
|
748 |
+
wah
|
749 |
+
wahai
|
750 |
+
waktu
|
751 |
+
waktunya
|
752 |
+
walau
|
753 |
+
walaupun
|
754 |
+
wong
|
755 |
+
yaitu
|
756 |
+
yakin
|
757 |
+
yakni
|
758 |
+
yang
|
summary.py
ADDED
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Flask, request, render_template, jsonify
|
2 |
+
import pandas as pd
|
3 |
+
import requests
|
4 |
+
import os
|
5 |
+
import re
|
6 |
+
import networkx as nx
|
7 |
+
from nltk.tokenize import word_tokenize, sent_tokenize
|
8 |
+
from nltk.corpus import stopwords
|
9 |
+
from bs4 import BeautifulSoup
|
10 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
11 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
12 |
+
import matplotlib.pyplot as plt
|
13 |
+
import nltk
|
14 |
+
|
15 |
+
# Initialise NLTK: fetch the stopword corpus and the sentence/word tokenizer
# models used by preprocess_text() and summarize_and_visualize().
nltk.download("stopwords")
nltk.download("punkt")
# Recent NLTK releases (including the 3.9.1 pinned in requirements.txt) load
# the tokenizer from the "punkt_tab" resource instead of the pickled "punkt"
# one; without it word_tokenize()/sent_tokenize() raise LookupError.
nltk.download("punkt_tab")
|
18 |
+
|
19 |
+
# Inisialisasi Flask
|
20 |
+
app = Flask(__name__)
|
21 |
+
|
22 |
+
# Fungsi untuk scraping berita
|
23 |
+
def scrape_news(url, timeout=10):
    """Download one news article and return its title and body text.

    The page is fetched with a browser-like User-Agent and parsed with
    BeautifulSoup. The title is read from an ``<h1>`` with the classes
    ``mb-4 text-32 font-extrabold`` and the body from the ``<p>`` elements
    inside ``<div class="detail-text">``; these selectors are hard-coded,
    so pages with a different layout yield the "not found" placeholders.

    Args:
        url: Address of the article to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the request indefinitely.

    Returns:
        A one-row ``pandas.DataFrame`` with the columns ``judul`` (title) and
        ``isi`` (content). When the download fails, ``judul`` is ``"Error"``
        and ``isi`` holds the failure message; no exception is raised for
        ``requests`` errors.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # Report the failure through the same one-row frame the caller expects.
        return pd.DataFrame(
            {"judul": ["Error"], "isi": [f"Gagal mengambil data: {e}"]}
        )

    article_full = BeautifulSoup(response.content, "html.parser")

    judul_element = article_full.find("h1", class_="mb-4 text-32 font-extrabold")
    if judul_element:
        judul_artikel = judul_element.text.strip()
    else:
        judul_artikel = "Judul tidak ditemukan"

    artikel_element = article_full.find("div", class_="detail-text")
    if artikel_element:
        # One paragraph per line keeps sentence boundaries visible downstream.
        artikel_content = "\n".join(
            p.get_text(strip=True) for p in artikel_element.find_all("p")
        )
    else:
        artikel_content = "Konten artikel tidak ditemukan"

    return pd.DataFrame({"judul": [judul_artikel], "isi": [artikel_content]})
|
52 |
+
|
53 |
+
# Fungsi preprocessing
|
54 |
+
def preprocess_text(content):
    """Normalise *content* for TF-IDF and return it as one cleaned string.

    The text is lowercased, every character other than ``a-z``, space and
    ``.`` is replaced by a space, runs of whitespace are collapsed, the
    result is split with NLTK's ``word_tokenize`` and tokens found in NLTK's
    Indonesian stopword list are dropped. The surviving tokens are joined
    with single spaces.
    """
    lowered = content.lower()
    cleaned = re.sub(r"[0-9]|[/(){}\[\]\|@,;_]|[^a-z .]+", " ", lowered)
    cleaned = re.sub(r"\s+", " ", cleaned).strip()

    words = word_tokenize(cleaned)
    indonesian_stopwords = set(stopwords.words("indonesian"))

    kept = []
    for word in words:
        if word in indonesian_stopwords:
            continue
        kept.append(word)
    return " ".join(kept)
|
62 |
+
|
63 |
+
# Fungsi untuk membuat ringkasan dan visualisasi graf
|
64 |
+
def summarize_and_visualize(content):
    """Build an extractive summary of *content* and save the sentence graph.

    The text is split into sentences, each sentence is cleaned with
    ``preprocess_text`` and vectorised with TF-IDF. Two different sentences
    are linked in a directed graph when their cosine similarity exceeds 0.1.
    The summary is made of the (up to) three sentences with the highest
    closeness centrality, joined by spaces in descending score order.

    As a side effect the graph is drawn with matplotlib and written to
    ``static/graph.png`` (relative to the working directory), replacing any
    previous file.

    Args:
        content: Raw article text.

    Returns:
        The summary string; empty when *content* contains no sentences.
    """
    kalimat = sent_tokenize(content)
    # Clean sentence by sentence so that row i of the TF-IDF matrix (and thus
    # graph node i) always refers to kalimat[i]. Re-tokenising the cleaned
    # text as a whole can yield a different number of sentences, because the
    # cleaning removes "?"/"!" and capitalisation.
    kalimat_preprocessing = [preprocess_text(teks) for teks in kalimat]

    G = nx.DiGraph()
    G.add_nodes_from(range(len(kalimat)))

    try:
        tfidf_matrix = TfidfVectorizer().fit_transform(kalimat_preprocessing)
    except ValueError:
        # Raised for an empty vocabulary (no sentences, or only stopwords):
        # there is nothing to compare, so the graph simply has no edges.
        cossim_prep = None
    else:
        cossim_prep = cosine_similarity(tfidf_matrix, tfidf_matrix)

    if cossim_prep is not None:
        jumlah = len(kalimat)
        G.add_edges_from(
            (i, j)
            for i in range(jumlah)
            for j in range(jumlah)
            if i != j and cossim_prep[i][j] > 0.1
        )

    # Rank sentences by closeness centrality and keep the top three.
    closeness_scores = nx.closeness_centrality(G)
    sorted_closeness = sorted(
        closeness_scores.items(), key=lambda item: item[1], reverse=True
    )
    ringkasan = " ".join(kalimat[node] for node, _ in sorted_closeness[:3])

    # Draw the graph and store it where the result template expects it.
    graph_path = "static/graph.png"
    os.makedirs(os.path.dirname(graph_path), exist_ok=True)
    plt.figure(figsize=(10, 8))
    try:
        pos = nx.spring_layout(G, k=2)
        nx.draw_networkx_nodes(G, pos, node_size=500, node_color="b")
        nx.draw_networkx_edges(G, pos, edge_color="red", arrows=True)
        nx.draw_networkx_labels(G, pos, font_size=10)
        plt.title("Graph Representation of Sentence Similarity")
        # Remove a stale image first, then write the new one.
        if os.path.exists(graph_path):
            os.remove(graph_path)
        plt.savefig(graph_path)
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close()

    return ringkasan
|
104 |
+
|
105 |
+
# Route utama untuk scraping dan analisis
|
106 |
+
@app.route("/", methods=["GET", "POST"])
def index():
    """Serve the URL form and, on POST, the summary page for that URL.

    GET renders ``summary.html``. POST reads the ``url`` form field, scrapes
    the article with ``scrape_news``, summarises it with
    ``summarize_and_visualize`` and renders ``result.html`` with the title,
    content, summary and the path of the generated graph image. A missing
    URL or a failed download re-renders ``summary.html`` with an error
    message.
    """
    if request.method != "POST":
        return render_template("summary.html")

    url = (request.form.get("url") or "").strip()
    if not url:
        return render_template("summary.html", error="URL tidak boleh kosong.")

    # NOTE(review): the server fetches whatever URL the visitor submits;
    # consider restricting allowed hosts if this is deployed publicly.
    df = scrape_news(url)
    if df.empty:
        return render_template("summary.html", error="Gagal mengambil data dari URL.")

    content = df["isi"].iloc[0]
    title = df["judul"].iloc[0]

    # scrape_news reports a failed download as a regular row (title "Error",
    # content starting with the failure text) rather than an empty frame, so
    # it has to be detected here instead of being summarised like an article.
    if title == "Error" and content.startswith("Gagal mengambil data:"):
        return render_template("summary.html", error="Gagal mengambil data dari URL.")

    # Preprocess, summarise and render the sentence graph.
    ringkasan = summarize_and_visualize(content)
    return render_template("result.html", title=title, content=content, summary=ringkasan, graph_url="static/graph.png")
|
125 |
+
|
126 |
+
# Menjalankan aplikasi Flask
|
127 |
+
if __name__ == "__main__":
    # Local entry point (python summary.py). The Werkzeug debugger lets
    # anyone who can reach the port run code, so debug mode is opt-in via
    # FLASK_DEBUG=1 instead of always on.
    app.run(debug=os.environ.get("FLASK_DEBUG") == "1", port=5002)
|
templates/index.html
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html lang="id">
|
3 |
+
<head>
|
4 |
+
<meta charset="UTF-8">
|
5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6 |
+
<title>Klasifikasi Berita</title>
|
7 |
+
<link rel="stylesheet" href="{{ url_for('static', filename='styles.css') }}"> <!-- Link ke file CSS -->
|
8 |
+
</head>
|
9 |
+
<body>
|
10 |
+
<div class="container">
|
11 |
+
<h1>Klasifikasi Berita</h1>
|
12 |
+
<form action="{{ url_for('index') }}" method="POST">
|
13 |
+
<label for="link_news">Masukkan Link Berita:</label>
|
14 |
+
<input type="url" id="link_news" name="link_news" placeholder="https://www.cnbcindonesia.com/news/..." required>
|
15 |
+
<br>
|
16 |
+
<label for="model">Pilih Model Klasifikasi:</label>
|
17 |
+
<select id="model" name="model" required>
|
18 |
+
<option value="logistic_regression">Logistic Regression</option>
|
19 |
+
<option value="lr_modelNcompo5">Logistic Regression dengan SVD (5 Komponen)</option>
|
20 |
+
<option value="lr_modelNcompo10">Logistic Regression dengan SVD (10 Komponen)</option>
|
21 |
+
</select>
|
22 |
+
|
23 |
+
<button type="submit">Klasifikasikan</button>
|
24 |
+
</form>
|
25 |
+
|
26 |
+
{% if error %}
|
27 |
+
<p class="error">{{ error }}</p>
|
28 |
+
{% endif %}
|
29 |
+
|
30 |
+
{% if result %}
|
31 |
+
<h2>Hasil Klasifikasi: {{ result }}</h2>
|
32 |
+
<p>Probabilitas Kategori Berita: {{ prob_news }}%</p>
|
33 |
+
<p>Probabilitas Kategori Penelitian: {{ prob_research }}%</p>
|
34 |
+
{% endif %}
|
35 |
+
</div>
|
36 |
+
</body>
|
37 |
+
</html>
|
templates/result.html
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html lang="en">
|
3 |
+
<head>
|
4 |
+
<meta charset="UTF-8">
|
5 |
+
<title>Summary Result</title>
|
6 |
+
</head>
|
7 |
+
<body>
|
8 |
+
<h1>{{ title }}</h1>
|
9 |
+
<h2>Konten:</h2>
|
10 |
+
<p>{{ content[:1000] }}...</p>
|
11 |
+
<h2>Ringkasan:</h2>
|
12 |
+
<p>{{ summary }}</p>
|
13 |
+
<h2>Visualisasi Graf:</h2>
|
14 |
+
<img src="{{ graph_url }}" alt="Graph Visualization">
|
15 |
+
<a href="/">Kembali</a>
|
16 |
+
</body>
|
17 |
+
</html>
|
templates/summary.html
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html lang="en">
|
3 |
+
<head>
|
4 |
+
<meta charset="UTF-8">
|
5 |
+
<title>News Scraper</title>
|
6 |
+
</head>
|
7 |
+
<body>
|
8 |
+
<h1>Web Article Summarizer with Graph Visualization</h1>
|
9 |
+
<form method="post">
|
10 |
+
<label for="url">Masukkan URL artikel:</label><br>
|
11 |
+
<input type="text" id="url" name="url"><br><br>
|
12 |
+
<input type="submit" value="Generate Summary">
|
13 |
+
</form>
|
14 |
+
{% if error %}
|
15 |
+
<p style="color:red;">{{ error }}</p>
|
16 |
+
{% endif %}
|
17 |
+
</body>
|
18 |
+
</html>
|
templates/teks.html
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!-- templates/teks.html -->
|
2 |
+
<!DOCTYPE html>
|
3 |
+
<html lang="id">
|
4 |
+
<head>
|
5 |
+
<meta charset="UTF-8">
|
6 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
7 |
+
<title>Klasifikasi Teks</title>
|
8 |
+
</head>
|
9 |
+
<body>
|
10 |
+
<h1>Klasifikasi Teks Berita</h1>
|
11 |
+
<form action="/teks" method="post">
|
12 |
+
<label for="user_input">Masukkan Teks Berita:</label>
|
13 |
+
<textarea name="user_input" id="user_input" rows="4" required></textarea>
|
14 |
+
<label for="model">Pilih Model:</label>
|
15 |
+
<select name="model" id="model">
|
16 |
+
<option value="logistic_regression">Model Default</option>
|
17 |
+
<option value="lr_modelNcompo5">Model Nkompo 5</option>
|
18 |
+
<option value="lr_modelNcompo10">Model Nkompo 10</option>
|
19 |
+
</select>
|
20 |
+
<input type="submit" value="Prediksi">
|
21 |
+
</form>
|
22 |
+
|
23 |
+
{% if error %}
|
24 |
+
<p style="color:red;">{{ error }}</p>
|
25 |
+
{% endif %}
|
26 |
+
{% if result %}
|
27 |
+
<h2>Hasil Klasifikasi: {{ result }}</h2>
|
28 |
+
<p>Probabilitas Berita: {{ prob_news }}</p>
|
29 |
+
<p>Probabilitas Riset: {{ prob_research }}</p>
|
30 |
+
{% endif %}
|
31 |
+
</body>
|
32 |
+
</html>
|