sneakykilli committed on
Commit
39220e8
1 Parent(s): e411c5e

Add BERTopic model

Browse files
README.md ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ tags:
4
+ - bertopic
5
+ library_name: bertopic
6
+ pipeline_tag: text-classification
7
+ ---
8
+
9
+ # Singapore_BERTopic
10
+
11
+ This is a [BERTopic](https://github.com/MaartenGr/BERTopic) model.
12
+ BERTopic is a flexible and modular topic modeling framework that allows for the generation of easily interpretable topics from large datasets.
13
+
14
+ ## Usage
15
+
16
+ To use this model, please install BERTopic:
17
+
18
+ ```bash
19
+ pip install -U bertopic
20
+ ```
21
+
22
+ You can use the model as follows:
23
+
24
+ ```python
25
+ from bertopic import BERTopic
26
+ topic_model = BERTopic.load("sneakykilli/Singapore_BERTopic")
27
+
28
+ topic_model.get_topic_info()
29
+ ```
30
+
31
+ ## Topic overview
32
+
33
+ * Number of topics: 10
34
+ * Number of training documents: 160
35
+
36
+ <details>
37
+ <summary>Click here for an overview of all topics.</summary>
38
+
39
+ | Topic ID | Topic Keywords | Topic Frequency | Label |
40
+ |----------|----------------|-----------------|-------|
41
+ | -1 | airline - airlines - flights - refund - flight | 31 | -1_airline_airlines_flights_refund |
42
+ | 0 | airline - airlines - flights - singapore - meals | 43 | 0_airline_airlines_flights_singapore |
43
+ | 1 | refund - airline - airlines - complaint - singapore | 20 | 1_refund_airline_airlines_complaint |
44
+ | 2 | baggage - luggage - airlines - airline - bags | 14 | 2_baggage_luggage_airlines_airline |
45
+ | 3 | airlines - passengers - seats - flight - cabin | 11 | 3_airlines_passengers_seats_flight |
46
+ | 4 | refund - repayment - sia - customer - complaints | 10 | 4_refund_repayment_sia_customer |
47
+ | 5 | airlines - airline - fees - singapore - flights | 9 | 5_airlines_airline_fees_singapore |
48
+ | 6 | refund - airline - cancellation - booking - cancel | 9 | 6_refund_airline_cancellation_booking |
49
+ | 7 | miles - airlines - airline - mileage - loyalty | 7 | 7_miles_airlines_airline_mileage |
50
+ | 8 | airline - flight - reviews - booking - customer | 6 | 8_airline_flight_reviews_booking |
51
+
52
+ </details>
53
+
54
+ ## Training hyperparameters
55
+
56
+ * calculate_probabilities: False
57
+ * language: None
58
+ * low_memory: False
59
+ * min_topic_size: 5
60
+ * n_gram_range: (1, 1)
61
+ * nr_topics: None
62
+ * seed_topic_list: None
63
+ * top_n_words: 10
64
+ * verbose: False
65
+ * zeroshot_min_similarity: 0.7
66
+ * zeroshot_topic_list: None
67
+
68
+ ## Framework versions
69
+
70
+ * Numpy: 1.24.3
71
+ * HDBSCAN: 0.8.33
72
+ * UMAP: 0.5.5
73
+ * Pandas: 2.0.3
74
+ * Scikit-Learn: 1.2.2
75
+ * Sentence-transformers: 2.3.1
76
+ * Transformers: 4.36.2
77
+ * Numba: 0.57.1
78
+ * Plotly: 5.16.1
79
+ * Python: 3.10.12
config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "calculate_probabilities": false,
3
+ "language": null,
4
+ "low_memory": false,
5
+ "min_topic_size": 5,
6
+ "n_gram_range": [
7
+ 1,
8
+ 1
9
+ ],
10
+ "nr_topics": null,
11
+ "seed_topic_list": null,
12
+ "top_n_words": 10,
13
+ "verbose": false,
14
+ "zeroshot_min_similarity": 0.7,
15
+ "zeroshot_topic_list": null,
16
+ "embedding_model": "all-MiniLM-L6-v2"
17
+ }
ctfidf.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab6e192a6b8c4a5895c844acd2e5059ef123e06f8aee394e0c21aa10e9d79c0c
3
+ size 77752
ctfidf_config.json ADDED
The diff for this file is too large to render. See raw diff
 
topic_embeddings.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dcc05adfdd3c56d5d8a07c2f46f5a79ee7e42f8a85d74317e06ab055080d3fa
3
+ size 15448
topics.json ADDED
@@ -0,0 +1,665 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "topic_representations": {
3
+ "-1": [
4
+ [
5
+ "airline",
6
+ 0.41720014810562134
7
+ ],
8
+ [
9
+ "airlines",
10
+ 0.41072994470596313
11
+ ],
12
+ [
13
+ "flights",
14
+ 0.3909800052642822
15
+ ],
16
+ [
17
+ "refund",
18
+ 0.3571051359176636
19
+ ],
20
+ [
21
+ "flight",
22
+ 0.3357126712799072
23
+ ],
24
+ [
25
+ "fare",
26
+ 0.3030855059623718
27
+ ],
28
+ [
29
+ "singapore",
30
+ 0.29068851470947266
31
+ ],
32
+ [
33
+ "complaint",
34
+ 0.274583637714386
35
+ ],
36
+ [
37
+ "flying",
38
+ 0.26368528604507446
39
+ ],
40
+ [
41
+ "voucher",
42
+ 0.2579178214073181
43
+ ]
44
+ ],
45
+ "0": [
46
+ [
47
+ "airline",
48
+ 0.47997474670410156
49
+ ],
50
+ [
51
+ "airlines",
52
+ 0.4713612198829651
53
+ ],
54
+ [
55
+ "flights",
56
+ 0.4244295060634613
57
+ ],
58
+ [
59
+ "singapore",
60
+ 0.36062896251678467
61
+ ],
62
+ [
63
+ "meals",
64
+ 0.3596476912498474
65
+ ],
66
+ [
67
+ "flight",
68
+ 0.35365304350852966
69
+ ],
70
+ [
71
+ "food",
72
+ 0.29792851209640503
73
+ ],
74
+ [
75
+ "eating",
76
+ 0.2671264708042145
77
+ ],
78
+ [
79
+ "bread",
80
+ 0.2548995316028595
81
+ ],
82
+ [
83
+ "fly",
84
+ 0.2533579170703888
85
+ ]
86
+ ],
87
+ "1": [
88
+ [
89
+ "refund",
90
+ 0.4560515880584717
91
+ ],
92
+ [
93
+ "airline",
94
+ 0.3744000792503357
95
+ ],
96
+ [
97
+ "airlines",
98
+ 0.3643943667411804
99
+ ],
100
+ [
101
+ "complaint",
102
+ 0.3607919216156006
103
+ ],
104
+ [
105
+ "singapore",
106
+ 0.35467201471328735
107
+ ],
108
+ [
109
+ "flights",
110
+ 0.32735028862953186
111
+ ],
112
+ [
113
+ "cancel",
114
+ 0.3221847414970398
115
+ ],
116
+ [
117
+ "compensation",
118
+ 0.2879146933555603
119
+ ],
120
+ [
121
+ "flight",
122
+ 0.2787000834941864
123
+ ],
124
+ [
125
+ "ticket",
126
+ 0.27175629138946533
127
+ ]
128
+ ],
129
+ "2": [
130
+ [
131
+ "baggage",
132
+ 0.47602730989456177
133
+ ],
134
+ [
135
+ "luggage",
136
+ 0.45390939712524414
137
+ ],
138
+ [
139
+ "airlines",
140
+ 0.43988215923309326
141
+ ],
142
+ [
143
+ "airline",
144
+ 0.42567938566207886
145
+ ],
146
+ [
147
+ "bags",
148
+ 0.3698173463344574
149
+ ],
150
+ [
151
+ "bag",
152
+ 0.3604351878166199
153
+ ],
154
+ [
155
+ "handbag",
156
+ 0.33640238642692566
157
+ ],
158
+ [
159
+ "haul",
160
+ 0.31882810592651367
161
+ ],
162
+ [
163
+ "suitcase",
164
+ 0.31776320934295654
165
+ ],
166
+ [
167
+ "singapore",
168
+ 0.30839091539382935
169
+ ]
170
+ ],
171
+ "3": [
172
+ [
173
+ "airlines",
174
+ 0.44553548097610474
175
+ ],
176
+ [
177
+ "passengers",
178
+ 0.408767968416214
179
+ ],
180
+ [
181
+ "seats",
182
+ 0.33244749903678894
183
+ ],
184
+ [
185
+ "flight",
186
+ 0.3313222825527191
187
+ ],
188
+ [
189
+ "cabin",
190
+ 0.31762823462486267
191
+ ],
192
+ [
193
+ "attendants",
194
+ 0.3064364790916443
195
+ ],
196
+ [
197
+ "seat",
198
+ 0.28947174549102783
199
+ ],
200
+ [
201
+ "plane",
202
+ 0.2577582001686096
203
+ ],
204
+ [
205
+ "singapore",
206
+ 0.25600093603134155
207
+ ],
208
+ [
209
+ "class",
210
+ 0.24720510840415955
211
+ ]
212
+ ],
213
+ "4": [
214
+ [
215
+ "refund",
216
+ 0.4529116153717041
217
+ ],
218
+ [
219
+ "repayment",
220
+ 0.37985289096832275
221
+ ],
222
+ [
223
+ "sia",
224
+ 0.36999768018722534
225
+ ],
226
+ [
227
+ "customer",
228
+ 0.3524300456047058
229
+ ],
230
+ [
231
+ "complaints",
232
+ 0.33408963680267334
233
+ ],
234
+ [
235
+ "customers",
236
+ 0.3327406048774719
237
+ ],
238
+ [
239
+ "bank",
240
+ 0.32434985041618347
241
+ ],
242
+ [
243
+ "service",
244
+ 0.30804628133773804
245
+ ],
246
+ [
247
+ "charges",
248
+ 0.30167824029922485
249
+ ],
250
+ [
251
+ "contact",
252
+ 0.29032042622566223
253
+ ]
254
+ ],
255
+ "5": [
256
+ [
257
+ "airlines",
258
+ 0.425093412399292
259
+ ],
260
+ [
261
+ "airline",
262
+ 0.4162958860397339
263
+ ],
264
+ [
265
+ "fees",
266
+ 0.39408397674560547
267
+ ],
268
+ [
269
+ "singapore",
270
+ 0.39009320735931396
271
+ ],
272
+ [
273
+ "flights",
274
+ 0.3763861060142517
275
+ ],
276
+ [
277
+ "booking",
278
+ 0.3662506937980652
279
+ ],
280
+ [
281
+ "fare",
282
+ 0.36056625843048096
283
+ ],
284
+ [
285
+ "ticket",
286
+ 0.33361199498176575
287
+ ],
288
+ [
289
+ "extortion",
290
+ 0.3151136040687561
291
+ ],
292
+ [
293
+ "flight",
294
+ 0.3104732036590576
295
+ ]
296
+ ],
297
+ "6": [
298
+ [
299
+ "refund",
300
+ 0.5809109210968018
301
+ ],
302
+ [
303
+ "airline",
304
+ 0.4229697287082672
305
+ ],
306
+ [
307
+ "cancellation",
308
+ 0.3839954733848572
309
+ ],
310
+ [
311
+ "booking",
312
+ 0.36461541056632996
313
+ ],
314
+ [
315
+ "cancel",
316
+ 0.36459028720855713
317
+ ],
318
+ [
319
+ "repurchase",
320
+ 0.363625705242157
321
+ ],
322
+ [
323
+ "receipt",
324
+ 0.32762402296066284
325
+ ],
326
+ [
327
+ "flight",
328
+ 0.3106320798397064
329
+ ],
330
+ [
331
+ "return",
332
+ 0.29938411712646484
333
+ ],
334
+ [
335
+ "overbooked",
336
+ 0.2789418399333954
337
+ ]
338
+ ],
339
+ "7": [
340
+ [
341
+ "miles",
342
+ 0.38810890913009644
343
+ ],
344
+ [
345
+ "airlines",
346
+ 0.38180404901504517
347
+ ],
348
+ [
349
+ "airline",
350
+ 0.35821104049682617
351
+ ],
352
+ [
353
+ "mileage",
354
+ 0.3231382966041565
355
+ ],
356
+ [
357
+ "loyalty",
358
+ 0.30341872572898865
359
+ ],
360
+ [
361
+ "booking",
362
+ 0.2877732515335083
363
+ ],
364
+ [
365
+ "singapore",
366
+ 0.28564366698265076
367
+ ],
368
+ [
369
+ "alliance",
370
+ 0.27681979537010193
371
+ ],
372
+ [
373
+ "redeeming",
374
+ 0.2671453356742859
375
+ ],
376
+ [
377
+ "service",
378
+ 0.25483736395835876
379
+ ]
380
+ ],
381
+ "8": [
382
+ [
383
+ "airline",
384
+ 0.582975447177887
385
+ ],
386
+ [
387
+ "flight",
388
+ 0.4120853543281555
389
+ ],
390
+ [
391
+ "reviews",
392
+ 0.37630897760391235
393
+ ],
394
+ [
395
+ "booking",
396
+ 0.37470388412475586
397
+ ],
398
+ [
399
+ "customer",
400
+ 0.341844379901886
401
+ ],
402
+ [
403
+ "review",
404
+ 0.3333813548088074
405
+ ],
406
+ [
407
+ "flyer",
408
+ 0.32562053203582764
409
+ ],
410
+ [
411
+ "agency",
412
+ 0.29199445247650146
413
+ ],
414
+ [
415
+ "ticket",
416
+ 0.2773098945617676
417
+ ],
418
+ [
419
+ "service",
420
+ 0.25537988543510437
421
+ ]
422
+ ]
423
+ },
424
+ "topics": [
425
+ 0,
426
+ 8,
427
+ -1,
428
+ 1,
429
+ 5,
430
+ 6,
431
+ 0,
432
+ 2,
433
+ 4,
434
+ 1,
435
+ 7,
436
+ 3,
437
+ -1,
438
+ -1,
439
+ 3,
440
+ 8,
441
+ 0,
442
+ 4,
443
+ 2,
444
+ 3,
445
+ -1,
446
+ 7,
447
+ 3,
448
+ 1,
449
+ 4,
450
+ 1,
451
+ 5,
452
+ -1,
453
+ 2,
454
+ -1,
455
+ 4,
456
+ 6,
457
+ 8,
458
+ 0,
459
+ 8,
460
+ 3,
461
+ 0,
462
+ 7,
463
+ 6,
464
+ 0,
465
+ -1,
466
+ 4,
467
+ -1,
468
+ 0,
469
+ 0,
470
+ -1,
471
+ 0,
472
+ 1,
473
+ 0,
474
+ -1,
475
+ 0,
476
+ 0,
477
+ 0,
478
+ 0,
479
+ 0,
480
+ -1,
481
+ 0,
482
+ 6,
483
+ 5,
484
+ 2,
485
+ 0,
486
+ 0,
487
+ 2,
488
+ 5,
489
+ 1,
490
+ 0,
491
+ -1,
492
+ -1,
493
+ 4,
494
+ 7,
495
+ 2,
496
+ 0,
497
+ -1,
498
+ 5,
499
+ 8,
500
+ 1,
501
+ -1,
502
+ 4,
503
+ 0,
504
+ 0,
505
+ -1,
506
+ 1,
507
+ 3,
508
+ -1,
509
+ 6,
510
+ 1,
511
+ 0,
512
+ 6,
513
+ 2,
514
+ 1,
515
+ 6,
516
+ 7,
517
+ 1,
518
+ -1,
519
+ 3,
520
+ -1,
521
+ 0,
522
+ -1,
523
+ 0,
524
+ 0,
525
+ 5,
526
+ 0,
527
+ -1,
528
+ 1,
529
+ 3,
530
+ 5,
531
+ -1,
532
+ -1,
533
+ 3,
534
+ 1,
535
+ 8,
536
+ 0,
537
+ -1,
538
+ -1,
539
+ 0,
540
+ -1,
541
+ -1,
542
+ 0,
543
+ 6,
544
+ -1,
545
+ 0,
546
+ 0,
547
+ 5,
548
+ 3,
549
+ 0,
550
+ 2,
551
+ 0,
552
+ 1,
553
+ 2,
554
+ 0,
555
+ 2,
556
+ 6,
557
+ 0,
558
+ 2,
559
+ 7,
560
+ 4,
561
+ 2,
562
+ 0,
563
+ -1,
564
+ 7,
565
+ 0,
566
+ 0,
567
+ 2,
568
+ -1,
569
+ 5,
570
+ 0,
571
+ 2,
572
+ -1,
573
+ 1,
574
+ 0,
575
+ 1,
576
+ 0,
577
+ 1,
578
+ 1,
579
+ 1,
580
+ 0,
581
+ 3,
582
+ 4,
583
+ 1,
584
+ 4
585
+ ],
586
+ "topic_sizes": {
587
+ "0": 43,
588
+ "8": 6,
589
+ "-1": 31,
590
+ "1": 20,
591
+ "5": 9,
592
+ "6": 9,
593
+ "2": 14,
594
+ "4": 10,
595
+ "7": 7,
596
+ "3": 11
597
+ },
598
+ "topic_mapper": [
599
+ [
600
+ -1,
601
+ -1,
602
+ -1
603
+ ],
604
+ [
605
+ 0,
606
+ 0,
607
+ 0
608
+ ],
609
+ [
610
+ 1,
611
+ 1,
612
+ 4
613
+ ],
614
+ [
615
+ 2,
616
+ 2,
617
+ 8
618
+ ],
619
+ [
620
+ 3,
621
+ 3,
622
+ 6
623
+ ],
624
+ [
625
+ 4,
626
+ 4,
627
+ 3
628
+ ],
629
+ [
630
+ 5,
631
+ 5,
632
+ 7
633
+ ],
634
+ [
635
+ 6,
636
+ 6,
637
+ 2
638
+ ],
639
+ [
640
+ 7,
641
+ 7,
642
+ 1
643
+ ],
644
+ [
645
+ 8,
646
+ 8,
647
+ 5
648
+ ]
649
+ ],
650
+ "topic_labels": {
651
+ "-1": "-1_airline_airlines_flights_refund",
652
+ "0": "0_airline_airlines_flights_singapore",
653
+ "1": "1_refund_airline_airlines_complaint",
654
+ "2": "2_baggage_luggage_airlines_airline",
655
+ "3": "3_airlines_passengers_seats_flight",
656
+ "4": "4_refund_repayment_sia_customer",
657
+ "5": "5_airlines_airline_fees_singapore",
658
+ "6": "6_refund_airline_cancellation_booking",
659
+ "7": "7_miles_airlines_airline_mileage",
660
+ "8": "8_airline_flight_reviews_booking"
661
+ },
662
+ "custom_labels": null,
663
+ "_outliers": 1,
664
+ "topic_aspects": {}
665
+ }