{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelcommentusersubredditdatesup_commentpredictionconfidenceTopic_key_word
00Actually most of her supporters and sane peopl...Quinnjesterpolitics2016-09Hillary's Surrogotes Told to Blame Media for '...00.974983TODO 2
10They can't survive without an echo chamber whi...TheGettysburgAddressThe_Donald2016-11Thank God Liberals like to live in concentrate...10.956885TODO 2
20you're pretty cute yourself 1729 totalSempiternally_free2007scape2016-11Saw this cutie training his Attack today...00.899885TODO 2
30If you kill me you'll crash the meme marketCatacomb82AskReddit2016-10If you were locked in a room with 49 other peo...00.905721TODO 2
40I bet he wrote that last message as he was sob...Dorian-throwawayniceguys2016-11You're not even that pretty!10.589593TODO 2
..............................
2516031Respect your elders you little snot.Tiffany_Butlersports2009-06Aren't you a little old to be on the internet,...10.852649TODO 1
2516041I'm just glad they won't be using taxpayer mon...harryballsagnacanada2009-06\"I'm sorry, I can't hear you over the sound of...10.974458TODO 0
2516051what.. with this awesome narration?aberantlost2009-04So far, so lame.10.809398TODO 1
2516061He looks trustworthy.permacultureunitedkingdom2009-01\"I don't care\" says Lapland boss10.979738TODO 4
2516071Well yeah, but it'll work this time.SovereignManpolitics2009-02When their efforts failed, as they usually did...10.975283TODO 1
\n", "

5000 rows × 9 columns

\n", "
" ], "text/plain": [ " label comment \\\n", "0 0 Actually most of her supporters and sane peopl... \n", "1 0 They can't survive without an echo chamber whi... \n", "2 0 you're pretty cute yourself 1729 total \n", "3 0 If you kill me you'll crash the meme market \n", "4 0 I bet he wrote that last message as he was sob... \n", "... ... ... \n", "251603 1 Respect your elders you little snot. \n", "251604 1 I'm just glad they won't be using taxpayer mon... \n", "251605 1 what.. with this awesome narration? \n", "251606 1 He looks trustworthy. \n", "251607 1 Well yeah, but it'll work this time. \n", "\n", " user subreddit date \\\n", "0 Quinnjester politics 2016-09 \n", "1 TheGettysburgAddress The_Donald 2016-11 \n", "2 Sempiternally_free 2007scape 2016-11 \n", "3 Catacomb82 AskReddit 2016-10 \n", "4 Dorian-throwaway niceguys 2016-11 \n", "... ... ... ... \n", "251603 Tiffany_Butler sports 2009-06 \n", "251604 harryballsagna canada 2009-06 \n", "251605 aberant lost 2009-04 \n", "251606 permaculture unitedkingdom 2009-01 \n", "251607 SovereignMan politics 2009-02 \n", "\n", " sup_comment prediction \\\n", "0 Hillary's Surrogotes Told to Blame Media for '... 0 \n", "1 Thank God Liberals like to live in concentrate... 1 \n", "2 Saw this cutie training his Attack today... 0 \n", "3 If you were locked in a room with 49 other peo... 0 \n", "4 You're not even that pretty! 1 \n", "... ... ... \n", "251603 Aren't you a little old to be on the internet,... 1 \n", "251604 \"I'm sorry, I can't hear you over the sound of... 1 \n", "251605 So far, so lame. 1 \n", "251606 \"I don't care\" says Lapland boss 1 \n", "251607 When their efforts failed, as they usually did... 1 \n", "\n", " confidence Topic_key_word \n", "0 0.974983 TODO 2 \n", "1 0.956885 TODO 2 \n", "2 0.899885 TODO 2 \n", "3 0.905721 TODO 2 \n", "4 0.589593 TODO 2 \n", "... ... ... 
\n", "251603 0.852649 TODO 1 \n", "251604 0.974458 TODO 0 \n", "251605 0.809398 TODO 1 \n", "251606 0.979738 TODO 4 \n", "251607 0.975283 TODO 1 \n", "\n", "[5000 rows x 9 columns]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d = pd.read_csv('./data/results extended.csv', index_col=0)\n", "d" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "TODO:\n", "- [x] Show LDA top words for each topic\n", "- [ ] I topic con una bassa percentuale di ironia sono quelli considerati più \"seri\"?\n", "- [x] Per ora sto utilizzando le label assegnate dal dataset; se non avessi le label e dovessi prevedere l'ironia, LDA sarebbe comunque affidabile?" ] } ], "metadata": { "kernelspec": { "display_name": "torch_new", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }