import matplotlib.pyplot as plt
import numpy as np
import requests
import jieba
import csv

from PIL import Image
from collections import Counter
from wordcloud import WordCloud

def wordCloud(csv_url):
    """Download a news CSV and render its word frequencies as a word cloud.

    The CSV is fetched from ``csv_url`` and must have ``Title`` and
    ``Content`` columns. Titles and contents are stripped of a fixed list of
    noise substrings, segmented with jieba, filtered against the stop-word
    file, counted, and drawn inside the shape given by the mask image.

    Files read from ``wordCloudData/``: ``dict.txt`` (jieba dictionary),
    ``stopWord_test.txt`` (one stop word per line), ``msyh.ttc`` (font) and
    ``car.jpg`` (mask). Files written to ``wordCloudData/``: ``news.csv``
    (raw download), ``news.txt`` (cleaned text) and ``news_Wordcloud.png``
    (rendered image).

    Args:
        csv_url: URL of the CSV file to download.

    Returns:
        The saved ``news_Wordcloud.png`` opened as a PIL image.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
        KeyError: If the CSV has no ``Title`` or ``Content`` column.
        OSError: If any of the data files cannot be read or written.
    """
    csv_path = 'wordCloudData/news.csv'
    text_path = 'wordCloudData/news.txt'
    image_path = 'wordCloudData/news_Wordcloud.png'

    # Bound the wait and reject HTTP error responses, so an error page is
    # never saved and parsed as if it were the CSV.
    response = requests.get(csv_url, timeout=30)
    response.raise_for_status()
    with open(csv_path, 'wb') as f:
        f.write(response.content)

    # 'utf-8-sig' drops a leading BOM so the first header is read as 'Title'.
    with open(csv_path, 'r', encoding='utf-8-sig') as f:
        news = [(row['Title'], row['Content']) for row in csv.DictReader(f)]

    # Substrings replaced by a single space before segmentation.
    replace_list = [
        '\n',
        '\r',
        '<br />',
        '<br>',
        '的',
        '及',
        '於',
        '並',
        '113',
    ]

    entries = []
    for title, content in news:
        # DictReader fills fields missing from a short row with None; such
        # rows are skipped as a whole (best effort), everything else is kept.
        if title is None or content is None:
            continue
        for replace_word in replace_list:
            title = title.replace(replace_word, ' ')
            content = content.replace(replace_word, ' ')
        # One title line, one content line, then a blank separator line.
        entries.append(title + '\n' + content + '\n\n')
    text = ''.join(entries)

    # Persist the cleaned text; the in-memory copy is used for segmentation.
    with open(text_path, 'w', encoding='utf-8') as f:
        f.write(text)

    jieba.set_dictionary('wordCloudData/dict.txt')
    # Load the stop words into a set for constant-time membership tests.
    with open('wordCloudData/stopWord_test.txt', 'r', encoding='utf-8-sig') as f:
        stops = set(f.read().split('\n'))

    # Segment the text and count every term that is neither a newline nor a
    # stop word.
    diction = Counter(
        term
        for term in jieba.cut(text, cut_all=False)
        if term != '\n' and term not in stops
    )

    font = 'wordCloudData/msyh.ttc'  # font used to draw the words
    # The mask image defines the shape of the cloud; the file is closed as
    # soon as its pixels have been copied into the array.
    with Image.open("wordCloudData/car.jpg") as mask_image:
        mask = np.array(mask_image)

    # White background instead of the library's default.
    wordcloud = WordCloud(background_color="white", mask=mask, font_path=font)
    # NOTE(review): presumably fails when `diction` is empty (no usable
    # words in the CSV) - confirm against the wordcloud documentation.
    wordcloud.generate_from_frequencies(frequencies=diction)

    # Draw the cloud on a matplotlib figure without axes.
    # NOTE(review): the figure is neither shown nor closed here, so one
    # figure stays open per call - confirm whether callers rely on it.
    plt.figure(figsize=(6, 6))
    plt.imshow(wordcloud)
    plt.axis("off")

    wordcloud.to_file(image_path)  # save the rendered image

    return Image.open(image_path)