import csv
from collections import Counter

import jieba
import matplotlib.pyplot as plt
import numpy as np
import requests
from PIL import Image
from wordcloud import WordCloud

# Fixed working files; the ``wordCloudData`` directory must already exist.
_CSV_PATH = 'wordCloudData/news.csv'
_TEXT_PATH = 'wordCloudData/news.txt'
_DICT_PATH = 'wordCloudData/dict.txt'
_STOP_WORDS_PATH = 'wordCloudData/stopWord_test.txt'
_FONT_PATH = 'wordCloudData/msyh.ttc'
_MASK_PATH = 'wordCloudData/car.jpg'
_OUTPUT_PATH = 'wordCloudData/news_Wordcloud.png'

# Seconds to wait for the CSV download before giving up.
_REQUEST_TIMEOUT = 30

# Substrings that are replaced by a single space before tokenising.
_REPLACE_WORDS = (
    '\n',
    '\r',
    # NOTE(review): the original source held two *literal* line-break
    # characters here (invisible in the file). They are assumed to be
    # U+2028 LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR -- confirm.
    '\u2028',
    '\u2029',
    '的',
    '及',
    '於',
    '並',
    '113',
)


def _fetch_csv(csv_url):
    """Download *csv_url* and store the raw response body at ``_CSV_PATH``.

    Raises:
        requests.RequestException: on timeout, connection failure or a
            non-2xx HTTP status (so an error page is never saved as CSV).
    """
    response = requests.get(csv_url, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    with open(_CSV_PATH, 'wb') as f:
        f.write(response.content)


def _load_news():
    """Return ``(title, content)`` pairs read from the downloaded CSV.

    The CSV must have ``Title`` and ``Content`` columns; a missing column
    raises ``KeyError``. A row that is shorter than the header yields
    ``None`` for the missing field.
    """
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(_CSV_PATH, 'r', encoding='utf-8-sig', newline='') as f:
        return [(row['Title'], row['Content']) for row in csv.DictReader(f)]


def _clean(text):
    """Return *text* with every entry of ``_REPLACE_WORDS`` turned into a space."""
    for word in _REPLACE_WORDS:
        text = text.replace(word, ' ')
    return text


def _build_corpus(news):
    """Clean every news item, write the result to ``_TEXT_PATH``, return it.

    Each item contributes its cleaned title, its cleaned content and one
    blank line. Items with a missing title or content are skipped.
    """
    parts = []
    for title, content in news:
        # Short CSV rows produce None fields; skip them explicitly instead
        # of hiding every possible error behind a bare ``except``.
        if title is None or content is None:
            continue
        parts.append(_clean(title) + '\n')
        parts.append(_clean(content) + '\n')
        parts.append('\n')

    text = ''.join(parts)
    with open(_TEXT_PATH, 'w', encoding='utf-8') as f:
        f.write(text)
    return text


def _count_terms(text):
    """Segment *text* with jieba and count the terms that are not stop words."""
    jieba.set_dictionary(_DICT_PATH)

    # One stop word per line; a set gives O(1) membership tests.
    with open(_STOP_WORDS_PATH, 'r', encoding='utf-8-sig') as f:
        stops = set(f.read().split('\n'))

    # Whitespace-only tokens (newlines and the spaces introduced by
    # _clean) are not words; the original filtered only '\n'.
    return Counter(
        term
        for term in jieba.cut(text, cut_all=False)
        if term.strip() and term not in stops
    )


def wordCloud(csv_url):
    """Build a word-cloud image from a CSV of news articles.

    The CSV at *csv_url* is downloaded to ``wordCloudData/news.csv``, its
    ``Title`` and ``Content`` columns are cleaned and saved to
    ``wordCloudData/news.txt``, the text is segmented with jieba, and the
    term frequencies are rendered in the shape of ``wordCloudData/car.jpg``.
    The result is saved to ``wordCloudData/news_Wordcloud.png``.

    Args:
        csv_url: URL of a UTF-8 CSV file with ``Title`` and ``Content``
            columns.

    Returns:
        The saved word cloud, opened as a ``PIL.Image.Image``.

    Raises:
        requests.RequestException: if the download fails.
        KeyError: if the CSV lacks a ``Title`` or ``Content`` column.
    """
    _fetch_csv(csv_url)
    text = _build_corpus(_load_news())
    frequencies = _count_terms(text)

    # The mask image defines the shape of the word cloud.
    with Image.open(_MASK_PATH) as mask_image:
        mask = np.array(mask_image)

    # The background is set to white explicitly.
    wordcloud = WordCloud(
        background_color="white",
        mask=mask,
        font_path=_FONT_PATH,
    )
    wordcloud.generate_from_frequencies(frequencies=frequencies)

    # NOTE(review): this figure is not used for the saved file (to_file
    # writes the image itself); it is kept so interactive/notebook callers
    # still get the plot. It is never closed here.
    plt.figure(figsize=(6, 6))
    plt.imshow(wordcloud)
    plt.axis("off")

    wordcloud.to_file(_OUTPUT_PATH)
    return Image.open(_OUTPUT_PATH)