import matplotlib.pyplot as plt
import numpy as np
import requests
import jieba
import csv
from PIL import Image
from collections import Counter
from wordcloud import WordCloud
def wordCloud(csv_url):
    """Download a news CSV and render its text as a masked word cloud.

    The CSV is saved to ``wordCloudData/news.csv``; its ``Title`` and
    ``Content`` columns are cleaned and written to ``wordCloudData/news.txt``.
    That text is segmented with jieba (dictionary ``wordCloudData/dict.txt``),
    stop words from ``wordCloudData/stopWord_test.txt`` are dropped, and the
    remaining term frequencies are drawn inside the shape given by
    ``wordCloudData/car.jpg``. The result is saved to
    ``wordCloudData/news_Wordcloud.png``.

    Args:
        csv_url: URL of the CSV file to download.

    Returns:
        The PIL image opened from ``wordCloudData/news_Wordcloud.png``.

    Raises:
        requests.HTTPError: If the download answers with an error status.
        KeyError: If the CSV has no ``Title`` or ``Content`` column.
    """
    # Fail early on HTTP errors instead of saving an error page as the CSV.
    response = requests.get(csv_url, timeout=30)
    response.raise_for_status()
    with open('wordCloudData/news.csv', 'wb') as f:
        f.write(response.content)

    # Read the downloaded CSV; newline='' lets the csv module handle line
    # endings embedded in quoted fields itself.
    with open('wordCloudData/news.csv', 'r', encoding='utf-8-sig',
              newline='') as f:
        news = [
            {'title': row['Title'], 'content': row['Content']}
            for row in csv.DictReader(f)
        ]

    # Substrings replaced by a single space before segmentation.
    replace_list = [
        '\n',
        '\r',
        # Remaining line-boundary characters, written as escapes so they stay
        # visible in the source (raw ones are invisible and break the line
        # in most editors).
        '\v',
        '\f',
        '\x1c',
        '\x1d',
        '\x1e',
        '\x85',
        '\u2028',
        '\u2029',
        '的',
        '及',
        '於',
        '並',
        '113',
    ]

    with open('wordCloudData/news.txt', 'w', encoding='utf-8') as f:
        for n in news:
            title = n['title']
            content = n['content']
            # DictReader fills the missing fields of a short row with None;
            # such rows are skipped (best effort) rather than aborting.
            if title is None or content is None:
                continue
            for replace_word in replace_list:
                title = title.replace(replace_word, ' ')
                content = content.replace(replace_word, ' ')
            f.write(title + '\n')
            f.write(content + '\n')
            f.write('\n')

    # Read the cleaned text back.
    with open('wordCloudData/news.txt', 'r', encoding='utf-8') as f:
        text = f.read()

    jieba.set_dictionary('wordCloudData/dict.txt')

    # Load the stop words, one per line; a set gives O(1) membership tests.
    with open('wordCloudData/stopWord_test.txt', 'r',
              encoding='utf-8-sig') as f:
        stops = set(f.read().split('\n'))

    # Split the text into terms, dropping stop words and whitespace-only
    # tokens (the spaces inserted above would otherwise count as words).
    terms = [
        t for t in jieba.cut(text, cut_all=False)
        if t not in stops and t.strip()
    ]
    diction = Counter(terms)

    font = 'wordCloudData/msyh.ttc'  # font used to draw the words
    # The mask image defines the shape of the word cloud.
    with Image.open("wordCloudData/car.jpg") as mask_image:
        mask = np.array(mask_image)

    # The background defaults to black; use white instead.
    wordcloud = WordCloud(background_color="white", mask=mask, font_path=font)
    wordcloud.generate_from_frequencies(frequencies=diction)

    # Draw the cloud on a matplotlib figure.
    # NOTE(review): the figure is never shown or closed here, so repeated
    # calls accumulate open figures - confirm whether a caller relies on it.
    plt.figure(figsize=(6, 6))
    plt.imshow(wordcloud)
    plt.axis("off")

    wordcloud.to_file("wordCloudData/news_Wordcloud.png")  # save to disk
    return Image.open("wordCloudData/news_Wordcloud.png")