From cacafd887e8deb1009061941b1c57ac3db518476 Mon Sep 17 00:00:00 2001 From: Draven Date: Tue, 18 Apr 2017 17:00:55 +0800 Subject: [PATCH 1/4] Update README.md --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index 3edd163..3b7c40b 100644 --- a/README.md +++ b/README.md @@ -10,3 +10,12 @@ 请遵循robots协议 http://www.shixiseng.com/robots.txt + + +下面是一些可视化 + +![](http://i.imgur.com/21pzfeY.png) +![](http://i.imgur.com/4reCtJM.png) +![](http://i.imgur.com/PcjXkyx.png) + +![](http://i.imgur.com/xGOvq91.png) From c2eac7832095960d723be664b34d966d3f5ebfdc Mon Sep 17 00:00:00 2001 From: Draven Date: Tue, 18 Apr 2017 17:01:30 +0800 Subject: [PATCH 2/4] =?UTF-8?q?Rename=20lalala.py=20to=20=E6=8A=93?= =?UTF-8?q?=E5=8F=96=E5=B9=B6=E4=BF=9D=E5=AD=98=E5=88=B0csv.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...217\226\345\271\266\344\277\235\345\255\230\345\210\260csv.py" | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename lalala.py => "\346\212\223\345\217\226\345\271\266\344\277\235\345\255\230\345\210\260csv.py" (100%) diff --git a/lalala.py "b/\346\212\223\345\217\226\345\271\266\344\277\235\345\255\230\345\210\260csv.py" similarity index 100% rename from lalala.py rename to "\346\212\223\345\217\226\345\271\266\344\277\235\345\255\230\345\210\260csv.py" From a37d5b4366ff73ce50302df485f78bc8a32e337e Mon Sep 17 00:00:00 2001 From: Draven Date: Tue, 18 Apr 2017 17:02:38 +0800 Subject: [PATCH 3/4] =?UTF-8?q?Create=20=E7=94=9F=E6=88=90=E5=9B=BE?= =?UTF-8?q?=E4=BA=91.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...37\346\210\220\345\233\276\344\272\221.py" | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 "\347\224\237\346\210\220\345\233\276\344\272\221.py" diff --git "a/\347\224\237\346\210\220\345\233\276\344\272\221.py" "b/\347\224\237\346\210\220\345\233\276\344\272\221.py" new file mode 100644 index 0000000..bcccc64 --- /dev/null +++ "b/\347\224\237\346\210\220\345\233\276\344\272\221.py" @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +import matplotlib.pyplot as plt +import pickle +from wordcloud import WordCloud,STOPWORDS,ImageColorGenerator +import jieba +import codecs + +# 第一次运行程序时将分好的词存入文件 +text = '' +with open('word.txt') as fin:#根据情况可加入encoding='utf-8' + for line in fin.readlines(): + line = line.strip('\n') + text += '/'.join(jieba.cut(line)) + text += ' ' +fout = open('text.txt','wb')#以二进制写模式写入 +pickle.dump(text,fout) +fout.close() + +# 直接从文件读取数据 +fr = open('text.txt','rb') +text = pickle.load(fr) + +backgroud_Image = plt.imread('girl.jpg') #需要自己准备一张图片 +wc = WordCloud( background_color = 'white', # 设置背景颜色 + mask = backgroud_Image, # 设置背景图片 + max_words = 2000, # 设置最大现实的字数 + stopwords = STOPWORDS, # 设置停用词 + font_path = 'MSYH.TTF',# 设置字体格式,如不设置显示不了中文 + max_font_size = 3000, # 设置字体最大值 + random_state = 50, # 设置有多少种随机生成状态,即有多少种配色方案 + ) +wc.generate(text) +image_colors = ImageColorGenerator(backgroud_Image) +#wc.recolor(color_func = image_colors) +plt.imshow(wc) +plt.axis('off') +plt.show() From 9e87b641d3325c88df9f613e1bd1f41b18aa3a33 Mon Sep 17 00:00:00 2001 From: Draven Date: Wed, 27 Sep 2017 17:25:57 +0800 Subject: [PATCH 4/4] =?UTF-8?q?Update=20=E6=8A=93=E5=8F=96=E5=B9=B6?= =?UTF-8?q?=E4=BF=9D=E5=AD=98=E5=88=B0csv.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...7\226\345\271\266\344\277\235\345\255\230\345\210\260csv.py" | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git "a/\346\212\223\345\217\226\345\271\266\344\277\235\345\255\230\345\210\260csv.py" "b/\346\212\223\345\217\226\345\271\266\344\277\235\345\255\230\345\210\260csv.py" index 9f5f769..a4d55af 100644 --- "a/\346\212\223\345\217\226\345\271\266\344\277\235\345\255\230\345\210\260csv.py" +++ "b/\346\212\223\345\217\226\345\271\266\344\277\235\345\255\230\345\210\260csv.py" @@ -4,7 +4,7 @@ from fake_useragent import UserAgent ua = UserAgent() -headers = {'User-Agent': 'ua.random'} +headers = {'User-Agent': ua.random} job = [] location = [] company = []