刚开始学 Python,选了这个题目,把代码放上来留念。没有用到很流行的框架,所以代码量挺大。
GUI 用 wxPython 写的。
# -*- coding: UTF-8 -*-
import os
import re
import sys
import traceback
from datetime import datetime
from datetime import timedelta

import requests
import wx
from lxml import etree

import data_analysis

global file_path  # no-op at module level; kept from the original source
file_path = ''  # Path of the crawl output file; set by Wb.get_cookie().


class Wb(wx.App):
    """wx application that crawls one Weibo account on weibo.cn and shows it.

    Flow: ``Operate`` builds the input window; its start button triggers
    ``get_cookie`` -> ``Onbutton_Start``, which crawls the account, writes the
    text file and then opens the result windows (``weibo_UI1``/``weibo_UI2``/
    ``weibo_UI3``, ``weibo_info`` and ``analysis_UI``).
    """

    # Pattern used to pull the numbers out of snippets such as "微博[1543]".
    _NUMBER_PATTERN = r"\d+\.?\d*"

    def Operate(self):
        """Reset the crawl state and show the start window."""
        self.cookie = {}
        self.username = ''      # Screen name of the crawled user.
        self.Number = 0         # Total number of posts reported by the profile.
        self.number1 = 0        # Number of posts actually crawled.
        self.Guanzhu = 0        # Following count.
        self.fans = 0           # Follower count.
        self.Content = []       # Post texts.
        self.Time = []          # Post timestamps.
        self.star = []          # Like count per post.
        self.Zhuanfa = []       # Repost count per post.
        self.Pinglun = []       # Comment count per post.
        self.publish_tool = []  # Publishing client per post.
        self.Id = 0
        # Index of the post shown by weibo_info(); the original never
        # initialised it, so opening the post browser raised AttributeError.
        self.a = 0

        # ---------------------------- GUI ----------------------------
        # Main window and its panel.
        self.frame_operate = wx.Frame(
            None, title="Weibo_Spider_GUI", size=(500, 500))
        self.panel_operate = wx.Panel(self.frame_operate, -1)

        # Title label with the large font.
        self.font1 = wx.Font(18, wx.ROMAN, wx.ITALIC, wx.NORMAL)
        self.label1 = wx.StaticText(
            self.panel_operate, -1, "WeiBo Spider",
            pos=(180, 60), style=wx.ALIGN_CENTER)
        self.label1.SetFont(self.font1)

        # Cookie label and text box.
        self.label2 = wx.StaticText(
            self.panel_operate, -1, "请输入您微博登陆的有效cookie",
            pos=(160, 130), style=wx.ALIGN_CENTER)
        self.textCookie = wx.TextCtrl(
            self.panel_operate, -1, pos=(200, 150), size=(80, 20),
            style=wx.TE_CENTER)

        # Id of the account to crawl.
        self.label3 = wx.StaticText(
            self.panel_operate, -1, "请输入您所要爬取微博账号的self.Id",
            pos=(160, 180), style=wx.ALIGN_CENTER)
        self.textId = wx.TextCtrl(
            self.panel_operate, -1, pos=(200, 200), size=(80, 20),
            style=wx.TE_CENTER)

        # Directory in which the data file is stored.
        self.label4 = wx.StaticText(
            self.panel_operate, -1, "数据文件保存路径",
            pos=(160, 230), style=wx.ALIGN_CENTER)
        self.textFile_path = wx.TextCtrl(
            self.panel_operate, -1, pos=(200, 250), size=(80, 20),
            style=wx.TE_CENTER)

        # Start button and its handler.
        self.button_start = wx.Button(
            self.panel_operate, -1, "开始爬取微博信息", pos=(200, 350))
        self.Bind(wx.EVT_BUTTON, self.get_cookie, self.button_start)
        self.frame_operate.Show()

    # ------------------------- result windows -------------------------

    def get_cookie(self, event):
        """Read cookie, user id and output directory, then start the crawl.

        A non-numeric id is reported in a message box instead of raising.
        The output file is ``<directory>/<id>.txt``; an empty directory field
        now means the current working directory.
        """
        global file_path
        try:
            user_id = int(self.textId.GetValue().strip())
        except ValueError:
            wx.MessageBox("微博账号Id必须是数字")
            return
        self.cookie = {"Cookie": self.textCookie.GetValue()}
        self.Id = user_id
        file_path = os.path.join(
            self.textFile_path.GetValue(), "%d.txt" % self.Id)
        self.Onbutton_Start()

    def Onbutton_Start(self):
        """Crawl the account, write the data file and open the result UI."""
        try:
            self.GetName()          # Screen name.
            self.GetSimple_Info()   # Post / following / follower counts.
            self.weibo_para()       # Every post with its metadata.
        except Exception:
            # Top-level GUI boundary: an invalid cookie or id typically shows
            # up as a failed lookup in the page, so report it and stay on the
            # input window instead of dying inside the event handler.
            traceback.print_exc()
            wx.MessageBox("爬取失败,请检查cookie和用户Id是否有效")
            return
        self.write_txt()
        self.weibo_UI1()

    def weibo_UI1(self):
        """Tell the user the crawl has finished, then show the profile."""
        message = "文件爬取完毕"
        wx.MessageBox(message)
        self.weibo_UI2()

    def weibo_UI2(self):
        """Replace the input window with the user-profile window."""
        self.frame_operate.Destroy()
        self.frame_Info = wx.Frame(
            None, title="User_Information", size=(500, 500))
        self.panel_Info = wx.Panel(self.frame_Info, -1)
        t1 = "用户昵称:" + str(self.username)
        t2 = "微博数:" + str(self.Number)
        t3 = "粉丝数:" + str(self.fans)
        t4 = "关注数:" + str(self.Guanzhu)
        self.label16 = wx.StaticText(
            self.panel_Info, -1, self.username, pos=(200, 100),
            style=wx.ALIGN_LEFT)
        self.label5 = wx.StaticText(
            self.panel_Info, -1, t1, pos=(180, 130), style=wx.ALIGN_LEFT)
        self.label13 = wx.StaticText(
            self.panel_Info, -1, t2, pos=(180, 150), style=wx.ALIGN_LEFT)
        self.label14 = wx.StaticText(
            self.panel_Info, -1, t3, pos=(180, 170), style=wx.ALIGN_LEFT)
        self.label15 = wx.StaticText(
            self.panel_Info, -1, t4, pos=(180, 190), style=wx.ALIGN_LEFT)
        # font2 is the small font, font1 the large one.
        self.font2 = wx.Font(13, wx.SCRIPT, wx.ITALIC, wx.NORMAL)
        self.label16.SetFont(self.font1)
        for label in (self.label5, self.label13, self.label14, self.label15):
            label.SetFont(self.font2)

        self.button_news = wx.Button(
            self.panel_Info, -1, "查看最近微博", pos=(220, 280))
        self.Bind(wx.EVT_BUTTON, self.weibo_UI3, self.button_news)
        self.frame_Info.Show()

    def weibo_UI3(self, event):
        """Show the newest (or pinned) post and the navigation buttons."""
        self.frame_Info.Destroy()
        self.frame_news = wx.Frame(None, title="---", size=(500, 500))
        self.panel_news = wx.Panel(self.frame_news, -1)
        label18 = wx.StaticText(
            self.panel_news, -1, "最新微博动态", pos=(200, 40))

        # The original only defined the texts when posts existed and then
        # used them unconditionally; fall back to placeholders instead.
        if self.Content:
            latest = (self.Content[0], self.publish_tool[0], self.Time[0],
                      str(self.star[0]), str(self.Zhuanfa[0]),
                      str(self.Pinglun[0]))
        else:
            latest = (u"无",) * 6
        text1 = "最新/置顶 微博为: " + latest[0]
        text2 = "最新/置顶 微博发布工具: " + latest[1]
        text3 = "最新/置顶 微博发布时间: " + latest[2]
        text4 = "最新/置顶 微博获得赞数: " + latest[3]
        text5 = "最新/置顶 微博获得转发数: " + latest[4]
        text6 = "最新/置顶 微博获得评论数: " + latest[5]

        self.label6 = wx.TextCtrl(
            self.panel_news, -1, text1, pos=(90, 60), size=(250, 140),
            style=wx.TE_MULTILINE | wx.TE_RICH)
        self.label7 = wx.StaticText(
            self.panel_news, -1, text2, pos=(90, 200), style=wx.ALIGN_LEFT)
        self.label8 = wx.StaticText(
            self.panel_news, -1, text3, pos=(90, 220), style=wx.ALIGN_LEFT)
        self.label9 = wx.StaticText(
            self.panel_news, -1, text4, pos=(90, 240), style=wx.ALIGN_LEFT)
        self.label10 = wx.StaticText(
            self.panel_news, -1, text5, pos=(90, 260), style=wx.ALIGN_LEFT)
        self.label11 = wx.StaticText(
            self.panel_news, -1, text6, pos=(90, 280), style=wx.ALIGN_LEFT)

        # Browse the crawled posts one by one.
        self.Button_info = wx.Button(
            self.panel_news, -1, "点击查看之前的微博内容", pos=(220, 340))
        self.Bind(wx.EVT_BUTTON, self.weibo_pre_info, self.Button_info)
        # Open the chart menu.
        self.Button_file = wx.Button(
            self.panel_news, -1, "点击查看微博数据分析图表", pos=(220, 380))
        self.Bind(wx.EVT_BUTTON, self.analysis_UI, self.Button_file)
        self.frame_news.Show()

    def analysis_UI(self, event):
        """Show the window with one button per chart."""
        self.frame_data = wx.Frame(
            None, title="data_analysis--20177830115", size=(500, 500))
        self.panel_data = wx.Panel(self.frame_data, -1)
        text1 = "2017-2018微博转发/点赞量折线统计图"
        text2 = '原创微博与转发微博统计图'
        text3 = '微博发布工具统计图'
        text4 = '微博使用心情统计图'
        self.button_1 = wx.Button(self.panel_data, -1, text1, pos=(180, 120))
        self.button_2 = wx.Button(self.panel_data, -1, text2, pos=(180, 160))
        self.button_3 = wx.Button(self.panel_data, -1, text3, pos=(180, 200))
        self.button_4 = wx.Button(self.panel_data, -1, text4, pos=(180, 240))
        self.Bind(wx.EVT_BUTTON, self.figure_1, self.button_1)
        self.Bind(wx.EVT_BUTTON, self.figure_2, self.button_2)
        self.Bind(wx.EVT_BUTTON, self.figure_3, self.button_3)
        self.Bind(wx.EVT_BUTTON, self.figure_4, self.button_4)
        self.frame_data.Show()

    def figure_1(self, event):
        """Draw the repost/like line chart."""
        figure = data_analysis.analysis(file_path, self.Number)
        figure.analyse_Zhexian()

    def figure_2(self, event):
        """Draw the original-vs-repost pie chart."""
        figure = data_analysis.analysis(file_path, self.Number)
        figure.analyse_YC()

    def figure_3(self, event):
        """Draw the publishing-tool pie chart."""
        figure = data_analysis.analysis(file_path, self.Number)
        figure.analyse_GJ()

    def figure_4(self, event):
        """Draw the emoticon pie chart."""
        figure = data_analysis.analysis(file_path, self.Number)
        figure.analyse_XQ()

    def weibo_pre_info(self, event):
        """Button handler that opens the post browser.

        weibo_info() is re-entered from cont() without an event object, so
        this thin wrapper absorbs the event. When every post has already been
        shown, browsing starts over from the first one.
        """
        if self.a >= len(self.Content):
            self.a = 0
        self.weibo_info()

    def weibo_info(self):
        """Show post number ``self.a`` in its own window."""
        # Guard added: the original indexed past the end of the lists once
        # "next" was pressed on the last post.
        if not 0 <= self.a < len(self.Content):
            wx.MessageBox("没有更多微博了")
            return

        self.s = wx.Frame(None, title="---", size=(500, 500))
        self.f = wx.Panel(self.s, -1)
        text1 = str(self.a + 1) + ":" + self.Content[self.a]
        text2 = "发布工具: " + self.publish_tool[self.a]
        text3 = "发布时间: " + self.Time[self.a]
        text4 = "点赞数: " + str(self.star[self.a])
        text5 = "转发数: " + str(self.Zhuanfa[self.a])
        text6 = "评论数: " + str(self.Pinglun[self.a])

        # A static text control only renders a single line, so a multi-line
        # TextCtrl is used for the post body (the user can edit the box, but
        # the text wraps properly).
        self.labela = wx.TextCtrl(
            self.f, -1, text1, pos=(80, 60), size=(250, 140),
            style=wx.TE_MULTILINE | wx.TE_RICH)
        self.labelb = wx.StaticText(
            self.f, -1, text2, pos=(80, 200), style=wx.ALIGN_LEFT)
        self.labelc = wx.StaticText(
            self.f, -1, text3, pos=(80, 220), style=wx.ALIGN_LEFT)
        self.labeld = wx.StaticText(
            self.f, -1, text4, pos=(80, 240), style=wx.ALIGN_LEFT)
        self.labele = wx.StaticText(
            self.f, -1, text5, pos=(80, 260), style=wx.ALIGN_LEFT)
        self.labelf = wx.StaticText(
            self.f, -1, text6, pos=(80, 280), style=wx.ALIGN_LEFT)

        self.button_next = wx.Button(self.f, -1, "查看下一条", pos=(300, 380))
        self.button_exit = wx.Button(self.f, -1, "关闭", pos=(100, 380))
        self.Bind(wx.EVT_BUTTON, self.exit, self.button_exit)
        self.Bind(wx.EVT_BUTTON, self.cont, self.button_next)
        self.s.Show()

    def exit(self, event):
        """Close the post window."""
        self.s.Destroy()

    def cont(self, event):
        """Advance to the next post and reopen the post window."""
        self.a += 1
        self.s.Destroy()
        self.weibo_info()

    # ----------------------------- crawler -----------------------------
    # The crawler part is adapted from a project published on GitHub.

    @staticmethod
    def _console_safe(text):
        """Drop the characters the console encoding cannot represent.

        Mirrors the original encode/decode round trip, but falls back to
        UTF-8 when ``sys.stdout`` or its encoding is missing (for example in
        a windowed executable).
        """
        encoding = getattr(sys.stdout, "encoding", None) or "utf-8"
        return text.encode(encoding, "ignore").decode(encoding)

    def _first_int(self, text):
        """Return the first number found in ``text`` as an int."""
        numbers = re.findall(self._NUMBER_PATTERN, text, re.M)
        if not numbers:
            raise ValueError("no number found in %r" % (text,))
        return int(numbers[0])

    def _fetch(self, url):
        """Download ``url`` with the user's cookie and parse it with lxml."""
        html = requests.get(url, cookies=self.cookie).content
        return etree.HTML(html)

    def GetName(self):
        """Store the user's screen name in ``self.username``."""
        selector = self._fetch("https://weibo.cn/%d/info" % (self.Id))
        title = selector.xpath("//title/text()")[0]
        # The title has the form "XXX的微博"; strip the three-character suffix.
        self.username = title[:-3]

    def GetSimple_Info(self):
        """Store the post, following and follower counts of the user."""
        selector = self._fetch("https://weibo.cn/u/%d?&page=1" % (self.Id))
        # Post count, e.g. <div class="tip2"><span class="tc">微博[1543]</span>
        wb_num = selector.xpath(
            "//div[@class='tip2']/span[@class='tc']/text()")[0]
        self.Number = self._first_int(wb_num)
        links = selector.xpath("//div[@class='tip2']/a/text()")
        self.Guanzhu = self._first_int(links[0])   # Following count.
        self.fans = self._first_int(links[1])      # Follower count.

    def GetLong(self, weibo_link):
        """Return the full text of a long post from its comment page."""
        selector = self._fetch(weibo_link)
        info = selector.xpath("//div[@class='c']")[1]
        raw = info.xpath("div/span[@class='ctt']")[0].xpath("string(.)")
        return self._console_safe(raw.replace(u"\u200b", ""))

    def GetZhuanfa(self, is_retweet, info, wb_content):
        """Return the display text of a reposted post.

        The result combines the repost reason, the original author and the
        original content, or a fixed notice when the original was deleted.
        """
        original_user = is_retweet[0].xpath("a/text()")
        if not original_user:
            return u"转发微博已被删除"
        original_user = original_user[0]
        retweet_reason = self._console_safe(
            info.xpath("div")[-1].xpath("string(.)").replace(u"\u200b", ""))
        # Cut the trailing "赞[..] 转发[..] ..." footer when it is present
        # (the original used rindex and raised when it was not).
        footer_start = retweet_reason.rfind(u"赞")
        if footer_start != -1:
            retweet_reason = retweet_reason[:footer_start]
        return (retweet_reason + "\n" + u"原始用户: " + original_user +
                "\n" + u"转发内容: " + wb_content)

    @staticmethod
    def _normalise_time(raw_time):
        """Convert weibo.cn's relative time text to 'YYYY-MM-DD HH:MM'."""
        now = datetime.now()
        if u"刚刚" in raw_time:
            return now.strftime('%Y-%m-%d %H:%M')
        if u"分钟" in raw_time:
            minutes = int(raw_time[:raw_time.find(u"分钟")])
            return (now - timedelta(minutes=minutes)).strftime(
                "%Y-%m-%d %H:%M")
        if u"今天" in raw_time:
            return now.strftime("%Y-%m-%d") + " " + raw_time[3:]
        if u"月" in raw_time:
            # Form "MM月DD日 HH:MM" of the current year.
            return (now.strftime("%Y") + "-" + raw_time[0:2] + "-" +
                    raw_time[3:5] + " " + raw_time[7:12])
        return raw_time[:16]

    def _parse_post(self, node):
        """Extract one post from its ``<div class='c'>`` node.

        Returns ``(content, time, tool, likes, reposts, comments)``.
        """
        content = self._console_safe(
            node.xpath("div/span[@class='ctt']")[0]
            .xpath("string(.)").replace(u"\u200b", ""))[:-1]
        weibo_id = node.xpath("@id")[0][2:]
        links = node.xpath("div/span[@class='ctt']/a")
        is_retweet = node.xpath("div/span[@class='cmt']")
        if links:
            # A trailing "全文" link marks a truncated long post; links
            # without text (not guarded in the original) are skipped.
            link_text = links[-1].xpath("text()")
            if link_text and link_text[0] == u"全文":
                full = self.GetLong("https://weibo.cn/comment/" + weibo_id)
                if full:
                    content = full if is_retweet else full[1:]
        if is_retweet:
            content = self.GetZhuanfa(is_retweet, node, content)

        # "<time> 来自 <tool>"
        raw_time = self._console_safe(
            node.xpath("div/span[@class='ct']")[0].xpath("string(.)"))
        time_and_tool = raw_time.split(u'来自')
        post_time = self._normalise_time(time_and_tool[0])
        tool = time_and_tool[1] if len(time_and_tool) > 1 else u"无"

        # Footer "赞[x] 转发[y] 评论[z] ...": likes, reposts, comments.
        footer = self._console_safe(node.xpath("div")[-1].xpath("string(.)"))
        footer = footer[footer.rfind(u'赞'):]
        counts = re.findall(self._NUMBER_PATTERN, footer, re.M)
        return (content, post_time, tool,
                int(counts[0]), int(counts[1]), int(counts[2]))

    def weibo_para(self):
        """Crawl every page of the user's posts into the per-post lists."""
        first_page = self._fetch("https://weibo.cn/u/%d?&page=1" % (self.Id))
        pager = first_page.xpath("//input[@name='mp']")
        page_num = int(pager[0].attrib["value"]) if pager else 1
        for page in range(1, page_num + 1):
            if page == 1:
                selector = first_page  # Already downloaded above.
            else:
                selector = self._fetch(
                    "https://weibo.cn/u/%d?&page=%d" % (self.Id, page))
            info = selector.xpath("//div[@class='c']")
            if not info or not info[0].xpath("div/span[@class='ctt']"):
                continue
            # The last two "c" blocks of a page are skipped, as in the
            # original loop bound len(info) - 2.
            for node in info[:-2]:
                # Parse first and append afterwards so the parallel lists
                # cannot get out of step when a post fails to parse.
                (content, post_time, tool,
                 likes, reposts, comments) = self._parse_post(node)
                self.Content.append(content)
                self.Time.append(post_time)
                self.publish_tool.append(tool)
                self.star.append(likes)
                self.Zhuanfa.append(reposts)
                self.Pinglun.append(comments)
                self.number1 += 1

    # --------------------------- file output ---------------------------

    def write_txt(self):
        """Write the crawled data to ``file_path``.

        The file is written as UTF-8 because ``data_analysis`` reads it back
        with ``encoding="utf-8"``; the original used the console encoding,
        which breaks that round trip on non-UTF-8 consoles.
        """
        try:
            contents_header = u"\n\n微博内容: \n"
            parts = [u"用户信息\n用户昵称:" + self.username +
                     u"\n用户Id: " + str(self.Id) +
                     u"\n微博数: " + str(self.Number) +
                     u"\n关注数: " + str(self.Guanzhu) +
                     u"\n粉丝数: " + str(self.fans) +
                     contents_header + '\n']
            for i in range(1, self.number1 + 1):
                parts.append(
                    str(i) + ":" + self.Content[i - 1] + "\n" +
                    u"发布工具: " + self.publish_tool[i - 1] + "\n" +
                    u"发布时间: " + self.Time[i - 1] + "\n" +
                    u"点赞数: " + str(self.star[i - 1]) +
                    u"转发数: " + str(self.Zhuanfa[i - 1]) +
                    u"评论数: " + str(self.Pinglun[i - 1]) + "\n\n")
            # Binary mode keeps "\n" untouched on every platform.
            with open(file_path, "wb") as f:
                f.write("".join(parts).encode("utf-8"))
        except Exception as e:
            print("Error: ", e)
            traceback.print_exc()
# Test / entry-point section.
def main():
    """Create the wx application, build the start window and run the event loop."""
    weibo = Wb()
    weibo.Operate()
    weibo.MainLoop()


if __name__ == "__main__":
    main()

# Data-analysis section (module ``data_analysis``). The charts are drawn with
# matplotlib; the author had only a basic grasp of it, so they are not very
# polished, and with so little data the analysis may be somewhat off.
# Four charts are produced:
#   "2017-2018微博转发/点赞量折线统计图" (repost/like line chart),
#   "原创微博与转发微博统计图" (original vs. reposted posts),
#   "微博发布工具统计图" (publishing tools),
#   "微博使用心情统计图" (emoticon usage).
import re
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdate
from matplotlib import font_manager as fm
import time
from datetime import datetime
import webbrowser
from collections import Counter


class analysis(object):
    """Draw charts from the text file written by the crawler.

    Every ``analyse_*`` method reads ``file_name`` as UTF-8, extracts its data
    with regular expressions and shows one matplotlib figure.
    """

    def __init__(self, file_name, number):
        """Remember the data file path and the user's total post count."""
        self.file_name = file_name
        self.number = number
        self.X_data = []    # Sampled post times (x axis of the line chart).
        self.Y1_data = []   # Sampled like counts, in thousands.
        self.Y_data = []    # Sampled repost counts, in thousands.
        self.str = ""

    def _read_text(self):
        """Return the whole data file as one string."""
        with open(self.file_name, encoding="utf-8") as f:
            return f.read()

    @staticmethod
    def _frequent(items, threshold):
        """Count ``items`` and keep the entries seen more than ``threshold`` times."""
        return {name: count for name, count in Counter(items).items()
                if count > threshold}

    def analyse_Zhexian(self):
        """Show the line chart of repost and like counts over time."""
        text = self._read_text()
        reposts = [int(n) for n in re.findall(r'转发数: (\d+)', text)]
        likes = [int(n) for n in re.findall(r'点赞数: (\d+)', text)]
        times = re.findall(
            r'发布时间: (\d{4}-\d{1,2}-\d{1,2}\s\d{1,2}:\d{1,2})', text)

        # Posts are listed newest first; cut at the first post from 2016 so
        # only 2017-2018 remains (otherwise the date axis overlaps badly).
        # Without any 2016 post everything is kept; the original left `stop`
        # unbound in that case.
        stop = next((i for i, stamp in enumerate(times) if '2016' in stamp),
                    len(times))
        # Divide by 1000 so the y axis reads in thousands.
        reposts = [n / 1000 for n in reposts[:stop]]
        likes = [n / 1000 for n in likes[:stop]]
        times = [datetime.strptime(stamp, "%Y-%m-%d %H:%M")
                 for stamp in times[:stop]]

        # Thin the data: keep about 18% of the points, one per equally sized
        # group. With very few posts the group count would be 0 (division by
        # zero in the original), so every point is kept instead.
        total = min(len(times), len(reposts), len(likes))
        group = int(0.18 * total) or total
        step = total // group if group else 1
        picked = [i * step for i in range(group)]
        # Assign fresh lists so a second call does not plot duplicated data.
        self.X_data = [times[i] for i in picked]
        self.Y_data = [reposts[i] for i in picked]
        self.Y1_data = [likes[i] for i in picked]

        # Draw the two lines.
        fig1 = plt.figure(figsize=(8, 5))
        plt.rcParams['font.sans-serif'] = ['SimHei']  # Render Chinese labels.
        ax1 = fig1.add_subplot(1, 1, 1)
        ax1.xaxis.set_major_formatter(
            mdate.DateFormatter('%Y-%m-%d %H-%M'))  # Tick label format.
        plt.xticks(self.X_data, rotation=90)  # Vertical time labels.
        plt.yticks(np.linspace(0, 5000, 5, endpoint=True))
        plt.title(u"2017-2018微博转发/点赞量折线图", color="black")
        plt.plot(self.X_data, self.Y_data, "o-", color='skyblue',
                 label="转发量", markersize=1.5)
        plt.plot(self.X_data, self.Y1_data, "o-", color='pink',
                 label="点赞量", markersize=1.5)
        plt.xlabel("发布时间")
        plt.ylabel("数量(千/条)")
        plt.legend()
        plt.show()

    def analyse_YC(self):
        """Show the pie chart of reposted versus original posts."""
        text = self._read_text()
        # Every reposted post contains the marker "转发理由".
        Number_Zhuanfa = len(re.findall(r'转发理由', text))
        Yuanchuang = self.number - Number_Zhuanfa

        plt.rcParams['font.sans-serif'] = ['SimHei']
        labels = ['转发微博', '原创微博']
        sizes = [Number_Zhuanfa, Yuanchuang]
        explode = (0.1, 0)
        plt.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%',
                shadow=False, startangle=150)
        plt.title(u"原创与转发微博量", color="black")
        plt.show()

    def analyse_GJ(self):
        """Show the pie chart of publishing tools used more than 10 times."""
        text = self._read_text()
        tools = re.findall(r'发布工具: (.*)\n发布时间', text)
        gongju = self._frequent(tools, 10)  # Drop tools seen 10 times or fewer.

        labels = list(gongju.keys())
        sizes = list(gongju.values())
        plt.rcParams['font.sans-serif'] = ['SimHei']
        plt.pie(sizes, labels=labels, autopct='%1.1f%%', shadow=True,
                startangle=150)
        plt.title(u"微博发布工具统计", color="black")
        plt.show()

    def analyse_XQ(self):
        """Show the pie chart of emoticons used more than twice."""
        text = self._read_text()
        # Each match is a pair of bracketed emoticon names from one line.
        pairs = re.findall(r'\[(.{1,4})\].*\[(.{1,4})\]', text)
        names = [name for pair in pairs for name in pair]
        biaoqing = self._frequent(names, 2)  # Drop emoticons seen twice or less.

        labels = list(biaoqing.keys())
        sizes = list(biaoqing.values())
        # Set the font before drawing, as the other charts do.
        plt.rcParams['font.sans-serif'] = ['SimHei']
        # The original call carried a stray URL as its argument, which is a
        # syntax error; subplots() takes no such argument here.
        fig1, ax1 = plt.subplots()
        patches, texts, autotexts = ax1.pie(
            sizes, labels=labels, autopct='%1.0f%%', shadow=False,
            startangle=170)
        ax1.axis('equal')
        # Shrink the label fonts.
        proptease = fm.FontProperties()
        proptease.set_size('small')
        plt.title(u"微博表情使用次数", color="black")
        plt.setp(autotexts, fontproperties=proptease)
        plt.setp(texts, fontproperties=proptease)
        plt.show()
Python 程序打包

# 在 cmd 下安装 pyinstaller
pip install pyinstaller
# 打包成一个可执行文件 -F(注意将 cmd 窗口切换至文件保存的路径下)
pyinstaller -F filename.py

本篇只适合新手简单学习,笔者也刚学,加上复习周,后期会逐渐完善,毕竟 UI 写得太丑了!另:关于获取本地用户 cookie 和微博账号 id 的操作比较简单,在此不再做详细解释。如果程序跑不出来,相信我,一定是 cookie 问题。