用 Python 写的糗事百科阅读器

清华大佬耗费三个月吐血整理的几百G的资源,免费分享!....>>>

#!/usr/bin/env python3 
#-*- coding=utf-8 -*- 


import codecs 
import sys 
import urllib.request as request 
import io 
import re 
import gzip 
import lxml.etree as etree 
import logging 
import logging.handlers 
import tkinter as tk 
import tkinter.ttk as tkttk 
import tkinter.messagebox as tkmsg 
import tkinter.filedialog as tkfd 
import tkinter.scrolledtext as tkst 
import webbrowser 
from PIL import Image,ImageTk 




# Application name and version; used for the main window title and the
# About dialog caption.
APP_NAME = 'QBReader'
APP_VER  = 'v0.9'
# Image file shown when a post carries no picture of its own.
DEFAULT_PIC = 'default.jpg'
# Width and height of the picture area; pictures are scaled to fit inside it.
PIC_W = 640
PIC_H = 480


# Logging settings.  Only the console (stream) handler below is active.
# The rotating file handler is commented out, so the file-related settings
# (LOG_DIR, LOG_LV_FH, LOG_FILE, MAX_LOG_SIZE, LOG_BACKUP_COUNT) are only
# referenced from that commented-out code in the part of the file shown here.
LOG_DIR = 'log'
LOG_LV_CH = logging.DEBUG
LOG_LV_FH = logging.DEBUG
LOG_FILE = 'QBReader.log'
MAX_LOG_SIZE = 1024 * 1024  # 1 MB
LOG_BACKUP_COUNT = 3


# NOTE(review): re-enabling the file handler also requires importing `os`,
# which is not among the imports visible in this file.
#if not os.path.exists(LOG_DIR):
#    os.mkdir(LOG_DIR)


# Application-wide logger writing formatted records to the console.
logger = logging.getLogger('QBReader')
logger.setLevel(LOG_LV_CH)
#fh = logging.handlers.RotatingFileHandler(os.path.join(LOG_DIR, LOG_FILE),
#                                          maxBytes=MAX_LOG_SIZE,
#                                          backupCount=LOG_BACKUP_COUNT,
#                                          encoding='utf-8')
#fh.setLevel(LOG_LV_FH)
ch = logging.StreamHandler()
ch.setLevel(LOG_LV_CH)
# Record layout: timestamp, logger name, source line number, level, message.
formatter = logging.Formatter(
                "%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(message)s")
#fh.setFormatter(formatter)
ch.setFormatter(formatter)
# add the handlers to logger
#logger.addHandler(fh)
logger.addHandler(ch)
# Short module-level aliases for the logger's methods, used throughout the file.
DEBUG = logger.debug
INFO = logger.info
WARNING = logger.warning
ERROR = logger.error




class QiuBai(object):
    """Download and parse post listings from www.qiushibaike.com.

    Three listing types are supported: "hot", "week" and "now".  Any other
    value falls back to "now".  The XPath expressions used by get_content
    are tied to the page layout of the site.
    """

    # URL template per listing type; "%s" is replaced by the page number.
    _PAGE_URLS = {
        "hot": "http://www.qiushibaike.com/hot/page/%s",
        "week": "http://www.qiushibaike.com/week/page/%s",
        "now": "http://www.qiushibaike.com/8hr/page/%s",
    }
    # Chinese display name per listing type.
    _TYPE_NAMES_CN = {
        "hot": '24小时内',
        "week": '七天内',
        "now": '热门',
    }

    def __init__(self, page_type=""):
        """Create a reader for the listing named by *page_type*."""
        self.host = 'http://www.qiushibaike.com'
        self.qiubai_type = "NULL"
        self.set_type(page_type)

    def set_type(self, page_type=""):
        """Select the listing type; unknown values select "now"."""
        if page_type not in self._PAGE_URLS:
            page_type = "now"
        self.url = self._PAGE_URLS[page_type]
        self.qiubai_type = page_type
        INFO('set qiubai type to {0}'.format(self.qiubai_type))

    def get_type_cn(self):
        """Return the Chinese display name of the current listing type."""
        return self._TYPE_NAMES_CN.get(self.qiubai_type, '未知')

    def get_type(self):
        """Return the current listing type ("hot", "week" or "now")."""
        return self.qiubai_type

    @staticmethod
    def _decode_body(raw, content_encoding=None):
        """Decompress *raw* according to *content_encoding* and decode UTF-8.

        Handles "gzip"/"x-gzip" and "deflate"; any other value (including
        None) is treated as an uncompressed body.
        """
        # Local import: zlib is not among the file-level imports.
        import zlib

        encoding = (content_encoding or "").strip().lower()
        if encoding in ("gzip", "x-gzip"):
            raw = gzip.decompress(raw)
        elif encoding == "deflate":
            try:
                raw = zlib.decompress(raw)
            except zlib.error:
                # Some servers send a raw deflate stream without the zlib
                # header; retry with a negative window size for that case.
                raw = zlib.decompress(raw, -zlib.MAX_WBITS)
        return raw.decode("utf-8")

    def get_page(self, page=1):
        """Download listing page number *page* and return its HTML as str.

        Errors raised by urllib and decoding errors propagate to the caller.
        """
        url = self.url % page
        header = {"User-Agent": "Mozilla/4.0",
                  "Accept-Encoding": "gzip, deflate"}
        req = request.Request(url, headers=header)
        # The context manager closes the HTTP response once it has been read.
        with request.urlopen(req) as resp:
            raw = resp.read()
            content_encoding = resp.headers.get("Content-Encoding")
        return self._decode_body(raw, content_encoding)

    def get_image(self, image_url):
        """Download *image_url* and return the raw response bytes."""
        header = {"User-Agent": "Mozilla/4.0"}
        req = request.Request(image_url, headers=header)
        with request.urlopen(req) as resp:
            return resp.read()

    def _parse_post(self, qiubai_node):
        """Build the dict for one post node, or None if it has no content div."""
        user_name = "匿名"
        qiubai_date = '未知'
        qiubai_text = None  # stays None when the node has no "content" div
        qiubai_pic = "NULL"
        qiubai_url = "NULL"
        stauts_funny = "0"
        stauts_reply = "0"
        for item in qiubai_node.xpath("child::div"):
            css_class = item.get("class")
            if css_class == "author clearfix":
                user_name = ''.join(item.xpath("child::a[2]//text()"))
            elif css_class == "content":
                qiubai_date = ''.join(item.xpath("attribute::title"))
                # Drop the surrounding newlines and whitespace of the text.
                qiubai_text = ''.join(item.xpath("child::text()")).strip()
            elif css_class == "thumb":
                qiubai_pic = ''.join(item.xpath("child::a/img/attribute::src"))
            elif css_class == "stats clearfix":
                qiubai_url = self.host + ''.join(item.xpath(
                    "child::span[@class='stats-comments']/a/attribute::href"))
                stauts_funny = ''.join(item.xpath(
                    "child::span[@class='stats-vote']/i[@class='number']//text()"))
                stauts_reply = ''.join(item.xpath(
                    "child::span[@class='stats-comments']/a/i[@class='number']//text()"))
        if qiubai_text is None:
            return None
        # The key spellings ('stauts_*', 'qiushi_*') are read by the GUI class
        # and must stay as they are.
        return {'user_name': user_name,
                'qiubai_date': qiubai_date,
                'qiushi_content': qiubai_text,
                'qiushi_img': qiubai_pic,
                'qiushi_url': qiubai_url,
                'stauts_funny': stauts_funny,
                'stauts_reply': stauts_reply}

    def get_content(self, page):
        """Fetch listing page *page* and return its posts as a list of dicts.

        Each dict has the keys 'user_name', 'qiubai_date', 'qiushi_content',
        'qiushi_img', 'qiushi_url', 'stauts_funny' and 'stauts_reply'.
        'qiushi_img' and 'qiushi_url' are "NULL" when the post has no
        picture / statistics block.  An empty list is returned when the page
        cannot be parsed; download errors from get_page propagate.
        """
        text = self.get_page(page)
        try:
            parser = etree.HTMLParser(recover=True)
            text_dom = etree.fromstring(text, parser)
        except Exception:
            # Log the traceback as well so the cause is not lost.
            ERROR("页面解析错误", exc_info=True)
            return []
        if text_dom is None:
            ERROR("页面解析错误")
            return []
        content_node = text_dom.xpath("//body/div[3]/div/div/div[@class][@id]")
        posts = (self._parse_post(node) for node in content_node)
        return [post for post in posts if post is not None]




class Set_QiuBai_Type(object):
    """Dialog window with radio buttons for choosing the listing category."""

    def __init__(self, root, init_type='now'):
        """Build the dialog as a child of *root* with *init_type* preselected."""
        self.qiubai_type = ''
        dialog = tk.Toplevel(root)
        self.top = dialog
        dialog.withdraw()  # keep hidden while the widgets are laid out
        dialog.title('糗事类别选择')
        dialog.resizable(False, False)  # fixed-size window

        frame = tkttk.Frame(dialog, padding="12 12 12 12")
        frame.grid(column=0, row=0, sticky=(tk.N, tk.W, tk.E, tk.S))
        caption = tk.Label(frame, text='选择糗事类别')
        caption.grid(column=1, row=1)

        # One radio button per (label, value) pair, laid out left to right.
        choices = (
            ("热门", "now"),
            ("24小时内", "hot"),
            ("7天内", "week"),
        )
        self.v = tk.StringVar()
        self.v.set(init_type)
        for column, (label_text, value) in enumerate(choices):
            radio = tk.Radiobutton(frame, text=label_text,
                                   variable=self.v, value=value)
            radio.grid(column=column, row=2)

        confirm = tk.Button(frame, text='Ok', command=self.Ok)
        confirm.grid(column=1, row=3)

        # Show and hide the window once (the original notes this as computing
        # the window size), then position it over the parent window.
        dialog.update_idletasks()
        dialog.deiconify()
        dialog.withdraw()
        geometry = get_center_app_screen(root,
                                         dialog.winfo_reqwidth(),
                                         dialog.winfo_reqheight())
        dialog.geometry(geometry)
        dialog.deiconify()
        DEBUG('弹出窗口大小: {0}*{1}'.format(dialog.winfo_width(),
                                       dialog.winfo_height()))

    def Ok(self):
        """Store the selected category and destroy the dialog."""
        self.qiubai_type = self.v.get()
        self.top.destroy()

    def get(self):
        """Return the confirmed category, or '' if Ok was never pressed."""
        return self.qiubai_type


     
class QiuBai_Gui(object):
    """Main window: shows one post at a time with menu/mouse/key navigation.

    The posts come from *obj_qb*, which must provide get_content(page),
    get_image(url), get_type(), get_type_cn() and set_type(name).
    """

    def __init__(self, roots, obj_qb):
        """Load the first page and build the menu and widgets inside *roots*.

        Raises IndexError when the first page contains no posts, because
        there is nothing to display.
        """
        self.roots = roots
        self.obj_qb = obj_qb
        self.qb_page = 1
        # False while the category dialog is open, so key/mouse navigation
        # is ignored in the meantime.
        self.is_main_ui = True

        self.qb_content = self.obj_qb.get_content(self.qb_page)
        self.content_len = len(self.qb_content)
        self.content_index = 0
        # configure the menu bar
        self.__confmenu__()

        # configure the main widgets
        self.__confmaingui__()

    def __confmenu__(self):
        """Create the View / Settings / Help menu bar."""
        menu_roots = tk.Menu(self.roots)

        menu_v = tk.Menu(menu_roots, tearoff=0)
        menu_v.add_command(label='下一条', command=self.mv_next)
        menu_v.add_command(label='上一条', command=self.mv_forward)
        menu_v.add_command(label='刷新', command=self.mv_refresh)
        menu_v.add_command(label='打开糗事页面', command=self.mv_open_source)
        menu_v.add_separator()
        menu_v.add_command(label='退出', command=self.mv_exit)

        menu_st = tk.Menu(menu_roots, tearoff=0)
        menu_st.add_command(label='类别', command=self.ms_type)

        menu_hp = tk.Menu(menu_roots, tearoff=0)
        menu_hp.add_command(label='关于', command=self.ma_about)

        # attach the sub-menus to the menu bar
        menu_roots.add_cascade(label="查看", menu=menu_v)
        menu_roots.add_cascade(label="设置", menu=menu_st)
        menu_roots.add_cascade(label="帮助", menu=menu_hp)

        self.roots.config(menu=menu_roots)

    def __confmaingui__(self):
        """Create the content frame and the status bar below it."""
        main_frame = tkttk.Frame(self.roots, padding="12 12 12 12")
        main_frame.grid(column=0, row=0, sticky=(tk.N, tk.W, tk.E, tk.S))
        main_frame.columnconfigure(0, weight=1)
        main_frame.rowconfigure(0, weight=1)
        self.__main_ui__(main_frame)

        status_frame = tkttk.Frame(self.roots, padding="12 12 12 12")
        status_frame.grid(column=0, row=1, sticky=(tk.N, tk.W, tk.E, tk.S))
        # text variable behind the status bar label
        self.disState = tk.StringVar()
        self.__update_read_state__()

        lbState = tkttk.Label(status_frame, textvariable=self.disState,
                              borderwidth=1, relief=tk.SUNKEN,
                              anchor=tk.W, width=74)
        lbState.grid(column=0, row=5, columnspan=2,
                     sticky=(tk.N, tk.W, tk.E, tk.S))

    def __main_ui__(self, mainframe):
        """Create the text and picture widgets and show the current post."""
        group_qiubai = tk.LabelFrame(mainframe, text="糗事", bg='#808000',
                                     padx=10, pady=5)
        group_qiubai.grid(column=0, row=0, sticky=(tk.N, tk.W, tk.E, tk.S))
        # post text
        self.qiubai_txt = tkst.ScrolledText(group_qiubai, height=4, width=69)
        self.qiubai_txt.grid(column=1, row=0, columnspan=2)

        # post picture
        self.qiubai_label = tk.Label(group_qiubai,
                                     height=PIC_H,
                                     width=PIC_W)
        self.qiubai_label.grid(column=1, row=1, columnspan=2)

        current = self.qb_content[self.content_index]
        self.__show_qiushi__(current)
        DEBUG(current)

        for child in mainframe.winfo_children():
            child.grid_configure(padx=1, pady=5)

        # mouse buttons on the picture and any key press drive navigation
        self.qiubai_label.bind("<Button>", self.__event_handler__)
        group_qiubai.bind_all("<Key>", self.__event_handler__)
        # returning "break" stops key presses from editing the text widget
        self.qiubai_txt.bind("<KeyPress>", lambda e: "break")

    def __updateState__(self, ststr):
        """Set the status bar text to *ststr* and redraw the window."""
        self.disState.set(ststr)
        self.roots.update()

    def __update_read_state__(self):
        """Refresh the status bar from the current page, index and post."""
        current = self.qb_content[self.content_index]
        self.__updateState__(
            '页码({0}): {1} - ({2}/{3})  |  {4} 好笑 - {5} 回复  |  - {6}, {7}'.format(
                self.obj_qb.get_type_cn(),
                self.qb_page,
                self.content_index+1,
                self.content_len,
                current['stauts_funny'],
                current['stauts_reply'],
                current['user_name'],
                current['qiubai_date']))

    def __show_qiushi__(self, qiushi):
        """Display the text and picture of the post dict *qiushi*.

        The default picture is shown when the post has no picture or when
        its picture cannot be downloaded or decoded.
        """
        # '1.0' is the first character of a Tk text widget.
        self.qiubai_txt.delete('1.0', tk.END)
        self.qiubai_txt.insert('1.0', qiushi['qiushi_content'])

        resized = None
        img_url = qiushi['qiushi_img']
        if img_url and 'NULL' != img_url:
            try:
                pil_bytes = self.obj_qb.get_image(img_url)
                resized = self.__resize_pic__(Image.open(io.BytesIO(pil_bytes)))
            except (OSError, ValueError) as exc:
                # A broken picture must not block reading the text.
                WARNING('failed to load image {0}: {1}'.format(img_url, exc))
        if resized is None:
            resized = self.__resize_pic__(Image.open(DEFAULT_PIC))

        photo = ImageTk.PhotoImage(resized)
        self.qiubai_label.configure(image=photo)
        # Keep a reference on the widget so the photo is not garbage
        # collected while it is displayed.
        self.qiubai_label.image = photo

    def __resize_pic__(self, pil_image, w_box=PIC_W, h_box=PIC_H):
        """Return *pil_image* scaled to fit a w_box x h_box box.

        The aspect ratio is retained; each side of the result is at least
        one pixel.
        """
        w, h = pil_image.size
        factor = min(w_box / w, h_box / h)
        width = max(1, int(w * factor))
        height = max(1, int(h * factor))
        DEBUG("change image from {0}/{1} to {2}/{3}".format(w, h, width, height))
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        return pil_image.resize((width, height), Image.LANCZOS)

    def _load_page(self, page, from_end=False):
        """Fetch *page* and make it current; return False if it has no posts.

        The paging state is only modified after a successful fetch, so an
        empty page or a download error leaves the current post untouched.
        """
        content = self.obj_qb.get_content(page)
        if not content:
            WARNING('page {0} has no content.'.format(page))
            return False
        self.qb_page = page
        self.qb_content = content
        self.content_len = len(content)
        self.content_index = self.content_len - 1 if from_end else 0
        return True

    def _refresh_view(self):
        """Redraw the post widgets and the status bar for the current post."""
        current = self.qb_content[self.content_index]
        self.__show_qiushi__(current)
        self.__update_read_state__()
        DEBUG(current)

    def __set_qiubai_type__(self, qiubai_type):
        """Switch to listing *qiubai_type* and show its first post.

        Returns True on success.  When the new listing cannot be loaded the
        previous type is restored and False is returned.
        """
        previous_type = self.obj_qb.get_type()
        self.obj_qb.set_type(qiubai_type)
        loaded = False
        try:
            loaded = self._load_page(1)
        finally:
            if not loaded:
                # Keep the reader type consistent with the posts on screen.
                self.obj_qb.set_type(previous_type)
        if loaded:
            self._refresh_view()
        return loaded

    def __event_handler__(self, event):
        """Navigate on mouse button 1/3 or on the arrow keys."""
        if not self.is_main_ui:
            return
        if (event.num == 1) or (event.keysym in ('Right', 'Down')):
            self.mv_next()
        elif (event.num == 3) or (event.keysym in ('Left', 'Up')):
            self.mv_forward()

    def mv_next(self):
        """Show the next post, loading the next page after the last one."""
        if self.content_index + 1 < self.content_len:
            self.content_index += 1
        else:
            if not self._load_page(self.qb_page + 1):
                return
            INFO('go to next page {0}'.format(self.qb_page))
        self._refresh_view()

    def mv_forward(self):
        """Show the previous post, loading the previous page when needed."""
        if self.content_index > 0:
            self.content_index -= 1
        elif self.qb_page > 1:
            if not self._load_page(self.qb_page - 1, from_end=True):
                return
            INFO('go to forward page {0}'.format(self.qb_page))
        else:
            WARNING('have go to top page.')
            return
        self._refresh_view()

    def mv_refresh(self):
        """Reload page 1 and show its first post."""
        if not self._load_page(1):
            return
        INFO('refresh to page {0}'.format(self.qb_page))
        self._refresh_view()

    def mv_open_source(self):
        """Copy the post URL to the clipboard and offer to open it."""
        url = self.qb_content[self.content_index]['qiushi_url']
        # The prompt states that the URL is already on the clipboard, so
        # copy it before asking and replace any previous clipboard content.
        self.roots.clipboard_clear()
        self.roots.clipboard_append(url)
        r = tkmsg.askquestion("查看源",
                              "网址:{0}\n\nURL已经保存到剪切板,是否在浏览器中打开?".format(url),
                              parent=self.roots)
        if 'yes' == r:
            DEBUG('open url:{0}'.format(url))
            webbrowser.open_new_tab(url)
        else:
            DEBUG('cancel to open url:{0}'.format(url))

    def mv_exit(self):
        """Quit the Tk main loop."""
        self.roots.quit()

    def ms_type(self):
        """Open the category dialog and apply the chosen category."""
        self.is_main_ui = False
        try:
            self.roots.iconify()  # minimize the main window meanwhile
            try:
                t = Set_QiuBai_Type(self.roots, self.obj_qb.get_type())
                self.roots.wait_window(t.top)
            finally:
                self.roots.deiconify()  # show the main window again

            chosen = t.get()
            if chosen and self.__set_qiubai_type__(chosen):
                INFO('change qiubai type to {0}'.format(chosen))
        finally:
            # Always re-enable navigation, even if the dialog failed.
            self.is_main_ui = True

    def ma_about(self):
        """Show the About box."""
        tkmsg.showinfo("{0} {1}".format(APP_NAME, APP_VER),
                       "作者: 逸山\n电子邮箱: tony_227@qq.com", parent=self.roots)




def get_center_window_geometry(root, width, height):
    """Return the Tk geometry string centering a width x height window on screen.

    *root* supplies the screen size through winfo_screenwidth() and
    winfo_screenheight().  The computed string is also logged.
    """
    offset_x = (root.winfo_screenwidth() - width)/2
    offset_y = (root.winfo_screenheight() - height)/2
    # %d truncates the (possibly fractional) offsets to whole pixels.
    size = '%dx%d+%d+%d' % (width, height, offset_x, offset_y)
    INFO('screen geometry: {0}'.format(size))
    return size


def get_center_app_screen(root, width, height):
    """Return the Tk geometry string centering a width x height window over *root*.

    The position is derived from root.geometry(), i.e. the current size and
    offsets of the *root* window.  The computed string is also logged.
    """
    app_w, app_h, app_x, app_y = parse_geometry(root.geometry())
    offset_x = (app_w - width)/2 + app_x
    offset_y = (app_h - height)/2 + app_y
    # %d truncates the (possibly fractional) offsets to whole pixels.
    size = '%dx%d+%d+%d' % (width, height, offset_x, offset_y)
    INFO('app_pop_screen geometry: {0}'.format(size))
    return size


def parse_geometry(geometry):
    """Parse a Tk geometry string such as "640x480+10-20".

    Returns the tuple (width, height, x_offset, y_offset) of ints; the
    offsets keep their sign.  Raises ValueError when the string does not
    start with a WIDTHxHEIGHT+X+Y pattern.
    """
    # Raw string: "\d" is an invalid escape sequence in a normal literal.
    m = re.match(r"(\d+)x(\d+)([-+]\d+)([-+]\d+)", geometry)
    if not m:
        raise ValueError("failed to parse geometry string")
    # A tuple can be unpacked like the former map object and also reused.
    return tuple(map(int, m.groups()))
     
# Script entry point: build the reader, create the main window, center it
# on the screen and run the Tk event loop.
if __name__ == "__main__":
    qiubai_obj = QiuBai()

    top = tk.Tk()
    top.withdraw()  # keep the window hidden while it is being built
    top.title(APP_NAME)
    try:
        top.iconbitmap('home.ico')
    except tk.TclError:
        # A missing icon file, or a platform that rejects .ico bitmaps,
        # must not prevent the application from starting.
        WARNING('unable to load window icon home.ico')
    top.resizable(False, False)

    QiuBai_Gui(top, qiubai_obj)

    # Show and hide the window once (the original notes this as
    # recalculating the window size), then center it and show it for good.
    top.update_idletasks()
    top.deiconify()
    top.withdraw()
    win_geo = get_center_window_geometry(top, top.winfo_width(), top.winfo_height())
    top.geometry(win_geo)
    top.deiconify()

    DEBUG('屏幕大小: {0}*{1}'.format(top.winfo_screenwidth(), top.winfo_screenheight()))
    DEBUG('需求窗口大小: {0}*{1}'.format(top.winfo_reqwidth(), top.winfo_reqheight()))
    DEBUG('窗口大小: {0}*{1}'.format(top.winfo_width(), top.winfo_height()))

    top.mainloop()