Tianya Thread: Free Your F5 Key (OP-Only View)


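The script below watches a Tianya stock-board thread, keeps only the posts written by the original poster (楼主, "lz"), and reprints them on a timer, so the F5 key gets a rest.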
'''
Tianya thread live follower
2015-04-26
Python 3.4.3
'''
import re, time, os
import requests
from bs4 import BeautifulSoup
# Browser-like request headers (copied from Chrome) so Tianya serves the normal page
header = {
	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
	'Accept-Encoding': 'gzip,deflate,sdch',
	'Accept-Language': 'zh-CN,zh;q=0.8',
	'Cache-Control': 'max-age=0',
	'Connection': 'keep-alive',
	'Host': 'bbs.tianya.cn',
	'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36',
}
refreshtime = 30	# refresh interval in seconds
lastpage = 1	# latest page number seen so far
Furl = ''	# URL of the latest page
lzname = ''	# username of the OP (thread starter)
# Thread URLs follow the pattern post-{board}-{threadid}-{page}.shtml
#Turl = 'http://bbs.tianya.cn/post-stocks-1131734-1.shtml'	# a different thread
Turl = 'http://bbs.tianya.cn/post-stocks-1345750-1.shtml'	# first page of the thread
def bsp(newurl):	# fetch a page and return its BeautifulSoup tree
	html = requests.get(newurl, headers=header, timeout=10)
	soup = BeautifulSoup(html.content, 'html.parser')
	return soup
def pagnum(soup):	# extract the total page count from the page's inline script
	tx = soup.find('script', {'type': 'text/javascript'}).text
	# the script contains e.g. "pageCount : 15," -- pull out the number
	req = re.search(r'pageCount : \d*,', tx).group(0)
	req = req[12:-1]	# strip the 'pageCount : ' prefix and the trailing comma
	return int(req)
def pagepro():	# build the URL of the latest page
	global Furl
	tx1 = Turl.split('-')
	# ['http://bbs.tianya.cn/post', 'stocks', '1345750', '1.shtml']
	Furl = tx1[0] + '-' + tx1[1] + '-' + tx1[2] + '-' + '%d.shtml' % lastpage
	return Furl
def pagecollect():	# collect the OP's posts from the latest page
	soup = bsp(Furl)
	txt = []
	# every floor's div carries the poster's name in its '_host' attribute,
	# so filtering on lzname keeps only the OP's floors
	lzpost = soup.findAll('div', {'_host': lzname})
	for floor in lzpost:
		ntime = floor.find('div', {'class': 'atl-info'}).text	# post time
		post = floor.find('div', {'class': 'atl-content'})	# outer wrapper
		post = post.find('div', {'class': 'bbs-content'}).text	# post body
		txt.append(ntime)
		txt.append(post.strip())
	return txt	# flat list: [time, content, time, content, ...]
def formatprint(txt):	# pretty-print the [time, content, ...] list
	if not txt:
		print('===========None============')
	else:
		for i in range(0, len(txt), 2):
			print('=' * 30)
			print(txt[i])
			# Tianya embeds a 29-dash rule inside post bodies; shorten it
			print(txt[i + 1].replace('-' * 29, '\n----------\n'))
			print('=' * 30)

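# Polling strategy: re-fetch page 1 every refreshtime seconds and read the
# thread's pageCount; if it grew, jump to the new last page, otherwise clear
# the screen and reprint the current one.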
def main():
	global lastpage, Furl, lzname
	soup = bsp(Turl)
	title = re.sub('_.*', '=====', soup.title.text)	# drop the site suffix from the title
	print('=====', title)
	lastpage = pagnum(soup)
	print('LastPage:', lastpage)
	Furl = pagepro()	# build the latest-page URL
	print('LastURL:', Furl)
	# the OP's username is stored on the thread's action-menu div
	lzname = soup.find('div', {'class': 'atl-menu clearfix js-bbs-act'})['js_activityusername']
	print('Lzname:', lzname)
	formatprint(pagecollect())	# first dump

	while True:
		time.sleep(refreshtime)
		soup = bsp(Turl)	# re-fetch page 1 to read the current page count
		newpage = pagnum(soup)
		if newpage > lastpage:	# a new page appeared -- jump to it
			print('LastPage:', newpage)
			lastpage = newpage
			Furl = pagepro()
			formatprint(pagecollect())
		else:	# still the same page -- clear the screen and reprint
			os.system('cls')	# Windows-only; see the note below
			print('==========Refresh==========')
			formatprint(pagecollect())

if __name__ == '__main__':
	main()
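
Two caveats. First, the CSS class names and the '_host' attribute match Tianya's 2015 markup; if the site has changed since, the selectors in pagecollect() will need updating. Second, os.system('cls') only clears the screen on Windows. A minimal cross-platform sketch, assuming the script may also run on Linux or macOS (clear_screen is a hypothetical helper, not part of the original):

def clear_screen():
	# 'cls' is cmd.exe-only; 'clear' covers Linux/macOS terminals
	# (relies on the 'import os' at the top of the script)
	os.system('cls' if os.name == 'nt' else 'clear')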