# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup


def file2soup(ffile):
    """Parse the HTML file at *ffile* and return it as a BeautifulSoup tree.

    The file is read as bytes and parsed with the ``lxml`` parser, with
    UTF-8 suggested as the document encoding.

    Args:
        ffile: Path of the HTML file to read.

    Returns:
        The parsed ``BeautifulSoup`` object.
    """
    # Read-only binary mode: the file is never written, so 'r+b' only added a
    # needless write-permission requirement.  Assigning ``encoding`` on the
    # binary file handle did not influence decoding, so the encoding is
    # handed to BeautifulSoup explicitly instead.
    with open(ffile, 'rb') as f:
        return BeautifulSoup(f, 'lxml', from_encoding='utf-8')


# Earlier drafts of soup2list iterated over the children of the <tbody>,
# skipped blank entries, and appended the text of <td> cells 0, 1 and 3 of
# each row to ulist before returning it.
def soup2list(soup, ulist):
    """Print the ``<tbody id="hidden_zhpm">`` element and return *ulist*.

    This version only locates the element and prints it for inspection; it
    does not add anything to *ulist*.

    Args:
        soup: Parsed document to search.
        ulist: List that is passed through unchanged.

    Returns:
        *ulist*, so that ``ulist = soup2list(soup, ulist)`` does not replace
        the caller's list with ``None``.
    """
    tbody = soup.find('tbody', id="hidden_zhpm")
    print(tbody)
    return ulist


url = 'http://bj.58.com/pinpaigongyu/pn/{ppp}/?minprice=2000_4000'
ffile = 'd://best.txt'
ulist = []
soup = file2soup(ffile)
ulist = soup2list(soup, ulist)
# A formatted table (rank / school name / total score) of the first 11
# entries of ulist used to be printed here; that code was disabled.

# --------------------
# Print three fields from the first rows found inside the first element with
# class "hidden_zhpm" of the module-level ``soup``.
MAX_ROWS = 6  # upper bound on the number of rows collected

container = soup.select('.hidden_zhpm')[0]
rows = []  # named ``rows`` rather than ``list`` to avoid shadowing the builtin
for node in container:
    if len(rows) >= MAX_ROWS:
        break
    # Children whose length is 1 or less (e.g. the newline text between
    # rows) are skipped.
    if len(node) > 1:
        cells = node('td')  # calling a tag is shorthand for find_all('td')
        # Rows with fewer than four <td> cells cannot provide index 3.
        if len(cells) > 3:
            rows.append([cells[0].string, cells[2].string, cells[3].string])

# Iterate over what was actually collected: the previous fixed range(11)
# indexed past the MAX_ROWS cap and raised IndexError.
for row in rows:
    print(row[0], row[1], row[2])