爬取单日比赛数据的 Python 代码
import re
import requests
from bs4 import BeautifulSoup
import pandas as pd
#-----------------------
def fb_tweekXed(tstr):
    """Replace Chinese weekday names in *tstr* with single-digit codes.

    '星期一' through '星期六' become '1' through '6', and '星期日' becomes
    '0'. Any other text in *tstr* is passed through unchanged. The actual
    substitution is delegated to ``str_mxrep``.
    """
    weekday_names = ['星期一', '星期二', '星期三', '星期四', '星期五', '星期六', '星期日']
    weekday_codes = ['1', '2', '3', '4', '5', '6', '0']
    return str_mxrep(tstr, weekday_names, weekday_codes)
def str_mxrep(dstr, old_lst, new_lst):
    """Apply a series of substring replacements to *dstr* and return the result.

    ``old_lst`` and ``new_lst`` are walked in lockstep: every occurrence of
    ``old_lst[i]`` is replaced by ``new_lst[i]``. Replacements are applied one
    after another, so a later pair sees the output of the earlier ones. If the
    two lists differ in length, the extra entries of the longer one are ignored.
    """
    result = dstr
    for needle, substitute in zip(old_lst, new_lst):
        # Only touch the text when the needle is actually present.
        if needle in result:
            result = result.replace(needle, substitute)
    return result
def str_xmid(dss, ks1, ks9):
    """Return the text located between two literal delimiters in *dss*.

    The search finds the leftmost occurrence of ``ks1`` that is followed by
    ``ks9`` on the same line and returns the shortest span between them.

    Example: ``str_xmid("abcd232", "b", "2")`` returns ``"cd"``.

    Args:
        dss: Text to search.
        ks1: Opening delimiter, matched literally.
        ks9: Closing delimiter, matched literally.

    Returns:
        The text between the delimiters, or ``''`` when no match is found.
    """
    # Escape both delimiters so that regex metacharacters in them (for
    # example the "." in ".sh", or parentheses) are matched literally rather
    # than being interpreted as pattern syntax.
    pattern = re.escape(ks1) + '(.*?)' + re.escape(ks9)
    found = re.search(pattern, dss)
    return found.group(1) if found else ''
def fb_kwin4qnum(jq, sq, rq=0):
    """Compare two scores, with an optional offset, and return a result code.

    ``rq`` is added to ``jq`` before the comparison with ``sq``.

    Returns:
        -1 if either ``jq`` or ``sq`` is negative;
        3 if ``jq + rq`` is greater than ``sq``;
        0 if ``jq + rq`` is less than ``sq``;
        1 otherwise.
    """
    # A negative score short-circuits to the -1 code.
    if jq < 0 or sq < 0:
        return -1

    # NOTE(review): the original author left an "or -rq" remark here, so the
    # sign convention of rq should be confirmed against the callers.
    adjusted = jq + rq
    if adjusted > sq:
        return 3
    if adjusted < sq:
        return 0
    return 1
#-----------------------
def fb_gid_get4htm(htm):
bs=BeautifulSoup(htm,'html5lib')
gidsign = ['gid','gset','mplay','mtid','gplay','gtid', 'qj','qs','qr', 'kend','kwin','kwinrq', 'tweek','tplay','tsell']
gidnill = ['', '', '', '', '', '', '-1', '-1', '0', '0', '-1', '-1', '', '', '']
df=pd.DataFrame(columns=gidsign,dtype=str)
ds=pd.Series(gidnill,gidsign,dtype=str)
#---1#
def bs_get_ktag(tag):
return tag.has_attr('isend')
x10 = bs.find_all(bs_get_ktag)
for xc,x in enumerate(x10):
#print('\n@x\n',xc,'#',x.attrs)
ds['gid'],ds['gset']=x['fid'],x['lg']
ds['mplay']=x['homesxname']
ds['gplay']=x['awaysxname']
ds['kend']=x['isend']
s2=ds['tweek']=x['gdate'].split(' ')[0] #tweek
ds['tweek']=fb_tweekXed(s2)
ds['tplay'],ds['tsell']=x['pdate'],x['pendtime'] #tplay,tsell,
#
df=df.append(ds.T,ignore_index=True)
#---2#
x20=bs.find_all('a',class_='score')
for xc,x in enumerate(x20):
xss=x['href']
kss=str_xmid(xss,'ju-','.sh')
clst=x.text.split(':')
ds=df[df['gid']==kss]
ds=df[df['gid']==kss] 爬取足彩单日比赛数据的python代码:http://www.chuibin.com/fanwen/lunwen_205551.html