毕业论文

当前位置: 毕业论文 > 范文 >

爬取足彩单日比赛数据的python代码

时间:2025-05-19 22:05 来源:优尔论文
gidsign = ['gid','gset','mplay','mtid','gplay','gtid', 'qj','qs','qr', 'kend','kwin','kwinrq', 'tweek','tplay','tsell']

爬取单日比赛数据的python代码

import re

import requests

from bs4 import BeautifulSoup

import pandas as pd

#----------------------- 

def fb_tweekXed(tstr):
    """Replace Chinese weekday names in *tstr* with single-digit codes.

    Monday ('星期一') through Saturday ('星期六') become '1'..'6' and
    Sunday ('星期日') becomes '0'. Text that contains none of these names
    is returned unchanged. The substitution itself is delegated to
    ``str_mxrep``.
    """
    # (weekday name, replacement digit) pairs, Monday first, Sunday last.
    weekday_codes = (
        ('星期一', '1'),
        ('星期二', '2'),
        ('星期三', '3'),
        ('星期四', '4'),
        ('星期五', '5'),
        ('星期六', '6'),
        ('星期日', '0'),
    )
    names, digits = zip(*weekday_codes)
    return str_mxrep(tstr, names, digits)

def str_mxrep(dstr, old_lst, new_lst):
    """Apply a sequence of substring replacements to *dstr*.

    ``old_lst`` and ``new_lst`` are walked in lockstep (extra items in the
    longer one are ignored by ``zip``). Replacements are applied one after
    another, so a later pair sees the output of the earlier pairs.

    Returns the resulting string; *dstr* itself when nothing matched.
    """
    result = dstr
    for target, substitute in zip(old_lst, new_lst):
        # Only touch the string when the target actually occurs in it.
        if target in result:
            result = result.replace(target, substitute)
    return result

def str_xmid(dss, ks1, ks9):
    """Return the shortest text in *dss* found between *ks1* and *ks9*.

    The pattern ``(ks1)(.*?)(ks9)`` is searched in *dss* and the middle
    group of the first match is returned; an empty string is returned when
    there is no match.

    NOTE: *ks1* and *ks9* are inserted into the pattern unescaped, so they
    are interpreted as regular expressions (e.g. a '.' in a delimiter
    matches any character), not as literal text.

    Example: ``str_xmid("abcd232", 'b', '2')`` gives ``'cd'``.
    """
    pattern = '(' + ks1 + ')(.*?)(' + ks9 + ')'
    found = re.search(pattern, dss)
    if not found:
        return ''
    # Group 2 is the lazily matched text between the two delimiters.
    return found.group(2)

def fb_kwin4qnum(jq, sq, rq=0):
    """Derive a 3/1/0 result code from two score counts.

    Args:
        jq: first score count; presumably goals scored -- TODO confirm.
        sq: second score count; presumably goals conceded -- TODO confirm.
        rq: offset added to *jq* before comparing (default 0); presumably
            a handicap. The original author noted it might need to be
            subtracted instead ("or -rq").

    Returns:
        -1 when either *jq* or *sq* is negative; otherwise 3 if
        ``jq + rq`` is greater than *sq*, 0 if it is smaller, and 1 when
        neither holds.
    """
    if jq < 0 or sq < 0:
        return -1

    adjusted = jq + rq  # or -rq
    if adjusted > sq:
        return 3
    if adjusted < sq:
        return 0
    return 1

#-----------------------

def fb_gid_get4htm(htm):

    bs=BeautifulSoup(htm,'html5lib')

    gidsign = ['gid','gset','mplay','mtid','gplay','gtid', 'qj','qs','qr',  'kend','kwin','kwinrq', 'tweek','tplay','tsell']

    gidnill = ['', '', '', '', '', '', '-1', '-1', '0', '0', '-1', '-1', '', '', '']

    df=pd.DataFrame(columns=gidsign,dtype=str)

    ds=pd.Series(gidnill,gidsign,dtype=str)

    #---1#

    def bs_get_ktag(tag):

        return tag.has_attr('isend')

    x10 = bs.find_all(bs_get_ktag) 

    for xc,x in enumerate(x10):

        #print('\n@x\n',xc,'#',x.attrs)

        ds['gid'],ds['gset']=x['fid'],x['lg']

        ds['mplay']=x['homesxname']

        ds['gplay']=x['awaysxname']

        ds['kend']=x['isend']

        s2=ds['tweek']=x['gdate'].split(' ')[0] #tweek

        ds['tweek']=fb_tweekXed(s2)

        ds['tplay'],ds['tsell']=x['pdate'],x['pendtime']  #tplay,tsell,

        #

        df=df.append(ds.T,ignore_index=True)   

    #---2#

    x20=bs.find_all('a',class_='score')

    for xc,x in enumerate(x20):

        xss=x['href']

        kss=str_xmid(xss,'ju-','.sh')

        clst=x.text.split(':')

        ds=df[df['gid']==kss]

        ds=df[df['gid']==kss]

爬取足彩单日比赛数据的python代码:http://www.chuibin.com/fanwen/lunwen_205551.html

------分隔线----------------------------
推荐内容