# py_scripts/get_data.py

from requests_html import HTML, HTMLSession
from retrying import retry


from lottery import Lottery

# Single HTMLSession created at import time and reused by the page requests below.
session = HTMLSession()

@retry(stop_max_attempt_number=3)
def _get_data(url, lottery_type):
    """Scrape a single draw-result page and hand the draw to ``Lottery.insert``.

    The issue number, draw date and drawn numbers are read from the page's
    ``table.kj_tablelist02`` element. The function is wrapped by ``retry``
    with ``stop_max_attempt_number=3``, so an exception raised anywhere in
    the body causes another attempt until that limit is reached.

    Args:
        url: Address of the draw-result page to fetch.
        lottery_type: Forwarded to ``Lottery(lottery_type=...)``.
    """
    store = Lottery(lottery_type=lottery_type)
    response = session.get(url)
    result_table = response.html.find("table.kj_tablelist02", first=True)

    # Each lookup takes the first matching node; an empty match raises
    # IndexError, which in turn triggers the retry decorator.
    issue_nodes = result_table.find("td.td_title01 span.span_left strong")
    draw_issue = issue_nodes[0].text
    date_nodes = result_table.find("td.td_title01 span.span_right")
    draw_date = date_nodes[0].text
    ball_nodes = result_table.find("div.ball_box01")
    # Line breaks in the node text are stripped before printing and storing.
    draw_code = ball_nodes[0].text.replace('\n', '')
    print(draw_code)

    inserted_id = store.insert(draw_issue, draw_date, draw_code)
    if not inserted_id:
        # A falsy result from insert is reported as "issue already exists".
        print(f'issue:{draw_issue}已经存在')
        return
    print(f"issue:{draw_issue}数据写入完成。。。")

def get_data(url, lottery_type):
    """Fetch and store one draw page, reporting and re-raising any failure.

    Delegates to ``_get_data``, which is wrapped in a retry decorator. If the
    call still raises, the error, the offending URL and a notice are printed,
    and the same exception then propagates to the caller.

    Args:
        url: Address of the draw-result page.
        lottery_type: Passed through unchanged to ``_get_data``.

    Raises:
        Exception: Whatever ``_get_data`` ultimately raised.
    """
    try:
        _get_data(url, lottery_type)
    except Exception as e:
        print(e)
        print(url)
        print('异常出错重试后，依然报错')
        # Bare ``raise`` re-raises the active exception as-is, without adding
        # a second traceback entry for this frame the way ``raise e`` does.
        raise

def main(basic_url, lottery_type):
    """Crawl every draw page linked from ``basic_url`` and store each one.

    Loads ``basic_url``, collects the anchors matched by
    ``div.kjxq_box02_title_right span div a`` and calls ``get_data`` on each
    anchor's ``href``, walking the matches in reverse page order. A failure
    on one page is printed and the crawl moves on to the next link.

    Args:
        basic_url: Page that contains the list of draw links.
        lottery_type: Passed through unchanged to ``get_data``.
    """
    r = session.get(basic_url)
    anchors = r.html.find('div.kjxq_box02_title_right span div a')
    for item in reversed(anchors):
        # Every match is already an <a> element, so its href can be read
        # directly instead of serializing and re-parsing the element.
        url = item.attrs['href']
        try:
            get_data(url, lottery_type)
        except Exception as e:
            # Best effort: one failing page must not abort the whole crawl.
            print(e)

if __name__ == '__main__':
    # Script entry point: fetch and store a single "pls" draw by issue number.
    target_issue = "23107"
    get_data(
        f"https://kaijiang.500.com/shtml/pls/{target_issue}.shtml",
        lottery_type='pls',
    )
    # Disabled example: backfill "plw" draws by looping over issue numbers
    # built as two-digit year + three-digit sequence, skipping failures.
    # for y in range(23, 24):
    #     for i in range(1, 365):
    #         try:
    #             issue = f"{y:02d}{i:03d}"
    #             url = f"https://kaijiang.500.com/shtml/plw/{issue}.shtml"
    #             get_data(url, lottery_type='plw')
    #         except Exception as e:
    #             print(e)
    #             continue