"""Scrape lottery draw results from kaijiang.500.com and store them via ``Lottery``."""

from requests_html import HTML, HTMLSession
from retrying import retry

from lottery import Lottery

# Seconds to wait for the server before giving up on a request. Without a
# timeout a stalled connection would block forever and the retry decorator
# would never get a chance to run.
REQUEST_TIMEOUT = 10

session = HTMLSession()


@retry(stop_max_attempt_number=3)
def _get_data(url: str, lottery_type: str) -> None:
    """Fetch one draw page, extract the draw fields and insert them.

    Any exception raised here triggers another attempt, up to three in
    total (see the ``retry`` decorator); the last exception propagates.

    Args:
        url: Address of the draw-result page to download.
        lottery_type: Passed through to ``Lottery(lottery_type=...)``.

    Raises:
        ValueError: If the page has no ``table.kj_tablelist02`` element.
        IndexError: If one of the expected cells is missing from the table.
    """
    r = session.get(url, timeout=REQUEST_TIMEOUT)
    table_list = r.html.find("table.kj_tablelist02", first=True)
    if table_list is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(f"result table not found at {url}")

    draw_issue = table_list.find("td.td_title01 span.span_left strong")[0].text
    draw_date = table_list.find("td.td_title01 span.span_right")[0].text
    # The ball container's text spans several lines; join them into one string.
    draw_code = table_list.find("div.ball_box01")[0].text.replace('\n', '')

    with Lottery(lottery_type=lottery_type) as lottery:
        last_id = lottery.insert(draw_issue, draw_date, draw_code)
        # A falsy return from insert() is reported as "already exists".
        if last_id:
            print(f"issue:{draw_issue}数据写入完成。。。")
        else:
            print(f'issue:{draw_issue}已经存在')


def get_data(url: str, lottery_type: str) -> None:
    """Run ``_get_data`` and report the failure if all retries are exhausted.

    Args:
        url: Address of the draw-result page to download.
        lottery_type: Passed through to ``_get_data``.

    Raises:
        Exception: Whatever ``_get_data`` raised on its final attempt, after
            the error and the offending URL have been printed.
    """
    try:
        _get_data(url, lottery_type)
    except Exception as e:
        print(e)
        print(url)
        print('异常出错重试后,依然报错')
        # Bare raise re-raises the active exception unchanged.
        raise


def main(basic_url: str, lottery_type: str) -> None:
    """Crawl every draw page linked from the index page at ``basic_url``.

    The links are taken from ``div.kjxq_box02_title_right span div a`` and
    visited in reverse document order. A failure on one page is printed and
    does not stop the remaining pages from being processed.

    Args:
        basic_url: Address of the page that lists the individual draw links.
        lottery_type: Passed through to ``get_data`` for every link.
    """
    r = session.get(basic_url, timeout=REQUEST_TIMEOUT)
    links = r.html.find('div.kjxq_box02_title_right span div a')
    for item in reversed(links):
        # Each match is already the <a> element, so read its href directly
        # rather than re-parsing its HTML.
        url = item.attrs.get('href')
        if not url:
            # An anchor without a target cannot be crawled; skip it.
            continue
        try:
            get_data(url, lottery_type)
        except Exception as e:
            # Best effort: report the failure and move on to the next link.
            print(e)


if __name__ == '__main__':
    issue = "23108"
    url = f"https://kaijiang.500.com/shtml/pls/{issue}.shtml"
    get_data(url, lottery_type='pls')
    # Disabled loop over generated issue numbers for the 'plw' pages,
    # kept for reference:
    # for y in range(23, 24):
    #     for i in range(1, 365):
    #         try:
    #             issue = f"{y:02d}{i:03d}"
    #             url = f"https://kaijiang.500.com/shtml/plw/{issue}.shtml"
    #             get_data(url, lottery_type='plw')
    #         except Exception as e:
    #             print(e)
    #             continue