58 lines
1.9 KiB
Python
58 lines
1.9 KiB
Python
from requests_html import HTML, HTMLSession
|
|
from retrying import retry
|
|
|
|
|
|
from lottery import Lottery
|
|
|
|
session = HTMLSession()
|
|
|
|
@retry(stop_max_attempt_number=3)
def _get_data(url, lottery_type):
    """Download one draw-result page, parse it and store it via ``Lottery``.

    The issue number, draw date and draw code are read from the
    ``table.kj_tablelist02`` element of the page and handed to
    ``Lottery.insert``.

    Args:
        url: Address of the draw-result page to download.
        lottery_type: Forwarded to ``Lottery(lottery_type=...)``.

    Raises:
        ValueError: If the result table, or one of the three fields read
            from it, is missing from the downloaded page.
        Exception: Anything raised by the HTTP request or by
            ``Lottery.insert``. The ``retry`` decorator is configured with
            ``stop_max_attempt_number=3``, so the error reaches the caller
            only after the attempts are used up.
    """
    lottery = Lottery(lottery_type=lottery_type)

    # Without a timeout a stalled connection blocks forever, which also
    # means the retry decorator never gets the chance to try again.
    r = session.get(url, timeout=10)

    table_list = r.html.find("table.kj_tablelist02", first=True)
    if table_list is None:
        # ``find(..., first=True)`` yields None when nothing matches; fail
        # with a message that names the page instead of an AttributeError.
        raise ValueError(
            f"result table not found at {url} (HTTP {r.status_code})"
        )

    issue_el = table_list.find(
        "td.td_title01 span.span_left strong", first=True
    )
    date_el = table_list.find("td.td_title01 span.span_right", first=True)
    code_el = table_list.find("div.ball_box01", first=True)
    if issue_el is None or date_el is None or code_el is None:
        raise ValueError(f"draw fields missing in result table at {url}")

    draw_issue = issue_el.text
    draw_date = date_el.text
    # The element text contains line breaks; strip them so the code is
    # stored as one continuous string.
    draw_code = code_el.text.replace('\n', '')
    print(draw_code)

    last_id = lottery.insert(draw_issue, draw_date, draw_code)
    # NOTE(review): judging by the messages below, a falsy return from
    # insert() means the issue was already stored - confirm in Lottery.
    if last_id:
        print(f"issue:{draw_issue}数据写入完成。。。")
    else:
        print(f'issue:{draw_issue}已经存在')
|
|
|
|
def get_data(url, lottery_type):
    """Run ``_get_data`` and report any failure before re-raising it.

    Args:
        url: Address of the draw-result page to download.
        lottery_type: Forwarded unchanged to ``_get_data``.

    Raises:
        Exception: Whatever ``_get_data`` raised. The error, the URL and a
            notice are printed first, then the same exception propagates.
    """
    try:
        _get_data(url, lottery_type)
    except Exception as e:
        print(e)
        print(url)
        print('异常出错重试后,依然报错')
        # A bare ``raise`` re-raises the exception currently being handled
        # without naming it again, leaving its traceback untouched.
        raise
|
|
|
|
def main(basic_url, lottery_type):
    """Crawl every draw page linked from the index page at ``basic_url``.

    The anchors matched by ``div.kjxq_box02_title_right span div a`` are
    visited in reverse document order and each target is handed to
    ``get_data``. A failure on one page is printed and the crawl moves on
    to the next link.

    Args:
        basic_url: Address of the page that lists the draw links.
        lottery_type: Forwarded unchanged to ``get_data``.

    Raises:
        Exception: Anything raised while downloading ``basic_url`` itself;
            errors from the individual draw pages are not propagated.
    """
    from urllib.parse import urljoin

    # Bound the wait so an unresponsive server cannot hang the crawl.
    r = session.get(basic_url, timeout=10)
    anchors = r.html.find('div.kjxq_box02_title_right span div a')

    # NOTE(review): the reverse order presumably inserts older issues
    # first - confirm against how the index page is sorted.
    for item in reversed(anchors):
        # Each match is already an <a> element, so read its href directly
        # rather than re-parsing the element's HTML.
        href = item.attrs.get('href')
        if not href:
            # An anchor without a target has nothing to fetch; skip it
            # instead of aborting the whole crawl with a KeyError.
            continue
        # Absolute hrefs pass through unchanged; relative or
        # protocol-relative ones are resolved against the index page.
        url = urljoin(basic_url, href)
        try:
            get_data(url, lottery_type)
        except Exception as e:
            print(e)
|
|
|
|
if __name__ == '__main__':
    # Script entry point: fetch and store a single "pls" draw, selected by
    # its issue number.
    target_issue = "23107"
    page_url = f"https://kaijiang.500.com/shtml/pls/{target_issue}.shtml"
    get_data(page_url, lottery_type='pls')

    # Disabled bulk run over a range of "plw" issues, kept for reference:
    # for y in range(23, 24):
    #     for i in range(1, 365):
    #         try:
    #             issue = f"{y:02d}{i:03d}"
    #             url = f"https://kaijiang.500.com/shtml/plw/{issue}.shtml"
    #             get_data(url, lottery_type='plw')
    #         except Exception as e:
    #             print(e)
    #             continue