py_scripts/get_data.py

52 lines
1.8 KiB
Python

from requests_html import HTML, HTMLSession
from retrying import retry
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from lottery import Lottery
# Single HTMLSession shared by every request in this module
# (used by both _get_data and main).
session = HTMLSession()
@retry(stop_max_attempt_number=3)
def _get_data(url, lottery_type):
    """Fetch one draw-result page and store the draw it describes.

    Downloads ``url``, reads the issue number, draw date and drawn numbers
    from the ``table.kj_tablelist02`` table and passes them to
    ``Lottery.insert``. The ``retry`` decorator re-runs the whole function
    when it raises, for at most three attempts in total.

    Args:
        url: Address of the draw-result page to scrape.
        lottery_type: Forwarded unchanged as ``Lottery(lottery_type=...)``.

    Raises:
        ValueError: If the page contains no result table.
        IndexError: If the table lacks one of the expected cells.
    """
    lottery = Lottery(lottery_type=lottery_type)
    # Without a timeout a stalled connection blocks forever, so the retry
    # decorator would never get the chance to make another attempt.
    r = session.get(url, timeout=10)
    table_list = r.html.find("table.kj_tablelist02", first=True)
    if table_list is None:
        # find(..., first=True) yields None when nothing matches; fail with a
        # clear message instead of an AttributeError on the next line.
        raise ValueError(f"no table.kj_tablelist02 found at {url}")
    draw_issue = table_list.find("td.td_title01 span.span_left strong")[0].text
    draw_date = table_list.find("td.td_title01 span.span_right")[0].text
    # The text of the ball container spans several lines; join them into one.
    draw_code = table_list.find("div.ball_box01")[0].text.replace('\n', '')
    last_id = lottery.insert(draw_issue, draw_date, draw_code)
    # A falsy result from insert() is reported as "already exists".
    if last_id:
        print(f"issue:{draw_issue}数据写入完成。。。")
    else:
        print(f'issue:{draw_issue}已经存在')
def get_data(url, lottery_type):
    """Run ``_get_data`` and report any error it finally raises.

    Args:
        url: Address of the draw-result page, passed through to ``_get_data``.
        lottery_type: Lottery type, passed through to ``_get_data``.

    Raises:
        Exception: Whatever ``_get_data`` raised, re-raised after it has been
            printed together with a fixed failure message.
    """
    try:
        _get_data(url, lottery_type)
    except Exception as err:
        # Print the error followed by the failure notice, one per line, then
        # hand the same exception back to the caller.
        print(err, '异常出错重试后,依然报错', sep='\n')
        raise err
def main(basic_url, lottery_type):
    """Crawl every draw page linked from an index page.

    Loads ``basic_url``, collects the links matched by
    ``div.kjxq_box02_title_right span div a`` and calls ``get_data`` on the
    ``href`` of each one, walking the links in reverse page order. A failure
    on one link is printed and the remaining links are still processed.

    Args:
        basic_url: Page whose link list enumerates the draw pages.
        lottery_type: Forwarded unchanged to ``get_data``.

    Raises:
        KeyError: If a matched link has no ``href`` attribute.
    """
    # A timeout keeps a stalled connection from hanging the crawl forever.
    r = session.get(basic_url, timeout=10)
    links = r.html.find('div.kjxq_box02_title_right span div a')
    for item in reversed(links):
        # Each match is already the <a> element, so its href can be read
        # directly without re-parsing the element's HTML.
        url = item.attrs['href']
        try:
            get_data(url, lottery_type)
        except Exception as e:
            # Best effort: report the failure and move on to the next link.
            print(e)
if __name__ == '__main__':
    # Only referenced by the commented-out main() call below.
    url = "https://kaijiang.500.com/shtml/pls/22265.shtml"
    # url = "https://kaijiang.500.com/shtml/sd/04001.shtml"
    # url = "https://kaijiang.500.com/shtml/plw/04001.shtml"
    # main(url, lottery_type='plw')

    # Fetch 'pls' draw pages one issue at a time; the range below currently
    # covers the single issue 22320.
    for issue in range(22320, 22321):
        issue_url = f"https://kaijiang.500.com/shtml/pls/{issue}.shtml"
        get_data(issue_url, lottery_type='pls')