增加lottery基类
This commit is contained in:
parent
f937c697a4
commit
a0f3c18d08
|
|
@ -1,2 +1,3 @@
|
|||
*/__pycache__
|
||||
sample
|
||||
__pycache__
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
from requests_html import HTMLSession, HTML
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from retrying import retry
|
||||
|
||||
from lottery import Lottery
|
||||
|
||||
|
||||
# One requests_html session shared by every page fetch in this module
# (used by both _get_data and main).
session = HTMLSession()
|
||||
|
||||
@retry(stop_max_attempt_number=3)
def _get_data(url, lottery_type):
    """Scrape one draw-result page and store it through ``Lottery.insert``.

    The whole function is retried (up to 3 attempts) by the ``retry``
    decorator whenever it raises.

    Args:
        url: Address of a single draw-detail page.
        lottery_type: Lottery kind forwarded to ``Lottery``.

    Raises:
        Whatever the HTTP fetch, the page parsing or ``Lottery.insert`` raise
        once the retries are exhausted (e.g. ``AttributeError`` when the
        expected result table is missing from the page).
    """
    lottery = Lottery(lottery_type=lottery_type)
    r = session.get(url)
    table_list = r.html.find("table.kj_tablelist02", first=True)
    draw_issue = table_list.find("td.td_title01 span.span_left strong")[0].text
    draw_date = table_list.find("td.td_title01 span.span_right")[0].text
    # The balls are rendered one per line; join them into a single code string.
    draw_code = table_list.find("div.ball_box01")[0].text.replace('\n', '')
    last_id = lottery.insert(draw_issue, draw_date, draw_code)
    # A truthy id is reported as "newly written", anything falsy as
    # "already stored".  The messages must use draw_issue: the previous
    # `issue` name was undefined and raised NameError on every call.
    if last_id:
        print(f"issue:{draw_issue}数据写入完成。。。")
    else:
        print(f'issue:{draw_issue}已经存在')
|
||||
|
||||
def get_data(url, lottery_type):
    """Scrape a single draw page, reporting any error that survives the retries.

    Delegates to ``_get_data`` (which is wrapped in a retry decorator).  If it
    still fails, the exception and a notice are printed and the same exception
    is raised again for the caller to handle.
    """
    try:
        _get_data(url, lottery_type)
    except Exception as err:
        for line in (err, '异常出错重试后,依然报错'):
            print(line)
        raise err
|
||||
|
||||
def main(basic_url, lottery_type):
    """Crawl every draw page linked from the issue selector of ``basic_url``.

    The selector links are visited in reverse document order.  A failure on
    one page is printed and the crawl moves on to the next link.
    """
    index_page = session.get(basic_url)
    anchors = index_page.html.find('div.kjxq_box02_title_right span div a')
    for anchor in reversed(anchors):
        # Re-parse the anchor's own markup to read its href attribute.
        link = HTML(html=anchor.html).find('a', first=True)
        detail_url = link.attrs['href']
        try:
            get_data(detail_url, lottery_type)
        except Exception as err:
            # Best effort: report the failure and keep crawling.
            print(err)
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: fetch a single "pls" draw page.
    # Alternative targets kept for manual runs:
    #   url = "https://kaijiang.500.com/shtml/sd/04001.shtml"
    #   url = "https://kaijiang.500.com/shtml/plw/04001.shtml"
    #   main(url, lottery_type='plw')
    url = "https://kaijiang.500.com/shtml/pls/22265.shtml"
    get_data(url, lottery_type='pls')
|
||||
|
|
@ -1,106 +0,0 @@
|
|||
from requests_html import HTMLSession, HTML
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
import re
|
||||
import time
|
||||
import random
|
||||
from datetime import date
|
||||
from retrying import retry
|
||||
|
||||
from model.pls import PLS
|
||||
from model.plw import PLW
|
||||
from model.sd import SD
|
||||
from model.klb import KLB
|
||||
|
||||
# One requests_html session shared by every page fetch in this module.
session = HTMLSession()

# NOTE(review): the connection URL, credentials included, is hard-coded here;
# consider moving it to configuration.
engine = create_engine("mysql+pymysql://root:123456@localhost/lottery?charset=utf8")
DbSession = sessionmaker(bind=engine)
# Single module-wide ORM session used by _get_data for its query and commit.
db = DbSession()
|
||||
# Accepted layouts of the scraped draw-date text.  Raw strings keep \d and \s
# as regex escapes rather than (invalid) string escapes.
# pat1 matches the "YYYY年MM月DD日" layout.
pat1 = re.compile(r'开奖日期:(\d+)年(\d+)月(\d+)日.*')
# pat2 matches the "YYYY-MM-DD <rest>" layout.
pat2 = re.compile(r'开奖日期:(\d+)-(\d+)-(\d+)\s.*')
|
||||
|
||||
|
||||
@retry(stop_max_attempt_number=3)
def _get_data(url, lottery_type):
    """Scrape one draw-result page and insert it if the issue is not stored yet.

    The whole function is retried (up to 3 attempts) by the ``retry``
    decorator whenever it raises.

    Args:
        url: Address of a single draw-detail page.
        lottery_type: ``'pls'``, ``'sd'`` or ``'plw'`` (case-insensitive);
            any other value selects the ``KLB`` model.

    Raises:
        Exception: If the draw-date text matches neither ``pat1`` nor ``pat2``.
    """
    Model = {'pls': PLS, 'sd': SD, 'plw': PLW}.get(lottery_type.lower(), KLB)

    # Scrape the page.
    r = session.get(url)
    table_list = r.html.find("table.kj_tablelist02", first=True)
    issue = table_list.find("td.td_title01 span.span_left strong")[0].text
    open_date = table_list.find("td.td_title01 span.span_right")[0].text
    # The balls are rendered one per line; join them into a single code string.
    draw_code = table_list.find("div.ball_box01")[0].text.replace('\n', '')

    # Skip issues that already have a row.
    if db.query(Model).filter_by(draw_issue=issue).first() is not None:
        print(f'issue:{issue}已经存在')
        return

    match = pat1.match(open_date) or pat2.match(open_date)
    if match is None:
        raise Exception(f"issue:{issue}数据写入失败。。。")
    year, month, day = map(int, match.groups())

    record = Model()
    record.draw_issue = issue
    record.draw_code = draw_code
    record.draw_date = date(year, month, day)

    # Three-digit games (PLS and SD) also store per-digit statistics.
    if isinstance(record, (PLS, SD)):
        digits = [int(ch) for ch in draw_code]
        hundred, ten, one = digits[0], digits[1], digits[2]
        record.hundred = draw_code[0]
        record.ten = draw_code[1]
        record.one = draw_code[2]
        record.code_small = sum(1 for d in digits if d < 5)
        record.code_big = sum(1 for d in digits if d >= 5)
        record.code_single = sum(1 for d in digits if d % 2 == 1)
        record.code_double = sum(1 for d in digits if d % 2 == 0)
        record.sum_num = sum(digits)
        record.sum_hundred_one = hundred + one
        # These two sums were previously swapped (one+ten and ten+hundred).
        record.sum_hundred_ten = hundred + ten
        record.sum_ten_one = ten + one
        # group_type by number of distinct digits: 2 -> 3, 3 -> 6, else -> 1.
        distinct = len(set(draw_code))
        if distinct == 2:
            record.group_type = 3
        elif distinct == 3:
            record.group_type = 6
        else:
            record.group_type = 1

    db.add(record)
    db.commit()
    print(f"issue:{issue}数据写入完成。。。")
|
||||
|
||||
def get_data(url, lottery_type):
    """Scrape a single draw page, reporting any error that survives the retries.

    Delegates to ``_get_data`` (which is wrapped in a retry decorator).  If it
    still fails, the exception and a notice are printed and the same exception
    is raised again for the caller to handle.
    """
    try:
        _get_data(url, lottery_type)
    except Exception as err:
        for line in (err, '异常出错重试后,依然报错'):
            print(line)
        raise err
|
||||
|
||||
def main(basic_url, lottery_type):
    """Crawl every draw page linked from the issue selector of ``basic_url``.

    The selector links are visited in reverse document order.  A failure on
    one page is printed and the crawl moves on to the next link.
    """
    index_page = session.get(basic_url)
    anchors = index_page.html.find('div.kjxq_box02_title_right span div a')
    for anchor in reversed(anchors):
        # Re-parse the anchor's own markup to read its href attribute.
        link = HTML(html=anchor.html).find('a', first=True)
        detail_url = link.attrs['href']
        try:
            get_data(detail_url, lottery_type)
        except Exception as err:
            # Best effort: report the failure and keep crawling.
            print(err)
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: crawl all "plw" draws reachable from the start page.
    # Start pages of the other lottery types, kept for manual runs:
    #   https://kaijiang.500.com/shtml/pls/04001.shtml
    #   https://kaijiang.500.com/shtml/sd/04001.shtml
    basic_url = "https://kaijiang.500.com/shtml/plw/04001.shtml"
    main(basic_url, lottery_type='plw')
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
import re
|
||||
from datetime import date
|
||||
|
||||
from model.pls import PLS
|
||||
from model.plw import PLW
|
||||
from model.sd import SD
|
||||
from model.klb import KLB
|
||||
|
||||
class Lottery(object):
    """Store scraped lottery draws through the ORM model of one lottery type.

    Supported ``lottery_type`` values (case-insensitive) are ``'pls'``,
    ``'sd'``, ``'plw'`` and ``'klb'``.
    """

    # Accepted layouts of the scraped draw-date text, tried in this order.
    # Raw strings keep \d and \s as regex escapes.
    _DATE_PATTERNS = (
        re.compile(r'开奖日期:(\d+)年(\d+)月(\d+)日.*'),
        re.compile(r'开奖日期:(\d+)-(\d+)-(\d+)\s.*'),
    )
    # Session factory shared by all instances so the engine (and its
    # connection pool) is created only once per process.
    _session_factory = None

    def __init__(self, lottery_type='pls'):
        """Select the model for ``lottery_type`` and open a DB session.

        Raises:
            ValueError: If ``lottery_type`` is not one of the supported kinds.
        """
        self._pat1, self._pat2 = self._DATE_PATTERNS
        self._lottery_type = lottery_type
        # Built here rather than at class level so the model names are only
        # looked up when an instance is actually created.
        models = {'pls': PLS, 'sd': SD, 'plw': PLW, 'klb': KLB}
        try:
            self._Model = models[lottery_type.lower()]
        except KeyError:
            raise ValueError("未知的lottery_type") from None
        self.db = self._get_db_session()

    def _get_db_session(self):
        """Return a new ORM session bound to the shared engine."""
        if Lottery._session_factory is None:
            # NOTE(review): the connection URL, credentials included, is
            # hard-coded; consider moving it to configuration.
            _engine = create_engine("mysql+pymysql://root:123456@localhost/lottery?charset=utf8")
            Lottery._session_factory = sessionmaker(bind=_engine)
        return Lottery._session_factory()

    @classmethod
    def _parse_draw_date(cls, draw_date):
        """Return the ``date`` found in the draw-date text, or None if no layout matches."""
        for pattern in cls._DATE_PATTERNS:
            match = pattern.match(draw_date)
            if match:
                year, month, day = map(int, match.groups())
                return date(year, month, day)
        return None

    @staticmethod
    def _digit_stats(draw_code):
        """Return the derived column values for a three-digit draw code.

        ``draw_code[0]``, ``[1]`` and ``[2]`` are the hundred, ten and one
        digits respectively.
        """
        digits = [int(ch) for ch in draw_code]
        hundred, ten, one = digits[0], digits[1], digits[2]
        # group_type by number of distinct digits: 2 -> 3, 3 -> 6, else -> 1.
        distinct = len(set(draw_code))
        if distinct == 2:
            group_type = 3
        elif distinct == 3:
            group_type = 6
        else:
            group_type = 1
        return {
            'hundred': draw_code[0],
            'ten': draw_code[1],
            'one': draw_code[2],
            'code_small': sum(1 for d in digits if d < 5),
            'code_big': sum(1 for d in digits if d >= 5),
            'code_single': sum(1 for d in digits if d % 2 == 1),
            'code_double': sum(1 for d in digits if d % 2 == 0),
            'sum_num': sum(digits),
            'sum_hundred_one': hundred + one,
            # These two sums were previously swapped (one+ten and ten+hundred).
            'sum_hundred_ten': hundred + ten,
            'sum_ten_one': ten + one,
            'group_type': group_type,
        }

    def insert(self, draw_issue, draw_date, draw_code, **kwargs):
        """Insert one draw unless a row for ``draw_issue`` already exists.

        Args:
            draw_issue: Issue number of the draw.
            draw_date: Scraped date text in one of the ``_DATE_PATTERNS`` layouts.
            draw_code: Drawn numbers as a single string.
            **kwargs: Accepted for forward compatibility; currently unused.

        Returns:
            The id of the newly inserted row, or ``None`` when the issue is
            already stored, so callers can tell "written" from "already exists"
            by truthiness.

        Raises:
            ValueError: If ``draw_date`` matches none of the known layouts.
        """
        existing = self.db.query(self._Model).filter_by(draw_issue=draw_issue).first()
        if existing is not None:
            return None

        parsed_date = self._parse_draw_date(draw_date)
        if parsed_date is None:
            # Previously formatted with the undefined name `issue`, which
            # raised NameError instead of this message.
            raise ValueError(f"issue:{draw_issue}数据写入失败。。。")

        record = self._Model()
        record.draw_issue = draw_issue
        record.draw_code = draw_code
        record.draw_date = parsed_date

        # Three-digit games (PLS and SD) also store per-digit statistics.
        if isinstance(record, (PLS, SD)):
            for field, value in self._digit_stats(draw_code).items():
                setattr(record, field, value)

        self.db.add(record)
        try:
            self.db.commit()
        except Exception:
            # Leave the session usable after a failed commit.
            self.db.rollback()
            raise
        return record.id
|
||||
Loading…
Reference in New Issue