From 9073e118e347cd81faccbc93403d2e54aebd855a Mon Sep 17 00:00:00 2001
From: chenwj113
Date: Wed, 12 Oct 2022 11:11:46 +0800
Subject: [PATCH] init project

---
 .gitignore          |   2 +
 README.md           |   1 +
 auto.py             |  20 +++++++++
 get_lottery_data.py | 106 ++++++++++++++++++++++++++++++++++++++++++++
 kl8.py              |   5 +++
 model/klb.py        |  25 +++++++++++
 model/pls.py        |  29 ++++++++++++
 model/plw.py        |  25 +++++++++++
 model/sd.py         |  30 +++++++++++++
 pd_test.py          |  21 +++++++++
 pls.py              |  50 +++++++++++++++++++++
 11 files changed, 314 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 auto.py
 create mode 100644 get_lottery_data.py
 create mode 100644 kl8.py
 create mode 100644 model/klb.py
 create mode 100644 model/pls.py
 create mode 100644 model/plw.py
 create mode 100644 model/sd.py
 create mode 100644 pd_test.py
 create mode 100644 pls.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b11ec0c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*/__pycache__
+sample
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c9d2ab6
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+> sqlacodegen --tables sd --outfile ./model/sd.py "mysql+pymysql://root:123456@localhost/lottery?charset=utf8"
\ No newline at end of file
diff --git a/auto.py b/auto.py
new file mode 100644
index 0000000..a8c58cf
--- /dev/null
+++ b/auto.py
@@ -0,0 +1,36 @@
+"""Log in to youdian.jindianle.com with helium and print the "分析" elements.
+
+The account credentials are read from the ``YOUDIAN_PHONE`` and
+``YOUDIAN_PASSWORD`` environment variables so that no secrets live in the
+repository.
+"""
+import os
+
+from helium import *
+import time
+
+# Fail fast with a KeyError when the credentials are not configured.
+phone = os.environ["YOUDIAN_PHONE"]
+password = os.environ["YOUDIAN_PASSWORD"]
+
+driver = start_chrome("https://youdian.jindianle.com/", headless=True)
+try:
+    click(Text("您尚未登录,点击登录"))
+    click(Text("密码登录"))
+    write(phone, into="请输入手机号")
+    write(password, into="请输入6-12位数字或字母")
+    click(Button("登 录"))
+    # After entering the page, first click the button that closes the pop-up.
+    _close = S('//*[@id="vue2_el"]/div/div[10]/div/p')
+    click(_close)
+    # Click the "竞彩足球" entry.
+    jczq = S('//*[@id="vue2_el"]/div/section/div[6]/div[2]/div[1]/a/p/em')
+    click(jczq)
+    fenxi_list = find_all(Text("分析"))
+    print(fenxi_list)
+    # click(fenxi_list[0])
+    # time.sleep(5)
+finally:
+    # The browser is headless, so nothing is gained by leaving it running;
+    # always shut it down, even when a step above fails.
+    kill_browser()
\ No newline at end of file
diff
--git a/get_lottery_data.py b/get_lottery_data.py
new file mode 100644
index 0000000..44af842
--- /dev/null
+++ b/get_lottery_data.py
@@ -0,0 +1,153 @@
+"""Scrape draw results from kaijiang.500.com and store them in MySQL."""
+import os
+
+from requests_html import HTMLSession, HTML
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+import re
+import time
+import random
+from datetime import date
+from retrying import retry
+
+from model.pls import PLS
+from model.plw import PLW
+from model.sd import SD
+from model.klb import KLB
+
+# The connection URL can be overridden with the LOTTERY_DB_URL environment
+# variable; the default is the URL this script has always used.
+engine = create_engine(os.environ.get(
+    "LOTTERY_DB_URL",
+    "mysql+pymysql://root:123456@localhost/lottery?charset=utf8",
+))
+DbSession = sessionmaker(bind=engine)
+db = DbSession()
+
+session = HTMLSession()
+# The draw-date label comes as "YYYY年MM月DD日" or as "YYYY-MM-DD ".  Raw
+# strings keep "\d" and "\s" from being read as invalid string escapes.
+pat1 = re.compile(r'开奖日期:(\d+)年(\d+)月(\d+)日.*')
+pat2 = re.compile(r'开奖日期:(\d+)-(\d+)-(\d+)\s.*')
+
+# Lower-cased lottery type -> ORM model; every other type is stored as KLB.
+_MODELS = {'pls': PLS, 'sd': SD, 'plw': PLW}
+
+
+def _parse_draw_date(text):
+    """Return the draw date found in *text*, or None if no pattern matches."""
+    match = pat1.match(text) or pat2.match(text)
+    if match is None:
+        return None
+    year, month, day = map(int, match.groups())
+    return date(year, month, day)
+
+
+def _fill_three_digit_stats(record, draw_code):
+    """Set the per-digit statistics of a three-digit draw on *record*."""
+    digits = [int(ch) for ch in draw_code]
+    hundred, ten, one = digits[0], digits[1], digits[2]
+    record.hundred = hundred
+    record.ten = ten
+    record.one = one
+    record.code_small = sum(d < 5 for d in digits)
+    record.code_big = sum(d >= 5 for d in digits)
+    record.code_single = sum(d % 2 == 1 for d in digits)
+    record.code_double = sum(d % 2 == 0 for d in digits)
+    record.sum_num = sum(digits)
+    # Pair sums follow the column names; the earlier code indexed the wrong
+    # digits for sum_hundred_ten and sum_ten_one.
+    record.sum_hundred_one = hundred + one
+    record.sum_hundred_ten = hundred + ten
+    record.sum_ten_one = ten + one
+    # Two distinct digits -> 3, three distinct digits -> 6, all equal -> 1.
+    distinct = len(set(digits))
+    if distinct == 2:
+        record.group_type = 3
+    elif distinct == 3:
+        record.group_type = 6
+    else:
+        record.group_type = 1
+
+
+@retry(stop_max_attempt_number=3)
+def _get_data(url, lottery_type):
+    """Fetch one draw page and insert its result unless already stored.
+
+    Args:
+        url: Address of the draw page to scrape.
+        lottery_type: 'pls', 'sd' or 'plw' (case-insensitive); any other
+            value selects the KLB model.
+
+    Raises:
+        Exception: If the draw date cannot be parsed from the page.  Errors
+            from the HTTP request or the database propagate as well; the
+            ``retry`` decorator re-runs the call, three attempts at most.
+    """
+    Model = _MODELS.get(lottery_type.lower(), KLB)
+    # Scrape the page.
+    r = session.get(url)
+    table_list = r.html.find("table.kj_tablelist02", first=True)
+    issue = table_list.find("td.td_title01 span.span_left strong")[0].text
+    open_date = table_list.find("td.td_title01 span.span_right")[0].text
+    draw_code = table_list.find("div.ball_box01")[0].text.replace('\n', '')
+    # Skip issues that are already stored.
+    if db.query(Model).filter_by(draw_issue=issue).first() is not None:
+        print(f'issue:{issue}已经存在')
+        return
+    draw_date = _parse_draw_date(open_date)
+    if draw_date is None:
+        raise Exception(f"issue:{issue}数据写入失败。。。")
+    record = Model()
+    record.draw_issue = issue
+    record.draw_code = draw_code
+    record.draw_date = draw_date
+    # Positional statistics only apply to Pailie 3 (PLS) and 3D (SD).
+    # NOTE(review): assumes every statistic set by the helper belonged to
+    # this PLS/SD branch in the original layout — confirm.
+    if isinstance(record, (PLS, SD)):
+        _fill_three_digit_stats(record, draw_code)
+    try:
+        db.add(record)
+        db.commit()
+    except Exception:
+        # A failed flush leaves the session unusable until it is rolled back,
+        # which would also break the retries and every later issue.
+        db.rollback()
+        raise
+    print(f"issue:{issue}数据写入完成。。。")
+
+
+def get_data(url, lottery_type):
+    """Store the draw at *url*; report and re-raise when all retries fail."""
+    try:
+        _get_data(url, lottery_type)
+    except Exception as e:
+        print(e)
+        print('异常出错重试后,依然报错')
+        raise
+
+
+def main(basic_url, lottery_type):
+    """Store every draw linked from *basic_url*.
+
+    The links matched on the page are visited in reverse page order; an
+    issue that still fails after the retries is printed and skipped.
+    """
+    r = session.get(basic_url)
+    select_list = list(reversed(r.html.find('div.kjxq_box02_title_right span div a')))
+    for item in select_list:
+        html = HTML(html=item.html)
+        url = html.find('a', first=True).attrs['href']
+        try:
+            get_data(url, lottery_type)
+        except Exception as e:
+            print(e)
+            continue
+
+
+if __name__ == '__main__':
+    # https://kaijiang.500.com/shtml/pls/04001.shtml
+    # https://kaijiang.500.com/shtml/sd/04001.shtml
+    basic_url = "https://kaijiang.500.com/shtml/plw/04001.shtml"
+    main(basic_url, lottery_type='plw')
\ No newline at end of file
diff --git a/kl8.py b/kl8.py
new file mode 100644
index 0000000..4af56f7
--- /dev/null
+++ b/kl8.py
@@ -0,0 +1,9 @@
+import numpy as np
+import pandas as pd
+# Print ten random picks of four numbers taken from 1..79.
+# NOTE(review): np.arange(1, 80) stops at 79 and np.random.choice samples
+# with replacement by default, so a pick can repeat a number — confirm both
+# are intended.
+a = np.arange(1, 80)
+for i in range(10):
+    print(np.random.choice(a, 4))
\ No newline at end of file
diff --git a/model/klb.py b/model/klb.py
new file mode 100644
index 0000000..b51f361
--- /dev/null
+++
b/model/klb.py
@@ -0,0 +1,25 @@
+# coding: utf-8
+from sqlalchemy import Column, Integer, String, Date
+from sqlalchemy.ext.declarative import declarative_base
+
+Base = declarative_base()  # this module creates its own declarative base
+metadata = Base.metadata
+
+
+class KLB(Base):  # ORM mapping of the 'klb' table
+    __tablename__ = 'klb'
+
+    id = Column(Integer, primary_key=True)
+    draw_issue = Column(String(25, 'utf8mb4_general_ci'))  # get_lottery_data.py looks rows up by this value
+    draw_date = Column(Date)
+    draw_code = Column(String(10, 'utf8mb4_general_ci'))
+    code_big = Column(Integer)
+    code_small = Column(Integer)
+    code_single = Column(Integer)
+    code_double = Column(Integer)
+    sum_num = Column(Integer, comment='总和')  # total sum
+    sum_ten_one = Column(Integer, comment='十位个位和')  # tens digit + ones digit
+    sum_hundred_ten = Column(Integer, comment='百位十位和')  # hundreds digit + tens digit
+    sum_hundred_one = Column(Integer, comment='百位各位和')  # presumably hundreds digit + ones digit, as the column name says
+    sum_single_double = Column(Integer, comment='总和奇偶')  # odd/even of the total
+    sum_big_small = Column(Integer, comment='总和大小')  # big/small of the total
diff --git a/model/pls.py b/model/pls.py
new file mode 100644
index 0000000..c98e3ac
--- /dev/null
+++ b/model/pls.py
@@ -0,0 +1,29 @@
+# coding: utf-8
+from sqlalchemy import Column, Date, Integer, String
+from sqlalchemy.ext.declarative import declarative_base
+
+Base = declarative_base()  # this module creates its own declarative base
+metadata = Base.metadata
+
+
+class PLS(Base):  # ORM mapping of the 'pls' table
+    __tablename__ = 'pls'
+
+    id = Column(Integer, primary_key=True)
+    draw_issue = Column(String(25, 'utf8mb4_general_ci'))  # get_lottery_data.py looks rows up by this value
+    draw_date = Column(Date)
+    draw_code = Column(String(10, 'utf8mb4_general_ci'))
+    hundred = Column(Integer)  # filled from draw_code[0] by get_lottery_data.py
+    ten = Column(Integer)  # filled from draw_code[1]
+    one = Column(Integer)  # filled from draw_code[2]
+    group_type = Column(Integer)  # 3 for two distinct digits, 6 for three, otherwise 1
+    code_big = Column(Integer)  # number of digits >= 5
+    code_small = Column(Integer)  # number of digits < 5
+    code_single = Column(Integer)  # number of odd digits
+    code_double = Column(Integer)  # number of even digits
+    sum_num = Column(Integer, comment='总和')  # total sum
+    sum_ten_one = Column(Integer, comment='十位个位和')  # tens digit + ones digit
+    sum_hundred_ten = Column(Integer, comment='百位十位和')  # hundreds digit + tens digit
+    sum_hundred_one = Column(Integer, comment='百位各位和')  # presumably hundreds digit + ones digit, as the column name says
+    sum_single_double = Column(Integer, comment='总和奇偶')  # odd/even of the total
+    sum_big_small = Column(Integer, comment='总和大小')  # big/small of the total
diff --git a/model/plw.py b/model/plw.py
new file mode
100644
index 0000000..41f69c1
--- /dev/null
+++ b/model/plw.py
@@ -0,0 +1,25 @@
+# coding: utf-8
+from sqlalchemy import Column, Integer, String, Date
+from sqlalchemy.ext.declarative import declarative_base
+
+Base = declarative_base()  # this module creates its own declarative base
+metadata = Base.metadata
+
+
+class PLW(Base):  # ORM mapping of the 'plw' table
+    __tablename__ = 'plw'
+
+    id = Column(Integer, primary_key=True)
+    draw_issue = Column(String(25, 'utf8mb4_general_ci'))  # get_lottery_data.py looks rows up by this value
+    draw_date = Column(Date)
+    draw_code = Column(String(10, 'utf8mb4_general_ci'))
+    code_big = Column(Integer)
+    code_small = Column(Integer)
+    code_single = Column(Integer)
+    code_double = Column(Integer)
+    sum_num = Column(Integer, comment='总和')  # total sum
+    sum_ten_one = Column(Integer, comment='十位个位和')  # tens digit + ones digit
+    sum_hundred_ten = Column(Integer, comment='百位十位和')  # hundreds digit + tens digit
+    sum_hundred_one = Column(Integer, comment='百位各位和')  # presumably hundreds digit + ones digit, as the column name says
+    sum_single_double = Column(Integer, comment='总和奇偶')  # odd/even of the total
+    sum_big_small = Column(Integer, comment='总和大小')  # big/small of the total
diff --git a/model/sd.py b/model/sd.py
new file mode 100644
index 0000000..17a61dc
--- /dev/null
+++ b/model/sd.py
@@ -0,0 +1,30 @@
+# coding: utf-8
+from sqlalchemy import Column, Date, Integer
+from sqlalchemy.dialects.mysql import VARCHAR
+from sqlalchemy.ext.declarative import declarative_base
+
+Base = declarative_base()  # this module creates its own declarative base
+metadata = Base.metadata
+
+
+class SD(Base):  # ORM mapping of the 'sd' table
+    __tablename__ = 'sd'
+
+    id = Column(Integer, primary_key=True)
+    draw_issue = Column(VARCHAR(25))  # get_lottery_data.py looks rows up by this value
+    draw_date = Column(Date)
+    draw_code = Column(VARCHAR(10))
+    hundred = Column(Integer)  # filled from draw_code[0] by get_lottery_data.py
+    ten = Column(Integer)  # filled from draw_code[1]
+    one = Column(Integer)  # filled from draw_code[2]
+    group_type = Column(Integer)  # 3 for two distinct digits, 6 for three, otherwise 1
+    code_big = Column(Integer)  # number of digits >= 5
+    code_small = Column(Integer)  # number of digits < 5
+    code_single = Column(Integer)  # number of odd digits
+    code_double = Column(Integer)  # number of even digits
+    sum_num = Column(Integer, comment='总和')  # total sum
+    sum_ten_one = Column(Integer, comment='十位个位和')  # tens digit + ones digit
+    sum_hundred_ten = Column(Integer, comment='百位十位和')  # hundreds digit + tens digit
+    sum_hundred_one = Column(Integer, comment='百位各位和')  # presumably hundreds digit + ones digit, as the column name says
+    sum_single_double = Column(Integer, comment='总和奇偶')  # odd/even of the total
+    sum_big_small = Column(Integer, comment='总和大小')  # big/small of the total
diff --git
a/pd_test.py b/pd_test.py
new file mode 100644
index 0000000..74586ba
--- /dev/null
+++ b/pd_test.py
@@ -0,0 +1,25 @@
+import pandas as pd
+import numpy as np
+
+# Scratch comparison of NumPy broadcasting with label-aligned Series maths.
+s1 = pd.Series([1.47, 2.3, 3.0])
+s2 = pd.Series([5.5, 6.1])
+s3 = pd.Series([2.3, 4.6, 5.3])
+
+s1.index = [True, False, False]
+s2.index = [False, True]
+s3.index = [False, True, False]
+
+# Row vectors of shape (1, len(series)).
+a1 = np.expand_dims(s1.to_numpy(), axis=0)
+a2 = np.expand_dims(s2.to_numpy(), axis=0)
+a3 = np.expand_dims(s3.to_numpy(), axis=0)
+
+print(a1.shape)
+print(a2.T.shape)
+# Column vector times row vector broadcasts to every pairwise product.
+print((a1.T)*a2)
+t = (a2.T)*a1
+print(len(t.flatten()))
+# Series multiplication pairs the values by index label instead.
+print((s1*s2).loc[True])
\ No newline at end of file
diff --git a/pls.py b/pls.py
new file mode 100644
index 0000000..96b5184
--- /dev/null
+++ b/pls.py
@@ -0,0 +1,67 @@
+"""Explore the digit combinations of a three-digit draw, ignoring order."""
+import numpy as np
+import pandas as pd
+
+
+def has_even(number):
+    """Return True when *number* contains at least one even digit."""
+    return any(int(d) % 2 == 0 for d in number)
+
+
+def has_odd(number):
+    """Return True when *number* contains at least one odd digit."""
+    return any(int(d) % 2 == 1 for d in number)
+
+
+def without_digit(digit):
+    """Return a predicate that accepts the numbers not containing *digit*."""
+    return lambda number: digit not in number
+
+
+def span_is_not_two(number):
+    """Return True unless the largest and smallest digits differ by 2."""
+    digits = [int(d) for d in number]
+    return max(digits) - min(digits) != 2
+
+
+def group_by_digit_sum(numbers):
+    """Map each digit sum from 0 to 27 to the numbers that have that sum."""
+    groups = {total: [] for total in range(28)}
+    for number in numbers:
+        groups[sum(map(int, number))].append(number)
+    return groups
+
+
+# Every number 000..999 reduced to its digits in ascending order.  Sorting
+# the set makes the printed lists reproducible between runs.
+combos = sorted({"".join(sorted(f"{i:03d}")) for i in range(1000)})
+
+sum_list = [sum(map(int, combo)) for combo in combos]
+# Series.value_counts replaces the deprecated top-level pd.value_counts.
+sum_df = pd.Series(sum_list).value_counts()
+# print(sum_df.to_dict())
+# print(len(combos)/sum_df)
+
+# Group 3: combinations with exactly two distinct digits.
+group3 = [combo for combo in combos if len(set(combo)) == 2]
+sum_group_dict = group_by_digit_sum(group3)
+for total, numbers in sum_group_dict.items():
+    print(total, list(filter(without_digit('3'), numbers)))
+
+# Group 6: combinations with three distinct digits.
+group6 = [combo for combo in combos if len(set(combo)) == 3]
+# Share of group-6 combinations among all combinations.
+print(len(group6)/len(combos))
+sum_group6_dict = group_by_digit_sum(group6)
+for total, numbers in sum_group6_dict.items():
+    print(total, list(filter(without_digit('3'), numbers)))
+    # Alternative filter chain from earlier experiments:
+    # candidates = filter(has_even, sum_group_dict[total])
+    # print(total, list(filter(without_digit('2'), filter(has_odd, candidates))))
+
+# Consecutive digits: the middle digit is one away from the largest or the
+# smallest digit (the digits of a combination are already in ascending order).
+result = [
+    combo for combo in combos
+    if int(combo[2]) - int(combo[1]) == 1 or int(combo[1]) - int(combo[0]) == 1
+]