# Source code for jtyoui.reptile.doublechromosphere

#!/usr/bin/python3.7
# -*- coding: utf-8 -*-
# @Time : 2019/11/16 19:35:00
# @Email : jtyoui@qq.com
# @Software : PyCharm
from jtyoui.web import get
import re


def double_data_chart(start=None, end=None):
    """Scrape double-color-ball lottery history from datachart.500.com.

    The first row of the returned list is the header row.

    :param start: first issue number to request; when ``None`` the default
        start returned by ``_get_start_end()`` is used
    :param end: last issue number to request; when ``None`` the default end
        returned by ``_get_start_end()`` is used
    :return: two-dimensional list: the header row followed by one row per
        matched table row
    """
    # Fill in whichever bound is missing. Looking the defaults up only when
    # BOTH bounds are None would send a single missing bound to the server
    # as the literal text "None".
    if start is None or end is None:
        default_start, default_end = _get_start_end()
        if start is None:
            start = default_start
        if end is None:
            end = default_end

    header = ['期号', '红球1', '红球2', '红球3', '红球4', '红球5', '红球6', '篮球', '奖池', '一等奖注数', '一等奖奖金',
              '二等奖注数', '二等奖奖金', '总投注额', '开奖日期']
    ls = [header]

    url = f'https://datachart.500.com/ssq/history/newinc/history.php?start={start}&end={end}'
    data = get(url)
    response = data.content.decode('utf-8')

    # Compile once instead of re-resolving the patterns for every row.
    row_pattern = re.compile(r'<tr class="t_tr1">.+?</tr>')
    cell_pattern = re.compile(r'<td.*?>(.+?)</td>')

    for row_html in row_pattern.findall(response):
        cells = cell_pattern.findall(row_html)
        # NOTE(review): the cell at index 9 is discarded and the first cell is
        # skipped below; presumably neither corresponds to a header column --
        # confirm against the live page markup.
        cells.pop(9)
        # Cells containing '-' are kept as text; everything else is parsed as
        # an integer after removing thousands separators.
        ls.append([
            cell if '-' in cell else int(cell.replace(',', ''))
            for cell in cells[1:]
        ])
    return ls
def _get_start_end():
    """Return the default start and end issue numbers.

    The end issue is read from the ``value`` attribute of the ``end`` input
    field on the history page; the start issue is the fixed number 3001.

    :return: tuple ``(start, end)`` where ``start`` is the int 3001 and
        ``end`` is the issue number as a string taken from the page
    :raises ValueError: if the ``end`` input field is not found in the page
    """
    url = 'https://datachart.500.com/ssq/history/history.shtml'
    data = get(url)
    response = data.content.decode('gbk')
    search = re.search('<input id="end" name="end" value="(.+?)" size="10" />', response)
    if search is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(f'could not find the "end" input field in {url}')
    # group(1) is exactly the captured value; no hand-counted offsets needed.
    return 3001, search.group(1)


if __name__ == '__main__':
    print(double_data_chart())