#!/usr/bin/python3.7
# -*- coding: utf-8 -*-
# @Time : 2019/12/9 13:30
# @Author: Jtyoui@qq.com
from jtyoui.plunar import LunarSolarDateConverter, LunarDate
import regex as re # pip install regex==2.5.65
import arrow # pip install arrow==0.13.1
import copy
import jtyoui
[文档]class TimePoint:
"""标记时间类型"""
def __init__(self):
"""六个字段分别是:年-月-日-时-分-秒,"""
self.unit = [-1, -1, -1, -1, -1, -1]
[文档]class RangeTimeEnum:
"""一天大概范围时间"""
day_break = 3 # 黎明
early_morning = 8 # 早
morning = 10 # 上午
noon = 12 # 中午、午间
afternoon = 15 # 下午、午后
night = 18 # 晚上、傍晚
lateNight = 20 # 晚、晚间
midNight = 23 # 深夜
[文档] @classmethod
def name(cls, names):
"""根据名字获取数据"""
if hasattr(RangeTimeEnum, names):
return getattr(RangeTimeEnum, names)
return 0
[文档]class TimeUnit:
"""时间解析"""
def __init__(self, exp_time, normalizer, context):
"""时间语句分析"""
self._no_year = False
self.exp_time = exp_time
self.normalizer = normalizer
self.tp = TimePoint()
self.tp_origin = context
self.isFirstTimeSolveContext = True
self.isAllDayTime = True
self.time = None
self.time_normalization()
[文档] def time_normalization(self):
"""时间解析"""
self.norm_set_year()
self.norm_set_month()
self.norm_set_day()
self.norm_set_month_fuzzy_day()
self.norm_set_solar_holiday()
self.norm_set_cur_related()
self.norm_set_hour()
self.norm_set_minute()
self.norm_set_second()
self.norm_set_special()
self.norm_set_span_related()
self.norm_set_lunar_holiday()
self.modify_time_base()
self.tp_origin.unit = copy.deepcopy(self.tp.unit)
# 判断是时间点还是时间区间
flag = True
for i in range(0, 4):
if self.tp.unit[i] != -1:
flag = False
if flag:
self.normalizer.isTimeSpan = True
if self.normalizer.isTimeSpan:
days = 0
if self.tp.unit[0] > 0:
days += 365 * self.tp.unit[0]
if self.tp.unit[1] > 0:
days += 30 * self.tp.unit[1]
if self.tp.unit[2] > 0:
days += self.tp.unit[2]
unit = self.tp.unit
for i in range(3, 6):
if self.tp.unit[i] < 0:
unit[i] = 0
seconds = unit[3] * 3600 + unit[4] * 60 + unit[5]
if seconds == 0 and days == 0:
self.normalizer.invalidSpan = True
self.normalizer.timeSpan = self.gen_span(days, seconds)
return None
time_grid = self.normalizer.timeBase.split('-')
unit_pointer = 5
while unit_pointer >= 0 and self.tp.unit[unit_pointer] < 0:
unit_pointer -= 1
for i in range(unit_pointer):
if self.tp.unit[i] < 0:
self.tp.unit[i] = int(time_grid[i])
self.time = self.gen_time(self.tp.unit)
[文档] @staticmethod
def gen_span(days, seconds):
"""根据毫秒获取时分秒"""
day = int(seconds / (3600 * 24))
h = int((seconds % (3600 * 24)) / 3600)
m = int(((seconds % (3600 * 24)) % 3600) / 60)
s = int(((seconds % (3600 * 24)) % 3600) % 60)
return str(days + day) + ' days, ' + '%d:%02d:%02d' % (h, m, s)
[文档] @staticmethod
def gen_time(unit):
"""得到时间"""
time = arrow.get('1970-01-01 00:00:00')
if unit[0] > 0:
time = time.replace(year=int(unit[0]))
if unit[1] > 0:
time = time.replace(month=unit[1])
if unit[2] > 0:
time = time.replace(day=unit[2])
if unit[3] > 0:
time = time.replace(hour=unit[3])
if unit[4] > 0:
time = time.replace(minute=unit[4])
if unit[5] > 0:
time = time.replace(second=unit[5])
return time
[文档] def norm_set_year(self):
"""该方法识别时间表达式单元的年字段"""
# 一位数和三位表示的年份
for rule in ['(?<![0-9])[0-9]{1}(?=年)', '(?<![0-9])[0-9]{3}(?=年)']:
match = re.search(rule, self.exp_time)
if match is not None:
self.normalizer.isTimeSpan = True
year = int(match.group())
self.tp.unit[0] = year
# 两位和四位数表示的年份
for rule in ['[0-9]{2}(?=年)', '[0-9]{4}(?=年)']:
match = re.search(rule, self.exp_time)
if match is not None:
year = int(match.group())
self.tp.unit[0] = year
[文档] def norm_set_month(self):
"""该方法识别时间表达式单元的月字段 """
rule = '((10)|(11)|(12)|([1-9]))(?=月)'
match = re.search(rule, self.exp_time)
if match is not None:
self.tp.unit[1] = int(match.group())
# 处理倾向于未来时间的情况
self.prefer_future(1)
[文档] def norm_set_month_fuzzy_day(self):
"""兼容模糊写法:该方法识别时间表达式单元的月、日字段"""
rule = '((10)|(11)|(12)|([1-9]))(月|\\.|\\-)([0-3][0-9]|[1-9])'
match = re.search(rule, self.exp_time)
if match is not None:
match_str = match.group()
p = re.compile('(月|\\.|\\-)')
m = p.search(match_str)
if match is not None:
start = m.start()
month = match_str[0: start]
day = match_str[start + 1:]
self.tp.unit[1] = int(month)
self.tp.unit[2] = int(day)
# 处理倾向于未来时间的情况
self.prefer_future(1)
self._check_time(self.tp.unit)
[文档] def norm_set_day(self):
"""该方法识别时间表达式单元的日字段 """
rule = '((?<!\\d))([0-3][0-9]|[1-9])(?=(日|号))'
match = re.search(rule, self.exp_time)
if match is not None:
self.tp.unit[2] = int(match.group())
# 处理倾向于未来时间的情况
self.prefer_future(2)
self._check_time(self.tp.unit)
[文档] def daytime(self, rule, name):
"""预测一天是在什么时候
预测情况包括:
day_break = 3 # 黎明\n
early_morning = 8 # 早\n
morning = 10 # 上午\n
noon = 12 # 中午、午间\n
afternoon = 15 # 下午、午后\n
night = 18 # 晚上、傍晚\n
lateNight = 20 # 晚、晚间\n
midNight = 23 # 深夜
:param rule: 预测情况的正则
:param name: 预测的名字
"""
match = re.search(rule, self.exp_time)
if match is not None:
if self.tp.unit[3] == -1: # 增加对没有明确时间点,只写了凌晨这种情况的处理
self.tp.unit[3] = RangeTimeEnum.name(name)
elif 12 <= self.tp.unit[3] <= 23:
self.tp.unit[3] -= 12
elif self.tp.unit[3] == 0:
self.tp.unit[3] = 12
# 处理倾向于未来时间的情况
self.prefer_future(3)
self.isAllDayTime = False
[文档] def norm_set_hour(self):
"""该方法识别时间表达式单元的时字段"""
rule = '(?<!(周|星期))([0-2]?[0-9])(?=(点|时))'
match = re.search(rule, self.exp_time)
if match is not None:
self.tp.unit[3] = int(match.group())
# 处理倾向于未来时间的情况
self.prefer_future(3)
self.isAllDayTime = False
self.daytime('凌晨', 'day_break')
self.daytime('早上|早晨|早间|晨间|今早|明早|早|清晨', 'early_morning')
self.daytime('上午', 'morning')
match = re.search('(中午)|(午间)|(am)|(白天)', self.exp_time, flags=re.I)
if match is not None:
if 0 <= self.tp.unit[3] <= 10:
self.tp.unit[3] += 12
if self.tp.unit[3] == -1: # 增加对没有明确时间点,只写了中午/午间这种情况的处理
self.tp.unit[3] = RangeTimeEnum.noon
# 处理倾向于未来时间的情况
self.prefer_future(3)
self.isAllDayTime = False
match = re.search('(下午)|(午后)|(pm)', self.exp_time, flags=re.I)
if match is not None:
if 0 <= self.tp.unit[3] <= 11:
self.tp.unit[3] += 12
if self.tp.unit[3] == -1:
self.tp.unit[3] = RangeTimeEnum.afternoon
# 处理倾向于未来时间的情况
self.prefer_future(3)
self.isAllDayTime = False
match = re.search('晚上|夜间|夜里|今晚|明晚|晚|夜里', self.exp_time)
if match is not None:
if 0 <= self.tp.unit[3] <= 11:
self.tp.unit[3] += 12
elif self.tp.unit[3] == 12:
self.tp.unit[3] = 0
elif self.tp.unit[3] == -1:
self.tp.unit[3] = RangeTimeEnum.lateNight
# 处理倾向于未来时间的情况
self.prefer_future(3)
self.isAllDayTime = False
[文档] def norm_set_minute(self):
"""该方法识别时间表达式单元的分字段 """
rule = '([0-9]+(?=分(?!钟)))|((?<=((?<!小)[点时]))[0-5]?[0-9](?!刻))'
match = re.search(rule, self.exp_time)
if match is not None:
if match.group():
self.tp.unit[4] = int(match.group())
# 处理倾向于未来时间的情况
self.isAllDayTime = False
# 加对一刻,半,3刻的正确识别(1刻为15分,半为30分,3刻为45分)
rule = '(?<=[点时])[1一]刻(?!钟)'
match = re.search(rule, self.exp_time)
if match is not None:
self.tp.unit[4] = 15
self.isAllDayTime = False
rule = '(?<=[点时])半'
match = re.search(rule, self.exp_time)
if match is not None:
self.tp.unit[4] = 30
self.prefer_future(4)
self.isAllDayTime = False
rule = '(?<=[点时])[3三]刻(?!钟)'
match = re.search(rule, self.exp_time)
if match is not None:
self.tp.unit[4] = 45
self.isAllDayTime = False
[文档] def norm_set_second(self):
"""添加了省略秒说法的时间:如17点15分32"""
rule = '([0-9]+(?=秒))|((?<=分)[0-5]?[0-9])'
match = re.search(rule, self.exp_time)
if match is not None:
self.tp.unit[5] = int(match.group())
self.isAllDayTime = False
[文档] def norm_set_special(self):
"""该方法识别特殊形式的时间表达式单元的各个字段"""
time_re = '([0-2]?[0-9]):[0-5]?[0-9](:[0-5]?[0-9])?'
rule = '(晚上|夜间|夜里|今晚|明晚|晚|夜里|下午|午后)?(?<!(周|星期))' + time_re
match = re.search(rule, self.exp_time)
if match is not None:
match = re.search(time_re, self.exp_time)
tmp_target = match.group()
tmp_parser = tmp_target.split(':')
if 0 <= int(tmp_parser[0]) <= 11:
self.tp.unit[3] = int(tmp_parser[0]) + 12
else:
self.tp.unit[3] = int(tmp_parser[0])
self.tp.unit[4] = int(tmp_parser[1])
if len(tmp_parser) > 2:
self.tp.unit[5] = int(tmp_parser[2])
self.prefer_future(3)
self.isAllDayTime = False
rule = '(?<!(周|星期|晚上|夜间|夜里|今晚|明晚|晚|夜里|下午|午后))' + time_re
match = re.search(rule, self.exp_time)
if match is not None:
tmp_target = match.group()
tmp_parser = tmp_target.split(':')
self.tp.unit[3] = int(tmp_parser[0])
self.tp.unit[4] = int(tmp_parser[1])
if len(tmp_parser) > 2:
self.tp.unit[5] = int(tmp_parser[2])
self.prefer_future(3)
self.isAllDayTime = False
rule = r'([0-9]?[0-9]?[0-9]{2}).(10|11|12|[1-9]).([0-3][0-9]|[1-9])' # 匹配年月日
match = re.search(rule, self.exp_time)
if match is not None:
year, month, day = match.group(1), match.group(2), match.group(3)
self.tp.unit[0] = int(year)
self.tp.unit[1] = int(month)
self.tp.unit[2] = int(day)
[文档] def norm_set_lunar_holiday(self):
"""识别农历节日和时节"""
rule = '|'.join(self.normalizer.lunar_holiday)
rule += '|立春|雨水|惊蛰|春分|清明|谷雨|立夏|小满|芒种|夏至|小暑|大暑|立秋|处暑|白露|秋分|寒露|霜降|立冬|小雪|大雪|冬至|小寒|大寒'
match = re.search(rule, self.exp_time)
if match is not None:
if self.tp.unit[0] == -1:
self.tp.unit[0] = int(self.normalizer.timeBase.split('-')[0])
holiday = match.group()
if holiday in self.normalizer.lunar_holiday:
date = self.normalizer.lunar_holiday[holiday].split('-')
ls_converter = LunarSolarDateConverter()
lunar = LunarDate(self.tp.unit[0], int(date[0]), int(date[1]))
solar = ls_converter.lunar_to_solar(lunar)
self.tp.unit[0] = solar.solarYear
date[0] = solar.solarMonth
date[1] = solar.solarDay
else:
holiday = holiday.strip('节')
if holiday in ['小寒', '大寒']:
self.tp.unit[0] += 1
date = self.china_24_st(self.tp.unit[0], holiday)
self.tp.unit[1] = int(date[0])
self.tp.unit[2] = int(date[1])
[文档] def norm_set_solar_holiday(self):
"""识别阳历节日"""
rule = '|'.join(self.normalizer.solar_holiday)
match = re.search(rule, self.exp_time)
if match is not None:
if self.tp.unit[0] == -1:
self.tp.unit[0] = int(self.normalizer.timeBase.split('-')[0])
holiday = match.group()
if holiday in self.normalizer.solar_holiday:
date = self.normalizer.solar_holiday[holiday].split('-')
self.tp.unit[1] = int(date[0])
self.tp.unit[2] = int(date[1])
[文档] @staticmethod
def china_24_st(year: int, china_st: str):
"""二十世纪和二十一世纪,24节气计算"""
if (19 == year // 100) or (2000 == year):
# 20世纪key值
st_key = [6.11, 20.84, 4.6295, 19.4599, 6.3826, 21.4155, 5.59, 20.888, 6.318, 21.86, 6.5, 22.2, 7.928,
23.65, 8.35, 23.95, 8.44, 23.822, 9.098, 24.218, 8.218, 23.08, 7.9, 22.6]
else:
# 21世纪key值
st_key = [5.4055, 20.12, 3.87, 18.73, 5.63, 20.646, 4.81, 20.1, 5.52, 21.04, 5.678, 21.37, 7.108, 22.83,
7.5, 23.13, 7.646, 23.042, 8.318, 23.438, 7.438, 22.36, 7.18, 21.94]
# 二十四节气字典-- key值, 月份,(特殊年份,相差天数)
solar_terms = {
'小寒': [st_key[0], '1', (2019, -1), (1982, 1)],
'大寒': [st_key[1], '1', (2082, 1)],
'立春': [st_key[2], '2', (None, 0)],
'雨水': [st_key[3], '2', (2026, -1)],
'惊蛰': [st_key[4], '3', (None, 0)],
'春分': [st_key[5], '3', (2084, 1)],
'清明': [st_key[6], '4', (None, 0)],
'谷雨': [st_key[7], '4', (None, 0)],
'立夏': [st_key[8], '5', (1911, 1)],
'小满': [st_key[9], '5', (2008, 1)],
'芒种': [st_key[10], '6', (1902, 1)],
'夏至': [st_key[11], '6', (None, 0)],
'小暑': [st_key[12], '7', (2016, 1), (1925, 1)],
'大暑': [st_key[13], '7', (1922, 1)],
'立秋': [st_key[14], '8', (2002, 1)],
'处暑': [st_key[15], '8', (None, 0)],
'白露': [st_key[16], '9', (1927, 1)],
'秋分': [st_key[17], '9', (None, 0)],
'寒露': [st_key[18], '10', (2088, 0)],
'霜降': [st_key[19], '10', (2089, 1)],
'立冬': [st_key[20], '11', (2089, 1)],
'小雪': [st_key[21], '11', (1978, 0)],
'大雪': [st_key[22], '12', (1954, 1)],
'冬至': [st_key[23], '12', (2021, -1), (1918, -1)]
}
if china_st in ['小寒', '大寒', '立春', '雨水']:
flag_day = int((year % 100) * 0.2422 + solar_terms[china_st][0]) - int((year % 100 - 1) / 4)
else:
flag_day = int((year % 100) * 0.2422 + solar_terms[china_st][0]) - int((year % 100) / 4)
# 特殊年份处理
for special in solar_terms[china_st][2:]:
if year == special[0]:
flag_day += special[1]
break
return (solar_terms[china_st][1]), str(flag_day)
def _set_time(self, rule, cur, flag, index, **kwargs):
"""修改年月日
:param rule: 修改规则
:param cur: 日期
:param flag: 标记
:param index: 标记索引
:param kwargs: 改变时间的参数
:return: 时间
"""
match = re.search(rule, self.exp_time)
if match is not None:
flag[index] = True
cur = cur.shift(**kwargs)
return cur
[文档] def get_date(self):
"""获取当前日期,并转为标准日期格式"""
if self.tp.unit[0] > 0 and self.tp.unit[1] > 0 and self.tp.unit[2] > 0:
cur = arrow.get(jtyoui.join('-', self.tp.unit[0:3]), 'YYYY-M-D')
else:
cur = arrow.get(self.normalizer.timeBase, 'YYYY-M-D')
return cur
[文档] def modify_time_base(self):
"""该方法用于更新timeBase使之具有上下文关联性"""
if not self.normalizer.isTimeSpan:
if 30 <= self.tp.unit[0] < 100:
self.tp.unit[0] += 1900
if 0 < self.tp.unit[0] < 30:
self.tp.unit[0] += 2000
time_grid = self.normalizer.timeBase.split('-')
arr = []
for i in range(0, 6):
if self.tp.unit[i] == -1:
arr.append(str(time_grid[i]))
else:
arr.append(str(self.tp.unit[i]))
self.normalizer.timeBase = '-'.join(arr)
[文档] def prefer_future_week(self, weekday, cur):
"""预测下一个周的时间
:param weekday: 星期几
:param cur: 当前时间
:return: 预测的时间
"""
# 1. 确认用户选项
if not self.normalizer.isPreferFuture:
return cur
# 2. 检查被检查的时间级别之前,是否没有更高级的已经确定的时间,如果有,则不进行处理.
for i in range(0, 2):
if self.tp.unit[i] != -1:
return cur
# 获取当前是在周几,如果识别到的时间小于当前时间,则识别时间为下一周
tmp = arrow.get(self.normalizer.timeBase, 'YYYY-M-D-H-m-s')
week = tmp.weekday()
if week > weekday:
cur = cur.shift(days=7)
return cur
[文档] def prefer_future(self, check_time_index):
"""如果用户选项是倾向于未来时间,检查check_time_index所指的时间是否是过去的时间,如果是的话,将大一级的时间设为当前时间的+1。
如在晚上说“早上8点看书”,则识别为明天早上;
12月31日说3号买菜,则识别为明年1月的3号。
"""
# 1. 检查被检查的时间级别之前,是否没有更高级的已经确定的时间,如果有,则不进行处理.
for i in range(0, check_time_index):
if self.tp.unit[i] != -1:
return
# 2. 根据上下文补充时间
self.check_context_time(check_time_index)
# 3. 根据上下文补充时间后再次检查被检查的时间级别之前,是否没有更高级的已经确定的时间,如果有,则不进行倾向处理.
for i in range(0, check_time_index):
if self.tp.unit[i] != -1:
return
# 4. 确认用户选项
if not self.normalizer.isPreferFuture:
return
# 5. 获取当前时间,如果识别到的时间小于当前时间,则将其上的所有级别时间设置为当前时间,并且其上一级的时间步长+1
time_arr = self.normalizer.timeBase.split('-')
cur = arrow.get(self.normalizer.timeBase, 'YYYY-M-D-H-m-s')
cur_unit = int(time_arr[check_time_index])
if self.tp.unit[0] == -1:
self._no_year = True
else:
self._no_year = False
if cur_unit < self.tp.unit[check_time_index]:
return
# 准备增加的时间单位是被检查的时间的上一级,将上一级时间+1
cur = self.add_time(cur, check_time_index - 1)
time_arr = cur.format('YYYY-M-D-H-m-s').split('-')
for i in range(0, check_time_index):
self.tp.unit[i] = int(time_arr[i])
def _check_time(self, parse):
"""检查未来时间点"""
time_arr = self.normalizer.timeBase.split('-')
if self._no_year:
if parse[1] == int(time_arr[1]):
if parse[2] > int(time_arr[2]):
parse[0] -= 1
self._no_year = False
[文档] def check_context_time(self, check_time_index):
"""根据上下文时间补充时间信息"""
for i in range(0, check_time_index):
if self.tp.unit[i] == -1 and self.tp_origin.unit[i] != -1:
self.tp.unit[i] = self.tp_origin.unit[i]
# 在处理小时这个级别时,如果上文时间是下午的且下文没有主动声明小时级别以上的时间,则也把下文时间设为下午
if self.isFirstTimeSolveContext is True and check_time_index == 3 and \
self.tp_origin.unit[check_time_index] >= 12 and self.tp.unit[check_time_index] < 12:
self.tp.unit[check_time_index] += 12
self.isFirstTimeSolveContext = False
[文档] @staticmethod
def add_time(cur, fore_unit: int):
"""修改日期
:param cur: 当前日期
:param fore_unit: 修改属性
:return: 修改好的日期
"""
if fore_unit == 0:
cur = cur.shift(years=1)
elif fore_unit == 1:
cur = cur.shift(months=1)
elif fore_unit == 2:
cur = cur.shift(days=1)
elif fore_unit == 3:
cur = cur.shift(hours=1)
elif fore_unit == 4:
cur = cur.shift(minutes=1)
elif fore_unit == 5:
cur = cur.shift(seconds=1)
return cur