jtyoui.ee.entityextract 源代码

#!/usr/bin/python3.7
# -*- coding: utf-8 -*-
# @Time  : 2019/11/25 14:19
# @Author: Jtyoui@qq.com
import jtyoui
import re
import os


[文档]class EntityExtraction: """信息抽取""" def __init__(self, sentence: str, model_path: str = None): self.sentence = sentence self.num = None self.model_path = model_path if self.model_path and os.path.exists(self.model_path): self._st = self._load_model() self._load() def _load_model(self): from jtyoui.neuralNetwork import ernie_st, ernie_match self.ernie_match = ernie_match return ernie_st(self.model_path) def _nn(self, r): """根据正则去获取实体""" ls = self.num word = [] if ls: ls = jtyoui.join('', ls) for index in re.finditer(r, ls): word.append(self.sentence[index.start():index.end()]) return word @property def people(self): """提取人名""" return self._nn('[01]+') @property def address(self): """提取地址""" return self._nn('[45]+') @property def org(self): """提取机构名""" return self._nn('[23]+') def _load(self): """抽取信息""" self.num = self.ernie_match(self.sentence, self._st) @property def time(self): """提取时间""" p = jtyoui.ParseTime(self.sentence) return p.find_times() @property def re_num(self): """提取数字""" return re.findall(r'\d+', self.sentence) @property def car_plate(self): """提取车牌号码""" return jtyoui.plate_number(self.sentence) @property def phone(self): """提取手机号码""" t = jtyoui.telephone_number_matching_verification(jtyoui.ALL_Mobile_Data_Network_Card_RE, self.sentence) return [i for i in t] @property def re_card(self): """提取身份证号码""" cards = [] for i in re.findall(r'\d{18}|\d{17}[Xx]|\d{15}', self.sentence): try: jtyoui.check_id_card(i) cards.append(i) except jtyoui.IdCardCheckError: ... except AssertionError: ... return cards
[文档] def set_sentence(self, sentence): """从定义语句""" self.sentence = sentence if self.model_path: self._load()
@property def sentences(self): return self.sentence @sentences.setter def sentences(self, sentence): self.set_sentence(sentence)
if __name__ == '__main__': ee = EntityExtraction( '李斯从金阳世纪城打到中天铭廷,他的车牌是:贵AU8080。并且他的电话是:15180864970,身份证号码是:522121193702157024,时间是昨天下午2点半,他在花溪公园玩耍', model_path='D://model') print(ee.time) print(ee.address) print(ee.car_plate) print(ee.org) print(ee.people) print(ee.phone) print(ee.re_card) print(ee.re_num) ee.sentence = '今天' print(ee.time)