jtyoui.tools.tool 源代码

#!/usr/bin/python3.7
# -*- coding: utf-8 -*-
# @Time  : 2019/4/12 13:13
# @Author: Jtyoui@qq.com
import re
from jtyoui.error import InconsistentLengthError
import operator


class _SearchResult:
    """Hold the offsets and texts of every match found by ``Tool.search``.

    The three accessors are methods (not attributes) so that callers written
    against the original API -- ``result.start()``, ``result.end()``,
    ``result.value()`` -- keep working unchanged.
    """

    def __init__(self, starts, ends, values):
        self._starts = starts
        self._ends = ends
        self._values = values

    def start(self):
        """Return the list of start offsets, one per match."""
        return self._starts

    def end(self):
        """Return the list of end offsets (exclusive), one per match."""
        return self._ends

    def value(self):
        """Return the list of matched substrings, one per match."""
        return self._values


class Tool:
    """Small collection of string helpers bound to one working string.

    The working string is passed to the constructor and can be swapped at any
    time through the ``string`` property.

    >>> tool = Tool('我家在贵州省遵义县的一个地方是虾子')
    >>> tool.index_select_string('01056666600000056', '56+')
    ['贵州省遵义县', '虾子']
    >>> tool.string_select_index(ls=['虾子'], start_name='5', end_name='6')[-3:]
    ['O', '5', '6']
    >>> tool.generator = False
    >>> tool.select_row([[1, 2, 3], [1, 0, -1], [0, 1, 1]], 1)
    [2, 0, 1]
    >>> tool.select_ls(['遵义县', '虾子'])
    ['遵义县', '虾子']
    >>> tool.string = '9994599945545599945'
    >>> found = tool.search('(45+)+')
    >>> found.start(), found.end(), found.value()
    ([3, 8, 17], [5, 14, 19], ['45', '455455', '45'])
    >>> tool.string = 'a1b2c3!'
    >>> tool.replace('[0-9a-zA-Z]', '')
    '!'
    """

    # Output mode of select_ls / select_row: True returns a lazy generator,
    # False returns a fully built list.
    generator = True

    def __init__(self, string):
        self._string = string

    def index_select_string(self, index, select):
        """Extract substrings of the working string by matching a tag sequence.

        ``index`` is a string of per-character tags aligned with the working
        string (typically sequence-labelling output). Every span of ``index``
        matched by ``select`` is mapped back onto the working string.

        :param index: tag string; must be as long as the working string
        :param select: regular expression applied to ``index``
        :return: list of substrings of the working string, one per match
        :raises InconsistentLengthError: if the two lengths differ
        """
        if len(index) != len(self._string):
            raise InconsistentLengthError("参数index和参数string长度不一致错误!")
        return [self._string[m.start():m.end()] for m in re.finditer(select, index)]

    @property
    def string(self):
        """The working string all other methods operate on."""
        return self._string

    @string.setter
    def string(self, string):
        self._string = string

    def split(self, re_, flag=0, retain=True):
        """Split the working string on a regular expression.

        :param re_: regular expression used as the separator
        :param flag: ``re`` flags passed to the split and the match scan
        :param retain: when True (default) each separator is kept as the
            prefix of the piece that follows it; when False it is dropped
        :return: list of pieces

        NOTE: with ``retain=True`` the pattern should not contain capturing
        groups -- ``re.split`` inserts group text into its result, which
        shifts the positions the separators are glued onto.
        """
        pieces = re.split(pattern=re_, string=self._string, flags=flag)
        if retain:
            separators = re.finditer(pattern=re_, string=self._string, flags=flag)
            # Piece 0 precedes the first separator, so separator k belongs
            # in front of piece k (1-based).
            for position, separator in enumerate(separators, start=1):
                pieces[position] = separator.group() + pieces[position]
        return pieces

    def string_select_index(self, ls, start_name, end_name, flag='O', labels=None):
        """Tag every character of the working string against a keyword list.

        :param ls: keywords to look for
        :param start_name: tag written on the first character of each hit
        :param end_name: tag written on the remaining characters of each hit
        :param flag: tag for characters outside any keyword (default ``'O'``)
        :param labels: optional existing tag list to update in place; a fresh
            list filled with ``flag`` is created when it is missing or empty
        :return: the tag list, one entry per character
        """
        if not labels:
            labels = [flag] * len(self._string)
        for word in ls:
            if not word:
                # An empty keyword "matches" at every offset without ever
                # advancing the cursor, which used to loop forever.
                continue
            width = len(word)
            start = self._string.find(word)
            while start > -1:
                end = start + width
                labels[start] = start_name
                for offset in range(start + 1, end):
                    labels[offset] = end_name
                # Resume after this hit so occurrences never overlap.
                start = self._string.find(word, end)
        return labels

    def select_ls(self, ls_):
        """Keep the candidates that occur somewhere in the working string.

        For example ``['张三', '李四', '王麻子']`` against
        ``'张三去李四家找东西'`` gives ``['张三', '李四']``. A candidate at the
        very start of the string is included. Empty candidates are never
        selected.

        :param ls_: iterable of candidate strings
        :return: generator or list (depending on ``self.generator``) of the
            candidates found, in input order
        """
        if self.generator:
            return (name for name in ls_ if name and name in self._string)
        return [name for name in ls_ if name and name in self._string]

    def select_row(self, iterable_, row):
        """Pick the same position out of every item of an iterable.

        Despite the name this selects a *column* when ``iterable_`` is a list
        of rows.

        :param iterable_: iterable of indexable items
        :param row: index (or key) looked up in each item
        :return: generator or list, depending on ``self.generator``
        """
        pick = operator.itemgetter(row)
        if self.generator:
            return (pick(item) for item in iterable_)
        return [pick(item) for item in iterable_]

    def search(self, pattern, flags=0):
        """Find every non-overlapping match of ``pattern`` in the working string.

        The whole string is scanned once with ``re.finditer``. Anchors and
        look-behinds therefore see the real surrounding text, and patterns
        that can match the empty string terminate.

        :param pattern: regular expression to search for
        :param flags: ``re`` flags
        :return: object whose ``start()``, ``end()`` and ``value()`` methods
            return parallel lists of start offsets, end offsets and matched
            text
        """
        starts, ends, values = [], [], []
        for match in re.finditer(pattern, self.string, flags=flags):
            starts.append(match.start())
            ends.append(match.end())
            values.append(match.group())
        return _SearchResult(starts, ends, values)

    def replace(self, pattern, repl, count=0, flags=0):
        """Return the working string with regex matches replaced.

        Thin wrapper around ``re.sub``; the working string itself is not
        modified.

        :param pattern: regular expression to replace
        :param repl: replacement string or function, as for ``re.sub``
        :param count: maximum number of replacements, 0 meaning all
        :param flags: ``re`` flags
        :return: the new string
        """
        return re.sub(pattern, repl, self.string, count=count, flags=flags)
if __name__ == '__main__':
    # Demo run: exercises each Tool helper once and prints the result.
    tool = Tool('我家在贵州省遵义县的一个地方是虾子')
    # Switch to list output before the first select_* call; otherwise the
    # prints below show a generator object instead of the selected values.
    tool.generator = False

    # Map a tag sequence back onto the text.
    i_s = tool.index_select_string('01056666600000056', '56+')
    print(i_s)

    # Split on numbered-list markers, dropping the markers themselves.
    tool.string = '我在这里、一、相亲最大的好处是。二、想要什么婚姻。三五、开放型的婚姻是凉鞋。'
    t_s = tool.split('[一二三四五六七八九十]+、', retain=False)
    print(t_s)

    # Tag every character against a keyword list.
    tool.string = '我家在贵州省遵义县的一个地方是虾子'
    s_i = tool.string_select_index(ls=['贵州省', '遵义县', '虾子'], start_name='5', end_name='6')
    print(s_i)

    # Take position 1 of every row.
    d = [
        [1, 2, 3],
        [1, 0, -1],
        [0, 1, 1],
    ]
    print(tool.select_row(d, 1))  # [2, 0, 1]

    # Keep only the keywords present in the working string.
    print(tool.select_ls(['遵义县', '虾子']))

    # Collect offsets and text of every regex match.
    tool.string = '9994599945545599945'
    ts = tool.search('(45+)+')
    print(ts.start(), ts.end(), ts.value())
    print(tool.string)

    # Strip ASCII letters and digits.
    tool.string = 'are you fuck!'
    print(tool.replace('[0-9a-zA-Z]', ''))