diff options
| author | Bobby <[email protected]> | 2022-09-01 00:33:14 -0400 |
|---|---|---|
| committer | Bobby <[email protected]> | 2022-09-01 00:33:14 -0400 |
| commit | 9c6c9c4570a0c583d93e197d3414c35315afa5fb (patch) | |
| tree | 5aeebbd3a5b843f8e3d34a5e44d1e01901b5d920 /src | |
| parent | 45b7045187a955c1957fab08c11173003f89a141 (diff) | |
| download | edify-9c6c9c4570a0c583d93e197d3414c35315afa5fb.tar.xz edify-9c6c9c4570a0c583d93e197d3414c35315afa5fb.zip | |
Rewriting builder based on Super Expressive
Diffstat (limited to 'src')
| -rw-r--r-- | src/edify/builder/builder.py | 298 | ||||
| -rw-r--r-- | src/edify/builder/errors.py | 38 | ||||
| -rw-r--r-- | src/edify/builder/helpers/core.py | 33 | ||||
| -rw-r--r-- | src/edify/builder/helpers/quantifiers.py | 11 | ||||
| -rw-r--r-- | src/edify/builder/helpers/regex_vars.py | 1 | ||||
| -rw-r--r-- | src/edify/builder/helpers/t.py | 48 |
6 files changed, 420 insertions, 9 deletions
diff --git a/src/edify/builder/builder.py b/src/edify/builder/builder.py index 5c10288..d22ff24 100644 --- a/src/edify/builder/builder.py +++ b/src/edify/builder/builder.py @@ -1,16 +1,296 @@ -from .abc import Builder +import re +from sys import _current_frames +from .helpers.core import create_stack_frame, assertion, escape_special +from .helpers.t import t +from .helpers.regex_vars import named_group_regex +from .errors import * + +class RegexBuilder: + """Regular Expression Builder Class. + """ + + state = {} -class RegexBuilder(Builder): def __init__(self): - self.parts = [] + self.state = { + 'has_defined_start': False, + 'has_defined_end': False, + 'flags': { + 'g': False, + 'y': False, + 'm': False, + 'i': False, + 'u': False, + 's': False, + }, + 'stack': create_stack_frame(t['root']), + 'named_groups': [], + 'total_capture_groups': 0, + } + + + def allow_multiple_matches(self): + self.state['stack']['flags']['g'] = True + + + def sticky(self): + self.state['stack']['flags']['y'] = True + + + def line_by_line(self): + self.state['stack']['flags']['m'] = True + + + def case_insensitive(self): + self.state['stack']['flags']['i'] = True + + + def unicode(self): + self.state['stack']['flags']['u'] = True + + + def single_line(self): + self.state['stack']['flags']['s'] = True - def build(self): - return "".join(map(str, self.parts)) - def add(self, part): - if not (issubclass(type(part), Builder) or isinstance(part, str)): - raise ValueError(f"{part} is not a valid Part") + def get_current_frame(self): + return self.state['stack'] - self.parts.append(part) + + def get_current_element_array(self): + return self.get_current_frame()['elements'] + + + def match_element(self, type_fn): + current_element_array = self.get_current_element_array() + current_element_array.append(self.apply_quantifier(type_fn)) + return self + + + def apply_quantifier(self, element): + current_frame = self.get_current_frame() + if current_frame['quantifier'] is not None: + wrapped = current_frame['quantifier']['value'](element) + current_frame['quantifier'] = None + return wrapped + return element + + + def frame_creating_element(self, type_fn): + new_frame = create_stack_frame(type_fn) + self.state['stack'] = new_frame return self + + + def tracked_named_group(self, name): + assertion(type(name) is str, must_be_a_string("Name", type(name))) + assertion(len(name) > 0, must_be_one_character("Name")) + assertion(name not in self.state['named_groups'], cannot_create_duplicate_named_group(name)) + assertion(re.compile(named_group_regex, re.I).match(name), name_not_valid(name)) + self.state['named_groups'].append(name) + + + def capture(self): + new_frame = create_stack_frame(t['capture']) + self.state['stack'] = new_frame + self.state['total_capture_groups'] += 1 + return self + + + def named_capture(self, name): + new_frame = create_stack_frame(t['named_capture'](name)) + self.tracked_named_group(name) + self.state['stack'] = new_frame + self.state['total_capture_groups'] += 1 + return self + + + def quantifier_element(self, type_fn): + current_frame = self.get_current_frame() + if current_frame['quantifier'] is not None: + raise Exception(unable_to_quantify(type_fn, current_frame['quantifier']['type'])) + current_frame['quantifier'] = t[type_fn] + return self + + + def any_char(self): + return self.match_element(t['any_char']) + + + def whitespace_char(self): + return self.match_element(t['whitespace_char']) + + + def non_whitespace_char(self): + return self.match_element(t['non_whitespace_char']) + + + def digit(self): + return self.match_element(t['digit']) + + + def non_digit(self): + return self.match_element(t['non_digit']) + + + def word(self): + return self.match_element(t['word']) + + + def non_word(self): + return self.match_element(t['non_word']) + + + def word_boundary(self): + return self.match_element(t['word_boundary']) + + + def non_word_boundary(self): + return self.match_element(t['non_word_boundary']) + + + def new_line(self): + return self.match_element(t['new_line']) + + + def carriage_return(self): + return self.match_element(t['carriage_return']) + + + def tab(self): + return self.match_element(t['tab']) + + + def null_byte(self): + return self.match_element(t['null_byte']) + + + def named_back_reference(self, name): + assertion(name in self.state['named_groups'], named_group_does_not_exist(name)) + return self.match_element(t['named_back_reference'](name)) + + + def back_reference(self, index: int): + assertion(type(index) is int, 'Index must be an integer.') + assertion( + index > 0 and index <= self.state['total_capture_groups'], + invalid_total_capture_groups_index(index, self.state['total_capture_groups']) + ) + return self.match_element(t['back_reference'](index)) + + + def any_of(self): + return self.frame_creating_element(t['any_of']) + + + def group(self): + return self.frame_creating_element(t['group']) + + + def assert_ahead(self): + return self.frame_creating_element(t['assert_ahead']) + + + def assert_not_ahead(self): + return self.frame_creating_element(t['assert_not_ahead']) + + + def assert_behind(self): + return self.frame_creating_element(t['assert_behind']) + + + def assert_not_behind(self): + return self.frame_creating_element(t['assert_not_behind']) + + + def optional(self): + return self.quantifier_element('optional') + + + def zero_or_more(self): + return self.quantifier_element('zero_or_more') + + + def zero_or_more_lazy(self): + return self.quantifier_element('zero_or_more_lazy') + + + def one_or_more(self): + return self.quantifier_element('one_or_more') + + + def one_or_more_lazy(self): + return self.quantifier_element('one_or_more_lazy') + + + def exactly(self, count): + assertion(type(count) is int and count > 0, must_be_positive_integer('count')) + current_frame = self.get_current_frame() + if current_frame['quantifier'] is not None: + raise Exception(unable_to_quantify("exactly", current_frame['quantifier']['type'])) + current_frame['quantifier'] = t['exactly'](count) + return self + + + def at_least(self, count): + assertion(type(count) is int and count > 0, must_be_positive_integer('count')) + current_frame = self.get_current_frame() + if current_frame['quantifier'] is not None: + raise Exception(unable_to_quantify("at_least", current_frame['quantifier']['type'])) + current_frame['quantifier'] = t['at_least'](count) + return self + + + def between(self, x, y): + assertion(type(x) is int and x >= 0, must_be_integer_greater_than_zero('x')) + assertion(type(y) is int and y > 0, must_be_positive_integer('y')) + assertion(x < y, 'X must be less than Y.') + current_frame = self.get_current_frame() + if current_frame['quantifier'] is not None: + raise Exception(unable_to_quantify("between", current_frame['quantifier']['type'])) + current_frame['quantifier'] = t['between'](x, y) + return self + + + def between_lazy(self, x, y): + assertion(type(x) is int and x >= 0, must_be_integer_greater_than_zero('x')) + assertion(type(y) is int and y > 0, must_be_positive_integer('y')) + assertion(x < y, 'X must be less than Y.') + current_frame = self.get_current_frame() + if current_frame['quantifier'] is not None: + raise Exception(unable_to_quantify("between_lazy", current_frame['quantifier']['type'])) + current_frame['quantifier'] = t['between_lazy'](x, y) + return self + + def start_of_input(self): + assertion(self.state['has_defined_start'] is False, start_input_already_defined()) + assertion(self.state['has_defined_end'] is False, cannot_define_start_after_end()) + self.state['has_defined_start'] = True + current_element_array = self.get_current_element_array() + current_element_array.append(t['start_of_input']) + return self + + + def end_of_input(self): + assertion(self.state['has_defined_end'] is False, end_input_already_defined()) + self.state['has_defined_end'] = True + current_element_array = self.get_current_element_array() + current_element_array.append(t['end_of_input']) + return self + + + def any_of_chars(self, chars): + element_value = t['any_of_chars'](escape_special(chars)) + current_frame = self.get_current_frame() + current_frame['elements'].append(self.apply_quantifier(element_value)) + return self + + def end(self): + assertion(len(self.state['stack']) > 1, can_not_end_while_building_root_exp()) + old_frame = self.state['stack'].pop() + current_frame = self.get_current_frame() + current_frame['elements'].append(self.apply_quantifier(old_frame['type']['value'](old_frame['elements']))) + return self + + diff --git a/src/edify/builder/errors.py b/src/edify/builder/errors.py new file mode 100644 index 0000000..27258cd --- /dev/null +++ b/src/edify/builder/errors.py @@ -0,0 +1,38 @@ +def must_be_a_string(value, variable_name): + return '{} must be a string. (got {})'.format(value, type(variable_name)) + +def must_be_one_character(variable_name): + return '{} must be one character long.'.format(variable_name) + +def cannot_create_duplicate_named_group(name): + return 'Can not create duplicate named group "{}".'.format(name) + +def name_not_valid(name): + return 'Name {} is not valid. (only alphanumeric characters and underscores are allowed)'.format(name) + +def named_group_does_not_exist(name): + return 'Named group "{}" does not exist (create one with .named_capture()).'.format(name) + +def invalid_total_capture_groups_index(index, total_capture_groups): + return 'Invalid index #{}. There are only {} capture groups.'.format(index, total_capture_groups) + +def must_be_positive_integer(variable_name): + return '{} must be a positive integer.'.format(variable_name) + +def must_be_integer_greater_than_zero(variable_name): + return '{} must be an integer greater than zero.'.format(variable_name) + +def unable_to_quantify(quantifier, type): + return 'Can not quantify regular expression with {}, because it has already been quantified with {}.'.format(quantifier, type) + +def start_input_already_defined(): + return 'Regex already has a start of input.' + +def cannot_define_start_after_end(): + return 'Can not define a start of input after defining an end of input.' + +def end_input_already_defined(): + return 'Regex already has an end of input.' + +def can_not_end_while_building_root_exp(): + return 'Can not end while building the root expression.' diff --git a/src/edify/builder/helpers/core.py b/src/edify/builder/helpers/core.py new file mode 100644 index 0000000..fa775ff --- /dev/null +++ b/src/edify/builder/helpers/core.py @@ -0,0 +1,33 @@ +import re +def as_type(type, options={}): + def as_type_fn(value=None): + return { + 'type': type, + 'value': value, + 'options': options, + } + return as_type_fn + + +def deferred_type(type, options = {}): + type_fn = as_type(type, options) + return type_fn(type_fn) + + +def create_stack_frame(type): + return { + 'type': type, + 'quantifier': None, + 'elements': [], + } + + +def assertion(condition, message): + if not condition: + raise Exception(message) + +# const escapeSpecial = s => specialChars.reduce((acc, char) => replaceAll(acc, char, `\\${char}`), s); +def escape_special(s): + return re.escape(s) + + diff --git a/src/edify/builder/helpers/quantifiers.py b/src/edify/builder/helpers/quantifiers.py new file mode 100644 index 0000000..1810658 --- /dev/null +++ b/src/edify/builder/helpers/quantifiers.py @@ -0,0 +1,11 @@ +quantifier_table = { + 'one_or_more': '+', + 'one_or_more_lazy': '+?', + 'zero_or_more': '*', + 'zero_or_more_lazy': '*?', + 'optional': '?', + 'exactly': lambda times: '{{{}}}'.format(times), + 'at_least': lambda times: '{{{},}}'.format(times), + 'between': lambda times: '{{{},{}}}'.format(times[0], times[1]), + 'between_lazy': lambda times: '{{{},{}}}?'.format(times[0], times[1]), +} diff --git a/src/edify/builder/helpers/regex_vars.py b/src/edify/builder/helpers/regex_vars.py new file mode 100644 index 0000000..4d7f17c --- /dev/null +++ b/src/edify/builder/helpers/regex_vars.py @@ -0,0 +1 @@ +named_group_regex=r"^[a-z]+\w*$" diff --git a/src/edify/builder/helpers/t.py b/src/edify/builder/helpers/t.py new file mode 100644 index 0000000..8ac3d7d --- /dev/null +++ b/src/edify/builder/helpers/t.py @@ -0,0 +1,48 @@ +from .core import as_type, deferred_type + +t = { + 'root': as_type('root')(), + 'noop': as_type('noop')(), + 'start_of_input': as_type('start_of_input')(), + 'end_of_input': as_type('end_of_input')(), + 'any_char': as_type('any_char')(), + 'whitespace_char': as_type('whitespace_char')(), + 'non_whitespace_char': as_type('non_whitespace_char')(), + 'digit': as_type('digit')(), + 'non_digit': as_type('non_digit')(), + 'word': as_type('word')(), + 'non_word': as_type('non_word')(), + 'word_boundary': as_type('word_boundary')(), + 'non_word_boundary': as_type('non_word_boundary')(), + 'new_line': as_type('new_line')(), + 'carriage_return': as_type('carriage_return')(), + 'tab': as_type('tab')(), + 'null_byte': as_type('null_byte')(), + 'any_of_chars': as_type('any_of_chars'), + 'anything_but_string': as_type('anything_but_string'), + 'anything_but_chars': as_type('anything_but_chars'), + 'anything_but_range': as_type('anything_but_range'), + 'char': as_type('char'), + 'range': as_type('range'), + 'string': as_type('string', {'quantifiers_require_group': True}), + 'named_back_reference': lambda name: deferred_type('named_back_reference', {'name': name}), + 'back_reference': lambda index: deferred_type('back_reference', {'index': index}), + 'capture': deferred_type('capture', {'contains_children': True}), + 'sub_expression': as_type('sub_expression', {'contains_children': True, 'quantifiers_require_group': True}), + 'named_capture': lambda name: deferred_type('named_capture', {'name': name, 'contains_children': True}), + 'group': deferred_type('group', {'contains_children': True}), + 'any_of': deferred_type('any_of', {'contains_children': True}), + 'assert_ahead': deferred_type('assert_ahead', {'contains_children': True}), + 'assert_not_ahead': deferred_type('assert_not_ahead', {'contains_children': True}), + 'assert_behind': deferred_type('assert_behind', {'contains_children': True}), + 'assert_not_behind': deferred_type('assert_not_behind', {'contains_children': True}), + 'exactly': lambda times: deferred_type('exactly', {'times': times, 'contains_child': True}), + 'at_least': lambda times: deferred_type('at_least', {'times': times, 'contains_child': True}), + 'between': lambda x, y: deferred_type('between', {'times': [x, y], 'contains_child': True}), + 'between_lazy': lambda x, y: deferred_type('between_lazy', {'times': [x, y], 'contains_child': True}), + 'zero_or_more': deferred_type('zero_or_more', {'contains_child': True}), + 'zero_or_more_lazy': deferred_type('zero_or_more_lazy', {'contains_child': True}), + 'one_or_more': deferred_type('one_or_more', {'contains_child': True}), + 'one_or_more_lazy': deferred_type('one_or_more_lazy', {'contains_child': True}), + 'optional': deferred_type('optional', {'contains_child': True}), +} |
