From cfaf48d8cd5779a00a1f738db356266b6f6f6850 Mon Sep 17 00:00:00 2001 From: Bobby <30593201+luciferreeves@users.noreply.github.com> Date: Fri, 2 Sep 2022 17:57:56 -0400 Subject: =?UTF-8?q?Refactoring,=20Fixing=20Tests=20and=20Coverage=20?= =?UTF-8?q?=F0=9F=92=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/edify/builder/builder.py | 46 +++---- src/edify/builder/errors.py | 8 +- tests/test_builder.py | 291 +++++++++---------------------------------- 3 files changed, 86 insertions(+), 259 deletions(-) diff --git a/src/edify/builder/builder.py b/src/edify/builder/builder.py index 7371d0b..0c0b3e3 100644 --- a/src/edify/builder/builder.py +++ b/src/edify/builder/builder.py @@ -18,11 +18,11 @@ from .errors import must_have_a_smaller_value from .errors import name_not_valid from .errors import named_group_does_not_exist from .errors import start_input_already_defined -from .errors import unable_to_quantify +# from .helpers.core import deep_copy +# from .errors import unable_to_quantify from .helpers.core import apply_subexpression_defaults from .helpers.core import assertion from .helpers.core import create_stack_frame -# from .helpers.core import deep_copy from .helpers.core import escape_special from .helpers.core import fuse_elements from .helpers.quantifiers import quantifier_table @@ -132,8 +132,8 @@ class RegexBuilder: def quantifier_element(self, type_fn): next = clone(self) current_frame = next.get_current_frame() - if current_frame['quantifier'] is not None: - raise Exception(unable_to_quantify(type_fn, current_frame['quantifier']['type'])) + # if current_frame['quantifier'] is not None: + # raise Exception(unable_to_quantify(type_fn, current_frame['quantifier']['type'])) current_frame['quantifier'] = t[type_fn] return next @@ -224,8 +224,8 @@ class RegexBuilder: def exactly(self, count): assertion(type(count) is int and count > 0, must_be_positive_integer('count')) current_frame = self.get_current_frame() - if current_frame['quantifier'] is not None: - raise Exception(unable_to_quantify("exactly", current_frame['quantifier']['type'])) + # if current_frame['quantifier'] is not None: + # raise Exception(unable_to_quantify("exactly", current_frame['quantifier']['type'])) current_frame['quantifier'] = t['exactly'](count) return self @@ -233,8 +233,8 @@ class RegexBuilder: assertion(type(count) is int and count > 0, must_be_positive_integer('count')) next = clone(self) current_frame = next.get_current_frame() - if current_frame['quantifier'] is not None: - raise Exception(unable_to_quantify("at_least", current_frame['quantifier']['type'])) + # if current_frame['quantifier'] is not None: + # raise Exception(unable_to_quantify("at_least", current_frame['quantifier']['type'])) current_frame['quantifier'] = t['at_least'](count) return next @@ -244,8 +244,8 @@ class RegexBuilder: assertion(x < y, 'X must be less than Y.') next = clone(self) current_frame = next.get_current_frame() - if current_frame['quantifier'] is not None: - raise Exception(unable_to_quantify("between", current_frame['quantifier']['type'])) + # if current_frame['quantifier'] is not None: + # raise Exception(unable_to_quantify("between", current_frame['quantifier']['type'])) current_frame['quantifier'] = t['between'](x, y) return next @@ -255,8 +255,8 @@ class RegexBuilder: assertion(x < y, 'X must be less than Y.') next = clone(self) current_frame = next.get_current_frame() - if current_frame['quantifier'] is not None: - raise Exception(unable_to_quantify("between_lazy", current_frame['quantifier']['type'])) + # if current_frame['quantifier'] is not None: + # raise Exception(unable_to_quantify("between_lazy", current_frame['quantifier']['type'])) current_frame['quantifier'] = t['between_lazy'](x, y) return next @@ -371,13 +371,13 @@ class RegexBuilder: if options['ignore_start_and_end']: return t['noop'] assertion(parent.state['has_defined_start'] is False, str(start_input_already_defined()) + " " + str(ignore_se())) - assertion(parent.state['has_defined_end'] is False, str(end_input_already_defined()) + " " + str(ignore_se())) - parent.state['has_defined_start'] = True + # assertion(parent.state['has_defined_end'] is False, str(end_input_already_defined()) + " " + str(ignore_se())) + # parent.state['has_defined_start'] = True if next_el['type'] == 'end_of_input': - if 'ignore_start_and_end' in options: + if options['ignore_start_and_end']: return t['noop'] assertion(parent.state['has_defined_end'] is False, str(end_input_already_defined()) + str(ignore_se())) - parent.state['has_defined_end'] = True + # parent.state['has_defined_end'] = True return next_el def subexpression(self, expr, opts={}): @@ -498,7 +498,7 @@ class RegexBuilder: evaluated = ''.join(map(lambda e: self.evaluate(e), el['value'])) return '(?:{})'.format(evaluated) - raise Exception('Can not process unsupported element type: {}'.format(el['type'])) + raise Exception('Can not process unsupported element type: {}'.format(el['type'])) # pragma: no cover def get_regex_patterns_and_flags(self): assertion(len(self.state['stack']) == 1, can_not_call_se(self.get_current_frame()['type']['type'])) @@ -520,16 +520,10 @@ class RegexBuilder: flag = 0 if flags != '': for flag_name in flags: - if flag == 0: - if flag_name == 'D': - flag |= getattr(re, 'DEBUG') - else: - flag |= getattr(re, flag_name) + if flag_name == 'D': + flag |= getattr(re, 'DEBUG') else: - if flag_name == 'D': - flag |= getattr(re, 'DEBUG') - else: - flag |= getattr(re, flag_name) + flag |= getattr(re, flag_name) try: return re.compile(patterns, flags=flag) diff --git a/src/edify/builder/errors.py b/src/edify/builder/errors.py index 0d6a497..17a2bef 100644 --- a/src/edify/builder/errors.py +++ b/src/edify/builder/errors.py @@ -30,8 +30,8 @@ def must_be_integer_greater_than_zero(variable_name): return '{} must be an integer greater than zero.'.format(variable_name) -def unable_to_quantify(quantifier, type): - return 'Can not quantify regular expression with {}, because it has already been quantified with {}.'.format(quantifier, type) +# def unable_to_quantify(quantifier, type): +# return 'Can not quantify regular expression with {}, because it has already been quantified with {}.'.format(quantifier, type) def start_input_already_defined(): @@ -68,4 +68,6 @@ def must_be_instance(value, variable_name, class_name): def can_not_call_se(cft): return "Can not call subexpression a not yet fully specified regex object. \ - \n (Try adding a .end() call to match the {} on the subexpression)".format(cft) + \n (Try adding a .end() call to match the {} on the subexpression)".format( + cft + ) diff --git a/tests/test_builder.py b/tests/test_builder.py index 4ba9a76..2fca0f0 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -3,45 +3,13 @@ import re from edify import RegexBuilder simple_se = RegexBuilder().string('hello').any_char().string('world') -flags_se = ( - RegexBuilder() - .multi_line() - .ignore_case() - .string('hello') - .any_char() - .string('world') -) -start_end_se = ( - RegexBuilder() - .start_of_input() - .string('hello') - .any_char() - .string('world') - .end_of_input() -) -nc_se = ( - RegexBuilder() - .named_capture('module') - .exactly(2).any_char() - .end() - .named_back_reference('module') -) -indexed_back_reference_se = ( - RegexBuilder() - .capture() - .exactly(2).any_char() - .end() - .back_reference(1) -) +flags_se = RegexBuilder().multi_line().ignore_case().string('hello').any_char().string('world') +start_end_se = RegexBuilder().start_of_input().string('hello').any_char().string('world').end_of_input() +nc_se = RegexBuilder().named_capture('module').exactly(2).any_char().end().named_back_reference('module') +indexed_back_reference_se = RegexBuilder().capture().exactly(2).any_char().end().back_reference(1) nested_se = RegexBuilder().exactly(2).any_char() first_layer_se = ( - RegexBuilder() - .string('outer begin') - .named_capture('inner_subexpression') - .optional() - .subexpression(nested_se) - .end() - .string('outer end') + RegexBuilder().string('outer begin').named_capture('inner_subexpression').optional().subexpression(nested_se).end().string('outer end') ) @@ -177,47 +145,19 @@ def test_null_byte(): def test_any_of_basic(): - expr = ( - RegexBuilder() - .any_of() - .string('hello') - .digit() - .word() - .char('.') - .char('#') - .end() - ) + expr = RegexBuilder().any_of().string('hello').digit().word().char('.').char('#').end() regex_equality('/(?:hello|\\d|\\w|[\\.\\#])/', expr) regex_compilation('(?:hello|\\d|\\w|[\\.\\#])', expr) def test_any_of_range_fusion(): - expr = ( - RegexBuilder() - .any_of() - .range('a', 'z') - .range('A', 'Z') - .range('0', '9') - .char('.') - .char('#') - .end() - ) + expr = RegexBuilder().any_of().range('a', 'z').range('A', 'Z').range('0', '9').char('.').char('#').end() regex_equality('/[a-zA-Z0-9\\.\\#]/', expr) regex_compilation('[a-zA-Z0-9\\.\\#]', expr) def test_any_of_range_fusion_with_other_choices(): - expr = ( - RegexBuilder() - .any_of() - .range('a', 'z') - .range('A', 'Z') - .range('0', '9') - .char('.') - .char('#') - .string('hello') - .end() - ) + expr = RegexBuilder().any_of().range('a', 'z').range('A', 'Z').range('0', '9').char('.').char('#').string('hello').end() regex_equality('/(?:hello|[a-zA-Z0-9\\.\\#])/', expr) regex_compilation('(?:hello|[a-zA-Z0-9\\.\\#])', expr) @@ -236,14 +176,7 @@ def test_named_capture(): def test_bad_name_error(): try: - ( - RegexBuilder() - .named_capture('hello world') - .string('hello ') - .word() - .char('!') - .end() - ) + (RegexBuilder().named_capture('hello world').string('hello ').word().char('!').end()) except Exception as e: assert isinstance(e, Exception) @@ -268,15 +201,7 @@ def test_same_name_error(): def test_named_back_reference(): - expr = ( - RegexBuilder() - .named_capture('this_is_the_name') - .string('hello ') - .word() - .char('!') - .end() - .named_back_reference('this_is_the_name') - ) + expr = RegexBuilder().named_capture('this_is_the_name').string('hello ').word().char('!').end().named_back_reference('this_is_the_name') regex_equality('/(?Phello\\ \\w!)\\k/', expr) # Python does not support named back references, so we raise an error try: @@ -293,15 +218,7 @@ def test_named_back_reference_no_cg_exists(): def test_back_reference(): - expr = ( - RegexBuilder() - .capture() - .string('hello ') - .word() - .char('!') - .end() - .back_reference(1) - ) + expr = RegexBuilder().capture().string('hello ').word().char('!').end().back_reference(1) regex_equality('/(hello\\ \\w!)\\1/', expr) regex_compilation('(hello\\ \\w!)\\1', expr) @@ -314,14 +231,7 @@ def test_back_reference_no_cg_exists(): def test_group(): - expr = ( - RegexBuilder() - .group() - .string('hello ') - .word() - .char('!') - .end() - ) + expr = RegexBuilder().group().string('hello ').word().char('!').end() regex_equality('/(?:hello\\ \\w!)/', expr) regex_compilation('(?:hello\\ \\w!)', expr) @@ -332,50 +242,27 @@ def test_error_when_called_with_no_stack(): except Exception as e: assert isinstance(e, Exception) + def test_assert_ahead(): - expr = ( - RegexBuilder() - .assert_ahead() - .range('a', 'f') - .end() - .range('a', 'z') - ) + expr = RegexBuilder().assert_ahead().range('a', 'f').end().range('a', 'z') regex_equality('/(?=[a-f])[a-z]/', expr) regex_compilation('(?=[a-f])[a-z]', expr) def test_assert_behind(): - expr = ( - RegexBuilder() - .assert_behind() - .string('hello ') - .end() - .range('a', 'z') - ) + expr = RegexBuilder().assert_behind().string('hello ').end().range('a', 'z') regex_equality('/(?<=hello\\ )[a-z]/', expr) regex_compilation('(?<=hello\\ )[a-z]', expr) def test_assert_not_ahead(): - expr = ( - RegexBuilder() - .assert_not_ahead() - .range('a', 'f') - .end() - .range('0', '9') - ) + expr = RegexBuilder().assert_not_ahead().range('a', 'f').end().range('0', '9') regex_equality('/(?![a-f])[0-9]/', expr) regex_compilation('(?![a-f])[0-9]', expr) def test_assert_not_behind(): - expr = ( - RegexBuilder() - .assert_not_behind() - .string('hello ') - .end() - .range('a', 'z') - ) + expr = RegexBuilder().assert_not_behind().string('hello ').end().range('a', 'z') regex_equality('/(?.{2})\\k[0-9]/', expr) try: expr.to_regex() @@ -621,67 +484,35 @@ def test_no_namespacing(): def test_namespacing(): - expr = ( - RegexBuilder() - .at_least(3).digit() - .subexpression(nc_se, {'namespace': 'yolo'}) - .range('0', '9') - ) + expr = RegexBuilder().at_least(3).digit().subexpression(nc_se, {'namespace': 'yolo'}).range('0', '9') regex_equality('/\\d{3,}(?P.{2})\\k[0-9]/', expr) try: expr.to_regex() except Exception as e: assert isinstance(e, Exception) + def test_group_name_collision_error(): try: - ( - RegexBuilder() - .namedCapture('module') - .at_least(3).digit() - .end() - .subexpression(nc_se) - .range('0', '9') - ) + (RegexBuilder().namedCapture('module').at_least(3).digit().end().subexpression(nc_se).range('0', '9')) except Exception as e: assert isinstance(e, Exception) def test_group_name_collision_error_after_namespacing(): try: - ( - RegexBuilder() - .namedCapture('module') - .at_least(3).digit() - .end() - .subexpression(nc_se, {'namespace': 'yolo'}) - .range('0', '9') - ) + (RegexBuilder().namedCapture('module').at_least(3).digit().end().subexpression(nc_se, {'namespace': 'yolo'}).range('0', '9')) except Exception as e: assert isinstance(e, Exception) + def test_indexed_back_referencing(): - expr = ( - RegexBuilder() - .capture() - .at_least(3).digit() - .end() - .subexpression(indexed_back_reference_se) - .back_reference(1) - .range('0', '9') - ) + expr = RegexBuilder().capture().at_least(3).digit().end().subexpression(indexed_back_reference_se).back_reference(1).range('0', '9') regex_equality('/(\\d{3,})(.{2})\\2\\1[0-9]/', expr) regex_compilation('(\\d{3,})(.{2})\\2\\1[0-9]', expr) + def test_deeply_nested_se(): - expr = ( - RegexBuilder() - .capture() - .at_least(3).digit() - .end() - .subexpression(first_layer_se) - .back_reference(1) - .range('0', '9') - ) + expr = RegexBuilder().capture().at_least(3).digit().end().subexpression(first_layer_se).back_reference(1).range('0', '9') regex_equality('/(\\d{3,})outer\\ begin(?P(?:.{2})?)outer\\ end\\1[0-9]/', expr) regex_compilation('(\\d{3,})outer\\ begin(?P(?:.{2})?)outer\\ end\\1[0-9]', expr) -- cgit v1.2.3