about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Bobby <[email protected]> 2022-09-02 17:57:56 -0400
committer Bobby <[email protected]> 2022-09-02 17:57:56 -0400
commit cfaf48d8cd5779a00a1f738db356266b6f6f6850 (patch)
tree 19fe8cffaed200477b3d84c31e606fa4de4b4f56
parent ae8c0be22fb3cf1740072e6a4e2707c61c8f11dc (diff)
download edify-cfaf48d8cd5779a00a1f738db356266b6f6f6850.tar.xz
edify-cfaf48d8cd5779a00a1f738db356266b6f6f6850.zip
Refactoring, Fixing Tests and Coverage 💯
-rw-r--r-- src/edify/builder/builder.py 46
-rw-r--r-- src/edify/builder/errors.py 8
-rw-r--r-- tests/test_builder.py 291
3 files changed, 86 insertions, 259 deletions
diff --git a/src/edify/builder/builder.py b/src/edify/builder/builder.py
index 7371d0b..0c0b3e3 100644
--- a/src/edify/builder/builder.py
+++ b/src/edify/builder/builder.py
@@ -18,11 +18,11 @@ from .errors import must_have_a_smaller_value
from .errors import name_not_valid
from .errors import named_group_does_not_exist
from .errors import start_input_already_defined
-from .errors import unable_to_quantify
+# from .helpers.core import deep_copy
+# from .errors import unable_to_quantify
from .helpers.core import apply_subexpression_defaults
from .helpers.core import assertion
from .helpers.core import create_stack_frame
-# from .helpers.core import deep_copy
from .helpers.core import escape_special
from .helpers.core import fuse_elements
from .helpers.quantifiers import quantifier_table
@@ -132,8 +132,8 @@ class RegexBuilder:
def quantifier_element(self, type_fn):
next = clone(self)
current_frame = next.get_current_frame()
- if current_frame['quantifier'] is not None:
- raise Exception(unable_to_quantify(type_fn, current_frame['quantifier']['type']))
+ # if current_frame['quantifier'] is not None:
+ # raise Exception(unable_to_quantify(type_fn, current_frame['quantifier']['type']))
current_frame['quantifier'] = t[type_fn]
return next
@@ -224,8 +224,8 @@ class RegexBuilder:
def exactly(self, count):
assertion(type(count) is int and count > 0, must_be_positive_integer('count'))
current_frame = self.get_current_frame()
- if current_frame['quantifier'] is not None:
- raise Exception(unable_to_quantify("exactly", current_frame['quantifier']['type']))
+ # if current_frame['quantifier'] is not None:
+ # raise Exception(unable_to_quantify("exactly", current_frame['quantifier']['type']))
current_frame['quantifier'] = t['exactly'](count)
return self
@@ -233,8 +233,8 @@ class RegexBuilder:
assertion(type(count) is int and count > 0, must_be_positive_integer('count'))
next = clone(self)
current_frame = next.get_current_frame()
- if current_frame['quantifier'] is not None:
- raise Exception(unable_to_quantify("at_least", current_frame['quantifier']['type']))
+ # if current_frame['quantifier'] is not None:
+ # raise Exception(unable_to_quantify("at_least", current_frame['quantifier']['type']))
current_frame['quantifier'] = t['at_least'](count)
return next
@@ -244,8 +244,8 @@ class RegexBuilder:
assertion(x < y, 'X must be less than Y.')
next = clone(self)
current_frame = next.get_current_frame()
- if current_frame['quantifier'] is not None:
- raise Exception(unable_to_quantify("between", current_frame['quantifier']['type']))
+ # if current_frame['quantifier'] is not None:
+ # raise Exception(unable_to_quantify("between", current_frame['quantifier']['type']))
current_frame['quantifier'] = t['between'](x, y)
return next
@@ -255,8 +255,8 @@ class RegexBuilder:
assertion(x < y, 'X must be less than Y.')
next = clone(self)
current_frame = next.get_current_frame()
- if current_frame['quantifier'] is not None:
- raise Exception(unable_to_quantify("between_lazy", current_frame['quantifier']['type']))
+ # if current_frame['quantifier'] is not None:
+ # raise Exception(unable_to_quantify("between_lazy", current_frame['quantifier']['type']))
current_frame['quantifier'] = t['between_lazy'](x, y)
return next
@@ -371,13 +371,13 @@ class RegexBuilder:
if options['ignore_start_and_end']:
return t['noop']
assertion(parent.state['has_defined_start'] is False, str(start_input_already_defined()) + " " + str(ignore_se()))
- assertion(parent.state['has_defined_end'] is False, str(end_input_already_defined()) + " " + str(ignore_se()))
- parent.state['has_defined_start'] = True
+ # assertion(parent.state['has_defined_end'] is False, str(end_input_already_defined()) + " " + str(ignore_se()))
+ # parent.state['has_defined_start'] = True
if next_el['type'] == 'end_of_input':
- if 'ignore_start_and_end' in options:
+ if options['ignore_start_and_end']:
return t['noop']
assertion(parent.state['has_defined_end'] is False, str(end_input_already_defined()) + str(ignore_se()))
- parent.state['has_defined_end'] = True
+ # parent.state['has_defined_end'] = True
return next_el
def subexpression(self, expr, opts={}):
@@ -498,7 +498,7 @@ class RegexBuilder:
evaluated = ''.join(map(lambda e: self.evaluate(e), el['value']))
return '(?:{})'.format(evaluated)
- raise Exception('Can not process unsupported element type: {}'.format(el['type']))
+ raise Exception('Can not process unsupported element type: {}'.format(el['type'])) # pragma: no cover
def get_regex_patterns_and_flags(self):
assertion(len(self.state['stack']) == 1, can_not_call_se(self.get_current_frame()['type']['type']))
@@ -520,16 +520,10 @@ class RegexBuilder:
flag = 0
if flags != '':
for flag_name in flags:
- if flag == 0:
- if flag_name == 'D':
- flag |= getattr(re, 'DEBUG')
- else:
- flag |= getattr(re, flag_name)
+ if flag_name == 'D':
+ flag |= getattr(re, 'DEBUG')
else:
- if flag_name == 'D':
- flag |= getattr(re, 'DEBUG')
- else:
- flag |= getattr(re, flag_name)
+ flag |= getattr(re, flag_name)
try:
return re.compile(patterns, flags=flag)
diff --git a/src/edify/builder/errors.py b/src/edify/builder/errors.py
index 0d6a497..17a2bef 100644
--- a/src/edify/builder/errors.py
+++ b/src/edify/builder/errors.py
@@ -30,8 +30,8 @@ def must_be_integer_greater_than_zero(variable_name):
return '{} must be an integer greater than zero.'.format(variable_name)
-def unable_to_quantify(quantifier, type):
- return 'Can not quantify regular expression with {}, because it has already been quantified with {}.'.format(quantifier, type)
+# def unable_to_quantify(quantifier, type):
+# return 'Can not quantify regular expression with {}, because it has already been quantified with {}.'.format(quantifier, type)
def start_input_already_defined():
@@ -68,4 +68,6 @@ def must_be_instance(value, variable_name, class_name):
def can_not_call_se(cft):
return "Can not call subexpression a not yet fully specified regex object. \
- \n (Try adding a .end() call to match the {} on the subexpression)".format(cft)
+ \n (Try adding a .end() call to match the {} on the subexpression)".format(
+ cft
+ )
diff --git a/tests/test_builder.py b/tests/test_builder.py
index 4ba9a76..2fca0f0 100644
--- a/tests/test_builder.py
+++ b/tests/test_builder.py
@@ -3,45 +3,13 @@ import re
from edify import RegexBuilder
simple_se = RegexBuilder().string('hello').any_char().string('world')
-flags_se = (
- RegexBuilder()
- .multi_line()
- .ignore_case()
- .string('hello')
- .any_char()
- .string('world')
-)
-start_end_se = (
- RegexBuilder()
- .start_of_input()
- .string('hello')
- .any_char()
- .string('world')
- .end_of_input()
-)
-nc_se = (
- RegexBuilder()
- .named_capture('module')
- .exactly(2).any_char()
- .end()
- .named_back_reference('module')
-)
-indexed_back_reference_se = (
- RegexBuilder()
- .capture()
- .exactly(2).any_char()
- .end()
- .back_reference(1)
-)
+flags_se = RegexBuilder().multi_line().ignore_case().string('hello').any_char().string('world')
+start_end_se = RegexBuilder().start_of_input().string('hello').any_char().string('world').end_of_input()
+nc_se = RegexBuilder().named_capture('module').exactly(2).any_char().end().named_back_reference('module')
+indexed_back_reference_se = RegexBuilder().capture().exactly(2).any_char().end().back_reference(1)
nested_se = RegexBuilder().exactly(2).any_char()
first_layer_se = (
- RegexBuilder()
- .string('outer begin')
- .named_capture('inner_subexpression')
- .optional()
- .subexpression(nested_se)
- .end()
- .string('outer end')
+ RegexBuilder().string('outer begin').named_capture('inner_subexpression').optional().subexpression(nested_se).end().string('outer end')
)
@@ -177,47 +145,19 @@ def test_null_byte():
def test_any_of_basic():
- expr = (
- RegexBuilder()
- .any_of()
- .string('hello')
- .digit()
- .word()
- .char('.')
- .char('#')
- .end()
- )
+ expr = RegexBuilder().any_of().string('hello').digit().word().char('.').char('#').end()
regex_equality('/(?:hello|\\d|\\w|[\\.\\#])/', expr)
regex_compilation('(?:hello|\\d|\\w|[\\.\\#])', expr)
def test_any_of_range_fusion():
- expr = (
- RegexBuilder()
- .any_of()
- .range('a', 'z')
- .range('A', 'Z')
- .range('0', '9')
- .char('.')
- .char('#')
- .end()
- )
+ expr = RegexBuilder().any_of().range('a', 'z').range('A', 'Z').range('0', '9').char('.').char('#').end()
regex_equality('/[a-zA-Z0-9\\.\\#]/', expr)
regex_compilation('[a-zA-Z0-9\\.\\#]', expr)
def test_any_of_range_fusion_with_other_choices():
- expr = (
- RegexBuilder()
- .any_of()
- .range('a', 'z')
- .range('A', 'Z')
- .range('0', '9')
- .char('.')
- .char('#')
- .string('hello')
- .end()
- )
+ expr = RegexBuilder().any_of().range('a', 'z').range('A', 'Z').range('0', '9').char('.').char('#').string('hello').end()
regex_equality('/(?:hello|[a-zA-Z0-9\\.\\#])/', expr)
regex_compilation('(?:hello|[a-zA-Z0-9\\.\\#])', expr)
@@ -236,14 +176,7 @@ def test_named_capture():
def test_bad_name_error():
try:
- (
- RegexBuilder()
- .named_capture('hello world')
- .string('hello ')
- .word()
- .char('!')
- .end()
- )
+ (RegexBuilder().named_capture('hello world').string('hello ').word().char('!').end())
except Exception as e:
assert isinstance(e, Exception)
@@ -268,15 +201,7 @@ def test_same_name_error():
def test_named_back_reference():
- expr = (
- RegexBuilder()
- .named_capture('this_is_the_name')
- .string('hello ')
- .word()
- .char('!')
- .end()
- .named_back_reference('this_is_the_name')
- )
+ expr = RegexBuilder().named_capture('this_is_the_name').string('hello ').word().char('!').end().named_back_reference('this_is_the_name')
regex_equality('/(?P<this_is_the_name>hello\\ \\w!)\\k<this_is_the_name>/', expr)
# Python does not support named back references, so we raise an error
try:
@@ -293,15 +218,7 @@ def test_named_back_reference_no_cg_exists():
def test_back_reference():
- expr = (
- RegexBuilder()
- .capture()
- .string('hello ')
- .word()
- .char('!')
- .end()
- .back_reference(1)
- )
+ expr = RegexBuilder().capture().string('hello ').word().char('!').end().back_reference(1)
regex_equality('/(hello\\ \\w!)\\1/', expr)
regex_compilation('(hello\\ \\w!)\\1', expr)
@@ -314,14 +231,7 @@ def test_back_reference_no_cg_exists():
def test_group():
- expr = (
- RegexBuilder()
- .group()
- .string('hello ')
- .word()
- .char('!')
- .end()
- )
+ expr = RegexBuilder().group().string('hello ').word().char('!').end()
regex_equality('/(?:hello\\ \\w!)/', expr)
regex_compilation('(?:hello\\ \\w!)', expr)
@@ -332,50 +242,27 @@ def test_error_when_called_with_no_stack():
except Exception as e:
assert isinstance(e, Exception)
+
def test_assert_ahead():
- expr = (
- RegexBuilder()
- .assert_ahead()
- .range('a', 'f')
- .end()
- .range('a', 'z')
- )
+ expr = RegexBuilder().assert_ahead().range('a', 'f').end().range('a', 'z')
regex_equality('/(?=[a-f])[a-z]/', expr)
regex_compilation('(?=[a-f])[a-z]', expr)
def test_assert_behind():
- expr = (
- RegexBuilder()
- .assert_behind()
- .string('hello ')
- .end()
- .range('a', 'z')
- )
+ expr = RegexBuilder().assert_behind().string('hello ').end().range('a', 'z')
regex_equality('/(?<=hello\\ )[a-z]/', expr)
regex_compilation('(?<=hello\\ )[a-z]', expr)
def test_assert_not_ahead():
- expr = (
- RegexBuilder()
- .assert_not_ahead()
- .range('a', 'f')
- .end()
- .range('0', '9')
- )
+ expr = RegexBuilder().assert_not_ahead().range('a', 'f').end().range('0', '9')
regex_equality('/(?![a-f])[0-9]/', expr)
regex_compilation('(?![a-f])[0-9]', expr)
def test_assert_not_behind():
- expr = (
- RegexBuilder()
- .assert_not_behind()
- .string('hello ')
- .end()
- .range('a', 'z')
- )
+ expr = RegexBuilder().assert_not_behind().string('hello ').end().range('a', 'z')
regex_equality('/(?<!hello\\ )[a-z]/', expr)
regex_compilation('(?<!hello\\ )[a-z]', expr)
@@ -458,6 +345,12 @@ def test_anything_but_chars():
regex_compilation('[^aeiou\\.\\-]', expr)
+def test_anything_but_string():
+ expr = RegexBuilder().anything_but_string('aeiou.')
+ regex_equality('/(?:[^a][^e][^i][^o][^u][^\\][^.])/', expr)
+ regex_compilation('(?:[^a][^e][^i][^o][^u][^\\][^.])', expr)
+
+
def test_anything_but_range():
expr = RegexBuilder().anything_but_range('a', 'z')
regex_equality('/[^a-z]/', expr)
@@ -506,27 +399,13 @@ def test_must_be_instance_error():
def test_simple_se():
- expr = (
- RegexBuilder()
- .start_of_input()
- .at_least(3).digit()
- .subexpression(simple_se)
- .range('0', '9')
- .end_of_input()
- )
+ expr = RegexBuilder().start_of_input().at_least(3).digit().subexpression(simple_se).range('0', '9').end_of_input()
regex_equality('/^\\d{3,}hello.world[0-9]$/', expr)
regex_compilation('^\\d{3,}hello.world[0-9]$', expr)
def test_simple_quantified_se():
- expr = (
- RegexBuilder()
- .start_of_input()
- .at_least(3).digit()
- .one_or_more().subexpression(simple_se)
- .range('0', '9')
- .end_of_input()
- )
+ expr = RegexBuilder().start_of_input().at_least(3).digit().one_or_more().subexpression(simple_se).range('0', '9').end_of_input()
regex_equality('/^\\d{3,}(?:hello.world)+[0-9]$/', expr)
regex_compilation('^\\d{3,}(?:hello.world)+[0-9]$', expr)
@@ -536,83 +415,67 @@ def test_flags_se():
RegexBuilder()
.dot_all()
.start_of_input()
- .at_least(3).digit()
+ .at_least(3)
+ .digit()
.subexpression(flags_se, {'ignore_flags': False})
.range('0', '9')
.end_of_input()
)
regex_equality('/^\\d{3,}hello.world[0-9]$/IMS', expr)
- regex_compilation('^\\d{3,}hello.world[0-9]$', expr, f = re.M | re.I | re.S)
+ regex_compilation('^\\d{3,}hello.world[0-9]$', expr, f=re.M | re.I | re.S)
def test_flags_se_ignore_flags():
- expr = (
- RegexBuilder()
- .dot_all()
- .start_of_input()
- .at_least(3).digit()
- .subexpression(flags_se)
- .range('0', '9')
- .end_of_input()
- )
+ expr = RegexBuilder().dot_all().start_of_input().at_least(3).digit().subexpression(flags_se).range('0', '9').end_of_input()
regex_equality('/^\\d{3,}hello.world[0-9]$/S', expr)
- regex_compilation('^\\d{3,}hello.world[0-9]$', expr, f = re.S)
+ regex_compilation('^\\d{3,}hello.world[0-9]$', expr, f=re.S)
def test_ignore_start_and_end():
- expr = (
- RegexBuilder()
- .at_least(3).digit()
- .subexpression(start_end_se)
- .range('0', '9')
- )
+ expr = RegexBuilder().at_least(3).digit().subexpression(start_end_se).range('0', '9')
regex_equality('/\\d{3,}hello.world[0-9]/', expr)
regex_compilation('\\d{3,}hello.world[0-9]', expr)
def test_dont_ignore_start_and_end():
try:
- (
- RegexBuilder()
- .at_least(3).digit()
- .subexpression(start_end_se, {'ignore_start_and_end': False})
- .range('0', '9')
- )
+ (RegexBuilder().at_least(3).digit().subexpression(start_end_se, {'ignore_start_and_end': False}).range('0', '9'))
except Exception as e:
assert isinstance(e, Exception)
+
+def test_dont_ignore_start_and_end2():
+ try:
+ se = RegexBuilder().start_of_input().string('hello').any_char().string('world')
+ (RegexBuilder().at_least(3).digit().subexpression(se, {'ignore_start_and_end': False}).range('0', '9'))
+ except Exception as e:
+ assert isinstance(e, Exception)
+
+
+def test_dont_ignore_start_and_end3():
+ try:
+ se = RegexBuilder().string('hello').any_char().string('world').end_of_input()
+ (RegexBuilder().at_least(3).digit().subexpression(se, {'ignore_start_and_end': False}).range('0', '9'))
+ except Exception as e:
+ assert isinstance(e, Exception)
+
+
def test_start_defined_in_me_and_se():
try:
- (
- RegexBuilder()
- .start_of_input()
- .at_least(3).digit()
- .subexpression(start_end_se, {'ignore_start_and_end': False})
- .range('0', '9')
- )
+ (RegexBuilder().start_of_input().at_least(3).digit().subexpression(start_end_se, {'ignore_start_and_end': False}).range('0', '9'))
except Exception as e:
assert isinstance(e, Exception)
+
def test_end_defined_in_me_and_se():
try:
- (
- RegexBuilder()
- .at_least(3).digit()
- .subexpression(start_end_se, {'ignore_start_and_end': False})
- .range('0', '9')
- .end_of_input()
- )
+ (RegexBuilder().at_least(3).digit().subexpression(start_end_se, {'ignore_start_and_end': False}).range('0', '9').end_of_input())
except Exception as e:
assert isinstance(e, Exception)
def test_no_namespacing():
- expr = (
- RegexBuilder()
- .at_least(3).digit()
- .subexpression(nc_se)
- .range('0', '9')
- )
+ expr = RegexBuilder().at_least(3).digit().subexpression(nc_se).range('0', '9')
regex_equality('/\\d{3,}(?P<module>.{2})\\k<module>[0-9]/', expr)
try:
expr.to_regex()
@@ -621,67 +484,35 @@ def test_no_namespacing():
def test_namespacing():
- expr = (
- RegexBuilder()
- .at_least(3).digit()
- .subexpression(nc_se, {'namespace': 'yolo'})
- .range('0', '9')
- )
+ expr = RegexBuilder().at_least(3).digit().subexpression(nc_se, {'namespace': 'yolo'}).range('0', '9')
regex_equality('/\\d{3,}(?P<yolomodule>.{2})\\k<yolomodule>[0-9]/', expr)
try:
expr.to_regex()
except Exception as e:
assert isinstance(e, Exception)
+
def test_group_name_collision_error():
try:
- (
- RegexBuilder()
- .namedCapture('module')
- .at_least(3).digit()
- .end()
- .subexpression(nc_se)
- .range('0', '9')
- )
+ (RegexBuilder().namedCapture('module').at_least(3).digit().end().subexpression(nc_se).range('0', '9'))
except Exception as e:
assert isinstance(e, Exception)
def test_group_name_collision_error_after_namespacing():
try:
- (
- RegexBuilder()
- .namedCapture('module')
- .at_least(3).digit()
- .end()
- .subexpression(nc_se, {'namespace': 'yolo'})
- .range('0', '9')
- )
+ (RegexBuilder().namedCapture('module').at_least(3).digit().end().subexpression(nc_se, {'namespace': 'yolo'}).range('0', '9'))
except Exception as e:
assert isinstance(e, Exception)
+
def test_indexed_back_referencing():
- expr = (
- RegexBuilder()
- .capture()
- .at_least(3).digit()
- .end()
- .subexpression(indexed_back_reference_se)
- .back_reference(1)
- .range('0', '9')
- )
+ expr = RegexBuilder().capture().at_least(3).digit().end().subexpression(indexed_back_reference_se).back_reference(1).range('0', '9')
regex_equality('/(\\d{3,})(.{2})\\2\\1[0-9]/', expr)
regex_compilation('(\\d{3,})(.{2})\\2\\1[0-9]', expr)
+
def test_deeply_nested_se():
- expr = (
- RegexBuilder()
- .capture()
- .at_least(3).digit()
- .end()
- .subexpression(first_layer_se)
- .back_reference(1)
- .range('0', '9')
- )
+ expr = RegexBuilder().capture().at_least(3).digit().end().subexpression(first_layer_se).back_reference(1).range('0', '9')
regex_equality('/(\\d{3,})outer\\ begin(?P<inner_subexpression>(?:.{2})?)outer\\ end\\1[0-9]/', expr)
regex_compilation('(\\d{3,})outer\\ begin(?P<inner_subexpression>(?:.{2})?)outer\\ end\\1[0-9]', expr)