aboutsummaryrefslogtreecommitdiff
path: root/tests
diff options
context:
space:
mode:
Diffstat (limited to 'tests')
-rw-r--r-- tests/test_builder.py 291
1 files changed, 61 insertions, 230 deletions
diff --git a/tests/test_builder.py b/tests/test_builder.py
index 4ba9a76..2fca0f0 100644
--- a/tests/test_builder.py
+++ b/tests/test_builder.py
@@ -3,45 +3,13 @@ import re
from edify import RegexBuilder
simple_se = RegexBuilder().string('hello').any_char().string('world')
-flags_se = (
- RegexBuilder()
- .multi_line()
- .ignore_case()
- .string('hello')
- .any_char()
- .string('world')
-)
-start_end_se = (
- RegexBuilder()
- .start_of_input()
- .string('hello')
- .any_char()
- .string('world')
- .end_of_input()
-)
-nc_se = (
- RegexBuilder()
- .named_capture('module')
- .exactly(2).any_char()
- .end()
- .named_back_reference('module')
-)
-indexed_back_reference_se = (
- RegexBuilder()
- .capture()
- .exactly(2).any_char()
- .end()
- .back_reference(1)
-)
+flags_se = RegexBuilder().multi_line().ignore_case().string('hello').any_char().string('world')
+start_end_se = RegexBuilder().start_of_input().string('hello').any_char().string('world').end_of_input()
+nc_se = RegexBuilder().named_capture('module').exactly(2).any_char().end().named_back_reference('module')
+indexed_back_reference_se = RegexBuilder().capture().exactly(2).any_char().end().back_reference(1)
nested_se = RegexBuilder().exactly(2).any_char()
first_layer_se = (
- RegexBuilder()
- .string('outer begin')
- .named_capture('inner_subexpression')
- .optional()
- .subexpression(nested_se)
- .end()
- .string('outer end')
+ RegexBuilder().string('outer begin').named_capture('inner_subexpression').optional().subexpression(nested_se).end().string('outer end')
)
@@ -177,47 +145,19 @@ def test_null_byte():
def test_any_of_basic():
- expr = (
- RegexBuilder()
- .any_of()
- .string('hello')
- .digit()
- .word()
- .char('.')
- .char('#')
- .end()
- )
+ expr = RegexBuilder().any_of().string('hello').digit().word().char('.').char('#').end()
regex_equality('/(?:hello|\\d|\\w|[\\.\\#])/', expr)
regex_compilation('(?:hello|\\d|\\w|[\\.\\#])', expr)
def test_any_of_range_fusion():
- expr = (
- RegexBuilder()
- .any_of()
- .range('a', 'z')
- .range('A', 'Z')
- .range('0', '9')
- .char('.')
- .char('#')
- .end()
- )
+ expr = RegexBuilder().any_of().range('a', 'z').range('A', 'Z').range('0', '9').char('.').char('#').end()
regex_equality('/[a-zA-Z0-9\\.\\#]/', expr)
regex_compilation('[a-zA-Z0-9\\.\\#]', expr)
def test_any_of_range_fusion_with_other_choices():
- expr = (
- RegexBuilder()
- .any_of()
- .range('a', 'z')
- .range('A', 'Z')
- .range('0', '9')
- .char('.')
- .char('#')
- .string('hello')
- .end()
- )
+ expr = RegexBuilder().any_of().range('a', 'z').range('A', 'Z').range('0', '9').char('.').char('#').string('hello').end()
regex_equality('/(?:hello|[a-zA-Z0-9\\.\\#])/', expr)
regex_compilation('(?:hello|[a-zA-Z0-9\\.\\#])', expr)
@@ -236,14 +176,7 @@ def test_named_capture():
def test_bad_name_error():
try:
- (
- RegexBuilder()
- .named_capture('hello world')
- .string('hello ')
- .word()
- .char('!')
- .end()
- )
+ (RegexBuilder().named_capture('hello world').string('hello ').word().char('!').end())
except Exception as e:
assert isinstance(e, Exception)
@@ -268,15 +201,7 @@ def test_same_name_error():
def test_named_back_reference():
- expr = (
- RegexBuilder()
- .named_capture('this_is_the_name')
- .string('hello ')
- .word()
- .char('!')
- .end()
- .named_back_reference('this_is_the_name')
- )
+ expr = RegexBuilder().named_capture('this_is_the_name').string('hello ').word().char('!').end().named_back_reference('this_is_the_name')
regex_equality('/(?P<this_is_the_name>hello\\ \\w!)\\k<this_is_the_name>/', expr)
# Python's `re` does not support the \k<name> named back-reference syntax (it uses (?P=name) instead), so we raise an error
try:
@@ -293,15 +218,7 @@ def test_named_back_reference_no_cg_exists():
def test_back_reference():
- expr = (
- RegexBuilder()
- .capture()
- .string('hello ')
- .word()
- .char('!')
- .end()
- .back_reference(1)
- )
+ expr = RegexBuilder().capture().string('hello ').word().char('!').end().back_reference(1)
regex_equality('/(hello\\ \\w!)\\1/', expr)
regex_compilation('(hello\\ \\w!)\\1', expr)
@@ -314,14 +231,7 @@ def test_back_reference_no_cg_exists():
def test_group():
- expr = (
- RegexBuilder()
- .group()
- .string('hello ')
- .word()
- .char('!')
- .end()
- )
+ expr = RegexBuilder().group().string('hello ').word().char('!').end()
regex_equality('/(?:hello\\ \\w!)/', expr)
regex_compilation('(?:hello\\ \\w!)', expr)
@@ -332,50 +242,27 @@ def test_error_when_called_with_no_stack():
except Exception as e:
assert isinstance(e, Exception)
+
def test_assert_ahead():
- expr = (
- RegexBuilder()
- .assert_ahead()
- .range('a', 'f')
- .end()
- .range('a', 'z')
- )
+ expr = RegexBuilder().assert_ahead().range('a', 'f').end().range('a', 'z')
regex_equality('/(?=[a-f])[a-z]/', expr)
regex_compilation('(?=[a-f])[a-z]', expr)
def test_assert_behind():
- expr = (
- RegexBuilder()
- .assert_behind()
- .string('hello ')
- .end()
- .range('a', 'z')
- )
+ expr = RegexBuilder().assert_behind().string('hello ').end().range('a', 'z')
regex_equality('/(?<=hello\\ )[a-z]/', expr)
regex_compilation('(?<=hello\\ )[a-z]', expr)
def test_assert_not_ahead():
- expr = (
- RegexBuilder()
- .assert_not_ahead()
- .range('a', 'f')
- .end()
- .range('0', '9')
- )
+ expr = RegexBuilder().assert_not_ahead().range('a', 'f').end().range('0', '9')
regex_equality('/(?![a-f])[0-9]/', expr)
regex_compilation('(?![a-f])[0-9]', expr)
def test_assert_not_behind():
- expr = (
- RegexBuilder()
- .assert_not_behind()
- .string('hello ')
- .end()
- .range('a', 'z')
- )
+ expr = RegexBuilder().assert_not_behind().string('hello ').end().range('a', 'z')
regex_equality('/(?<!hello\\ )[a-z]/', expr)
regex_compilation('(?<!hello\\ )[a-z]', expr)
@@ -458,6 +345,12 @@ def test_anything_but_chars():
regex_compilation('[^aeiou\\.\\-]', expr)
+def test_anything_but_string():
+ expr = RegexBuilder().anything_but_string('aeiou.')
+ regex_equality('/(?:[^a][^e][^i][^o][^u][^\\][^.])/', expr)
+ regex_compilation('(?:[^a][^e][^i][^o][^u][^\\][^.])', expr)
+
+
def test_anything_but_range():
expr = RegexBuilder().anything_but_range('a', 'z')
regex_equality('/[^a-z]/', expr)
@@ -506,27 +399,13 @@ def test_must_be_instance_error():
def test_simple_se():
- expr = (
- RegexBuilder()
- .start_of_input()
- .at_least(3).digit()
- .subexpression(simple_se)
- .range('0', '9')
- .end_of_input()
- )
+ expr = RegexBuilder().start_of_input().at_least(3).digit().subexpression(simple_se).range('0', '9').end_of_input()
regex_equality('/^\\d{3,}hello.world[0-9]$/', expr)
regex_compilation('^\\d{3,}hello.world[0-9]$', expr)
def test_simple_quantified_se():
- expr = (
- RegexBuilder()
- .start_of_input()
- .at_least(3).digit()
- .one_or_more().subexpression(simple_se)
- .range('0', '9')
- .end_of_input()
- )
+ expr = RegexBuilder().start_of_input().at_least(3).digit().one_or_more().subexpression(simple_se).range('0', '9').end_of_input()
regex_equality('/^\\d{3,}(?:hello.world)+[0-9]$/', expr)
regex_compilation('^\\d{3,}(?:hello.world)+[0-9]$', expr)
@@ -536,83 +415,67 @@ def test_flags_se():
RegexBuilder()
.dot_all()
.start_of_input()
- .at_least(3).digit()
+ .at_least(3)
+ .digit()
.subexpression(flags_se, {'ignore_flags': False})
.range('0', '9')
.end_of_input()
)
regex_equality('/^\\d{3,}hello.world[0-9]$/IMS', expr)
- regex_compilation('^\\d{3,}hello.world[0-9]$', expr, f = re.M | re.I | re.S)
+ regex_compilation('^\\d{3,}hello.world[0-9]$', expr, f=re.M | re.I | re.S)
def test_flags_se_ignore_flags():
- expr = (
- RegexBuilder()
- .dot_all()
- .start_of_input()
- .at_least(3).digit()
- .subexpression(flags_se)
- .range('0', '9')
- .end_of_input()
- )
+ expr = RegexBuilder().dot_all().start_of_input().at_least(3).digit().subexpression(flags_se).range('0', '9').end_of_input()
regex_equality('/^\\d{3,}hello.world[0-9]$/S', expr)
- regex_compilation('^\\d{3,}hello.world[0-9]$', expr, f = re.S)
+ regex_compilation('^\\d{3,}hello.world[0-9]$', expr, f=re.S)
def test_ignore_start_and_end():
- expr = (
- RegexBuilder()
- .at_least(3).digit()
- .subexpression(start_end_se)
- .range('0', '9')
- )
+ expr = RegexBuilder().at_least(3).digit().subexpression(start_end_se).range('0', '9')
regex_equality('/\\d{3,}hello.world[0-9]/', expr)
regex_compilation('\\d{3,}hello.world[0-9]', expr)
def test_dont_ignore_start_and_end():
try:
- (
- RegexBuilder()
- .at_least(3).digit()
- .subexpression(start_end_se, {'ignore_start_and_end': False})
- .range('0', '9')
- )
+ (RegexBuilder().at_least(3).digit().subexpression(start_end_se, {'ignore_start_and_end': False}).range('0', '9'))
except Exception as e:
assert isinstance(e, Exception)
+
+def test_dont_ignore_start_and_end2():
+ try:
+ se = RegexBuilder().start_of_input().string('hello').any_char().string('world')
+ (RegexBuilder().at_least(3).digit().subexpression(se, {'ignore_start_and_end': False}).range('0', '9'))
+ except Exception as e:
+ assert isinstance(e, Exception)
+
+
+def test_dont_ignore_start_and_end3():
+ try:
+ se = RegexBuilder().string('hello').any_char().string('world').end_of_input()
+ (RegexBuilder().at_least(3).digit().subexpression(se, {'ignore_start_and_end': False}).range('0', '9'))
+ except Exception as e:
+ assert isinstance(e, Exception)
+
+
def test_start_defined_in_me_and_se():
try:
- (
- RegexBuilder()
- .start_of_input()
- .at_least(3).digit()
- .subexpression(start_end_se, {'ignore_start_and_end': False})
- .range('0', '9')
- )
+ (RegexBuilder().start_of_input().at_least(3).digit().subexpression(start_end_se, {'ignore_start_and_end': False}).range('0', '9'))
except Exception as e:
assert isinstance(e, Exception)
+
def test_end_defined_in_me_and_se():
try:
- (
- RegexBuilder()
- .at_least(3).digit()
- .subexpression(start_end_se, {'ignore_start_and_end': False})
- .range('0', '9')
- .end_of_input()
- )
+ (RegexBuilder().at_least(3).digit().subexpression(start_end_se, {'ignore_start_and_end': False}).range('0', '9').end_of_input())
except Exception as e:
assert isinstance(e, Exception)
def test_no_namespacing():
- expr = (
- RegexBuilder()
- .at_least(3).digit()
- .subexpression(nc_se)
- .range('0', '9')
- )
+ expr = RegexBuilder().at_least(3).digit().subexpression(nc_se).range('0', '9')
regex_equality('/\\d{3,}(?P<module>.{2})\\k<module>[0-9]/', expr)
try:
expr.to_regex()
@@ -621,67 +484,35 @@ def test_no_namespacing():
def test_namespacing():
- expr = (
- RegexBuilder()
- .at_least(3).digit()
- .subexpression(nc_se, {'namespace': 'yolo'})
- .range('0', '9')
- )
+ expr = RegexBuilder().at_least(3).digit().subexpression(nc_se, {'namespace': 'yolo'}).range('0', '9')
regex_equality('/\\d{3,}(?P<yolomodule>.{2})\\k<yolomodule>[0-9]/', expr)
try:
expr.to_regex()
except Exception as e:
assert isinstance(e, Exception)
+
def test_group_name_collision_error():
try:
- (
- RegexBuilder()
- .namedCapture('module')
- .at_least(3).digit()
- .end()
- .subexpression(nc_se)
- .range('0', '9')
- )
+ (RegexBuilder().namedCapture('module').at_least(3).digit().end().subexpression(nc_se).range('0', '9'))
except Exception as e:
assert isinstance(e, Exception)
def test_group_name_collision_error_after_namespacing():
try:
- (
- RegexBuilder()
- .namedCapture('module')
- .at_least(3).digit()
- .end()
- .subexpression(nc_se, {'namespace': 'yolo'})
- .range('0', '9')
- )
+ (RegexBuilder().namedCapture('module').at_least(3).digit().end().subexpression(nc_se, {'namespace': 'yolo'}).range('0', '9'))
except Exception as e:
assert isinstance(e, Exception)
+
def test_indexed_back_referencing():
- expr = (
- RegexBuilder()
- .capture()
- .at_least(3).digit()
- .end()
- .subexpression(indexed_back_reference_se)
- .back_reference(1)
- .range('0', '9')
- )
+ expr = RegexBuilder().capture().at_least(3).digit().end().subexpression(indexed_back_reference_se).back_reference(1).range('0', '9')
regex_equality('/(\\d{3,})(.{2})\\2\\1[0-9]/', expr)
regex_compilation('(\\d{3,})(.{2})\\2\\1[0-9]', expr)
+
def test_deeply_nested_se():
- expr = (
- RegexBuilder()
- .capture()
- .at_least(3).digit()
- .end()
- .subexpression(first_layer_se)
- .back_reference(1)
- .range('0', '9')
- )
+ expr = RegexBuilder().capture().at_least(3).digit().end().subexpression(first_layer_se).back_reference(1).range('0', '9')
regex_equality('/(\\d{3,})outer\\ begin(?P<inner_subexpression>(?:.{2})?)outer\\ end\\1[0-9]/', expr)
regex_compilation('(\\d{3,})outer\\ begin(?P<inner_subexpression>(?:.{2})?)outer\\ end\\1[0-9]', expr)