about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Bobby <[email protected]> 2022-09-01 00:33:14 -0400
committer Bobby <[email protected]> 2022-09-01 00:33:14 -0400
commit 9c6c9c4570a0c583d93e197d3414c35315afa5fb (patch)
tree 5aeebbd3a5b843f8e3d34a5e44d1e01901b5d920 /src
parent 45b7045187a955c1957fab08c11173003f89a141 (diff)
download edify-9c6c9c4570a0c583d93e197d3414c35315afa5fb.tar.xz
edify-9c6c9c4570a0c583d93e197d3414c35315afa5fb.zip
Rewriting builder based on Super Expressive
Diffstat (limited to 'src')
-rw-r--r-- src/edify/builder/builder.py 298
-rw-r--r-- src/edify/builder/errors.py 38
-rw-r--r-- src/edify/builder/helpers/core.py 33
-rw-r--r-- src/edify/builder/helpers/quantifiers.py 11
-rw-r--r-- src/edify/builder/helpers/regex_vars.py 1
-rw-r--r-- src/edify/builder/helpers/t.py 48
6 files changed, 420 insertions, 9 deletions
diff --git a/src/edify/builder/builder.py b/src/edify/builder/builder.py
index 5c10288..d22ff24 100644
--- a/src/edify/builder/builder.py
+++ b/src/edify/builder/builder.py
@@ -1,16 +1,296 @@
-from .abc import Builder
+import re
+from sys import _current_frames
+from .helpers.core import create_stack_frame, assertion, escape_special
+from .helpers.t import t
+from .helpers.regex_vars import named_group_regex
+from .errors import *
+
+class RegexBuilder:
+    """Chainable builder that records a regular expression as typed elements.
+
+    Elements are collected in a stack of frames.  The root frame sits at the
+    bottom; ``capture()``, ``named_capture()``, ``group()``, ``any_of()`` and
+    the ``assert_*`` methods push a new frame, and ``end()`` pops it and
+    appends the finished construct to its parent frame.  A quantifier method
+    stores a pending quantifier on the current frame; it is wrapped around
+    the next element added through ``match_element()``, ``any_of_chars()``
+    or ``end()``.
+    """
+
+    # Class-level placeholder kept for backward compatibility; every instance
+    # replaces it with its own dictionary in __init__.
+    state = {}
+
-class RegexBuilder(Builder):
     def __init__(self):
-        self.parts = []
+        """Initialise the builder with no flags set and only the root frame."""
+        self.state = {
+            'has_defined_start': False,
+            'has_defined_end': False,
+            'flags': {
+                'g': False,
+                'y': False,
+                'm': False,
+                'i': False,
+                'u': False,
+                's': False,
+            },
+            # A list used as a stack: frame-creating methods push and end()
+            # pops, so an enclosing frame survives while a nested construct
+            # is being built.
+            'stack': [create_stack_frame(t['root'])],
+            'named_groups': [],
+            'total_capture_groups': 0,
+        }
+
+    def allow_multiple_matches(self):
+        """Turn on the ``g`` flag and return the builder for chaining."""
+        self.state['flags']['g'] = True
+        return self
+
+    def sticky(self):
+        """Turn on the ``y`` flag and return the builder for chaining."""
+        self.state['flags']['y'] = True
+        return self
+
+    def line_by_line(self):
+        """Turn on the ``m`` flag and return the builder for chaining."""
+        self.state['flags']['m'] = True
+        return self
+
+    def case_insensitive(self):
+        """Turn on the ``i`` flag and return the builder for chaining."""
+        self.state['flags']['i'] = True
+        return self
+
+    def unicode(self):
+        """Turn on the ``u`` flag and return the builder for chaining."""
+        self.state['flags']['u'] = True
+        return self
+
+    def single_line(self):
+        """Turn on the ``s`` flag and return the builder for chaining."""
+        self.state['flags']['s'] = True
+        return self
+
-    def build(self):
-        return "".join(map(str, self.parts))
-    def add(self, part):
-        if not (issubclass(type(part), Builder) or isinstance(part, str)):
-            raise ValueError(f"{part} is not a valid Part")
+    def get_current_frame(self):
+        """Return the innermost open frame (the top of the stack)."""
+        return self.state['stack'][-1]
-        self.parts.append(part)
+
+    def get_current_element_array(self):
+        """Return the list of elements collected by the current frame."""
+        return self.get_current_frame()['elements']
+
+    def match_element(self, type_fn):
+        """Append an element to the current frame, applying any pending quantifier.
+
+        Returns the builder for chaining.
+        """
+        self.get_current_element_array().append(self.apply_quantifier(type_fn))
+        return self
+
+    def apply_quantifier(self, element):
+        """Wrap ``element`` in the current frame's pending quantifier, if any.
+
+        The pending quantifier is cleared once used, so it affects exactly
+        one element.  Without a pending quantifier ``element`` is returned
+        unchanged.
+        """
+        current_frame = self.get_current_frame()
+        quantifier = current_frame['quantifier']
+        if quantifier is None:
+            return element
+        current_frame['quantifier'] = None
+        return quantifier['value'](element)
+
+    def frame_creating_element(self, type_fn):
+        """Open a nested frame of the given type; ``end()`` closes it.
+
+        The new frame is pushed on top of the stack so the enclosing frame,
+        including its collected elements and pending quantifier, is kept.
+        """
+        self.state['stack'].append(create_stack_frame(type_fn))
         return self
+
+    def tracked_named_group(self, name):
+        """Validate a capture-group name and remember it.
+
+        Raises ``Exception`` (through ``assertion``) when ``name`` is not a
+        string, is empty, was already registered, or does not match
+        ``named_group_regex``.
+        """
+        # The helper derives the type itself, so the value is passed rather
+        # than type(name); passing the type made it report "<class 'type'>".
+        assertion(type(name) is str, must_be_a_string("Name", name))
+        assertion(len(name) > 0, must_be_one_character("Name"))
+        assertion(name not in self.state['named_groups'], cannot_create_duplicate_named_group(name))
+        assertion(re.compile(named_group_regex, re.I).match(name), name_not_valid(name))
+        self.state['named_groups'].append(name)
+
+    def capture(self):
+        """Open a capture-group frame and count it as a capture group."""
+        self.frame_creating_element(t['capture'])
+        self.state['total_capture_groups'] += 1
+        return self
+
+    def named_capture(self, name):
+        """Open a named capture-group frame after validating ``name``.
+
+        Raises ``Exception`` when the name is rejected by
+        ``tracked_named_group``; in that case no frame is opened.
+        """
+        self.tracked_named_group(name)
+        self.frame_creating_element(t['named_capture'](name))
+        self.state['total_capture_groups'] += 1
+        return self
+
+    def quantifier_element(self, type_fn):
+        """Set the argument-less quantifier registered in ``t`` under ``type_fn``.
+
+        Raises ``Exception`` when the current frame already has a pending
+        quantifier.
+        """
+        return self._set_quantifier(type_fn, t[type_fn])
+
+    def _set_quantifier(self, name, quantifier):
+        """Store ``quantifier`` as pending on the current frame.
+
+        ``name`` is only used in the error message raised when another
+        quantifier is still pending.
+        """
+        current_frame = self.get_current_frame()
+        pending = current_frame['quantifier']
+        if pending is not None:
+            raise Exception(unable_to_quantify(name, pending['type']))
+        current_frame['quantifier'] = quantifier
+        return self
+
+    def _assert_valid_range(self, x, y):
+        """Validate the bounds shared by ``between`` and ``between_lazy``."""
+        assertion(type(x) is int and x >= 0, must_be_integer_greater_than_zero('x'))
+        assertion(type(y) is int and y > 0, must_be_positive_integer('y'))
+        assertion(x < y, 'X must be less than Y.')
+
+    def any_char(self):
+        """Append an ``any_char`` element."""
+        return self.match_element(t['any_char'])
+
+    def whitespace_char(self):
+        """Append a ``whitespace_char`` element."""
+        return self.match_element(t['whitespace_char'])
+
+    def non_whitespace_char(self):
+        """Append a ``non_whitespace_char`` element."""
+        return self.match_element(t['non_whitespace_char'])
+
+    def digit(self):
+        """Append a ``digit`` element."""
+        return self.match_element(t['digit'])
+
+    def non_digit(self):
+        """Append a ``non_digit`` element."""
+        return self.match_element(t['non_digit'])
+
+    def word(self):
+        """Append a ``word`` element."""
+        return self.match_element(t['word'])
+
+    def non_word(self):
+        """Append a ``non_word`` element."""
+        return self.match_element(t['non_word'])
+
+    def word_boundary(self):
+        """Append a ``word_boundary`` element."""
+        return self.match_element(t['word_boundary'])
+
+    def non_word_boundary(self):
+        """Append a ``non_word_boundary`` element."""
+        return self.match_element(t['non_word_boundary'])
+
+    def new_line(self):
+        """Append a ``new_line`` element."""
+        return self.match_element(t['new_line'])
+
+    def carriage_return(self):
+        """Append a ``carriage_return`` element."""
+        return self.match_element(t['carriage_return'])
+
+    def tab(self):
+        """Append a ``tab`` element."""
+        return self.match_element(t['tab'])
+
+    def null_byte(self):
+        """Append a ``null_byte`` element."""
+        return self.match_element(t['null_byte'])
+
+    def named_back_reference(self, name):
+        """Append a back reference to a previously registered named group.
+
+        Raises ``Exception`` when no group called ``name`` has been created.
+        """
+        assertion(name in self.state['named_groups'], named_group_does_not_exist(name))
+        return self.match_element(t['named_back_reference'](name))
+
+    def back_reference(self, index: int):
+        """Append a back reference to the capture group numbered ``index``.
+
+        Raises ``Exception`` when ``index`` is not an int or is outside
+        ``1..total_capture_groups``.
+        """
+        assertion(type(index) is int, 'Index must be an integer.')
+        total = self.state['total_capture_groups']
+        assertion(0 < index <= total, invalid_total_capture_groups_index(index, total))
+        return self.match_element(t['back_reference'](index))
+
+    def any_of(self):
+        """Open an ``any_of`` frame."""
+        return self.frame_creating_element(t['any_of'])
+
+    def group(self):
+        """Open a ``group`` frame."""
+        return self.frame_creating_element(t['group'])
+
+    def assert_ahead(self):
+        """Open an ``assert_ahead`` frame."""
+        return self.frame_creating_element(t['assert_ahead'])
+
+    def assert_not_ahead(self):
+        """Open an ``assert_not_ahead`` frame."""
+        return self.frame_creating_element(t['assert_not_ahead'])
+
+    def assert_behind(self):
+        """Open an ``assert_behind`` frame."""
+        return self.frame_creating_element(t['assert_behind'])
+
+    def assert_not_behind(self):
+        """Open an ``assert_not_behind`` frame."""
+        return self.frame_creating_element(t['assert_not_behind'])
+
+    def optional(self):
+        """Set the ``optional`` quantifier for the next element."""
+        return self.quantifier_element('optional')
+
+    def zero_or_more(self):
+        """Set the ``zero_or_more`` quantifier for the next element."""
+        return self.quantifier_element('zero_or_more')
+
+    def zero_or_more_lazy(self):
+        """Set the ``zero_or_more_lazy`` quantifier for the next element."""
+        return self.quantifier_element('zero_or_more_lazy')
+
+    def one_or_more(self):
+        """Set the ``one_or_more`` quantifier for the next element."""
+        return self.quantifier_element('one_or_more')
+
+    def one_or_more_lazy(self):
+        """Set the ``one_or_more_lazy`` quantifier for the next element."""
+        return self.quantifier_element('one_or_more_lazy')
+
+    def exactly(self, count):
+        """Set an ``exactly`` quantifier; ``count`` must be a positive int."""
+        assertion(type(count) is int and count > 0, must_be_positive_integer('count'))
+        return self._set_quantifier('exactly', t['exactly'](count))
+
+    def at_least(self, count):
+        """Set an ``at_least`` quantifier; ``count`` must be a positive int."""
+        assertion(type(count) is int and count > 0, must_be_positive_integer('count'))
+        return self._set_quantifier('at_least', t['at_least'](count))
+
+    def between(self, x, y):
+        """Set a ``between`` quantifier; requires ints with ``0 <= x < y``."""
+        self._assert_valid_range(x, y)
+        return self._set_quantifier('between', t['between'](x, y))
+
+    def between_lazy(self, x, y):
+        """Set a ``between_lazy`` quantifier; requires ints with ``0 <= x < y``."""
+        self._assert_valid_range(x, y)
+        return self._set_quantifier('between_lazy', t['between_lazy'](x, y))
+
+    def start_of_input(self):
+        """Append a start-of-input marker.
+
+        Raises ``Exception`` when a start or an end of input has already
+        been defined.  No pending quantifier is applied to the marker.
+        """
+        assertion(self.state['has_defined_start'] is False, start_input_already_defined())
+        assertion(self.state['has_defined_end'] is False, cannot_define_start_after_end())
+        self.state['has_defined_start'] = True
+        self.get_current_element_array().append(t['start_of_input'])
+        return self
+
+    def end_of_input(self):
+        """Append an end-of-input marker.
+
+        Raises ``Exception`` when an end of input has already been defined.
+        No pending quantifier is applied to the marker.
+        """
+        assertion(self.state['has_defined_end'] is False, end_input_already_defined())
+        self.state['has_defined_end'] = True
+        self.get_current_element_array().append(t['end_of_input'])
+        return self
+
+    def any_of_chars(self, chars):
+        """Append an ``any_of_chars`` element built from the escaped ``chars``."""
+        return self.match_element(t['any_of_chars'](escape_special(chars)))
+
+    def end(self):
+        """Close the innermost open frame and add it to its parent.
+
+        Raises ``Exception`` when only the root frame is open.
+        """
+        stack = self.state['stack']
+        assertion(len(stack) > 1, can_not_end_while_building_root_exp())
+        closed_frame = stack.pop()
+        finished = closed_frame['type']['value'](closed_frame['elements'])
+        # The parent is the current frame again after the pop, so a
+        # quantifier set before the construct was opened wraps it as a whole.
+        self.get_current_element_array().append(self.apply_quantifier(finished))
+        return self
+
+
diff --git a/src/edify/builder/errors.py b/src/edify/builder/errors.py
new file mode 100644
index 0000000..27258cd
--- /dev/null
+++ b/src/edify/builder/errors.py
@@ -0,0 +1,38 @@
+def must_be_a_string(value, variable_name):
+    """Build the error message for an argument that should have been a string.
+
+    Despite the parameter names, ``value`` is the label shown to the user
+    (for example ``"Name"``) and ``variable_name`` is the offending object or
+    its type.  The names are kept so keyword callers keep working.
+    """
+    # A caller may already pass ``type(x)``; taking type() of that again
+    # would always report "<class 'type'>", so a class is reported as-is.
+    if isinstance(variable_name, type):
+        received = variable_name
+    else:
+        received = type(variable_name)
+    return '{} must be a string. (got {})'.format(value, received)
+
+def must_be_one_character(variable_name):
+    """Build the error message for a value that must not be empty.
+
+    The message says "at least" because the check it accompanies only
+    rejects empty values; longer values are accepted.
+    """
+    return '{} must be at least one character long.'.format(variable_name)
+
+def cannot_create_duplicate_named_group(name):
+    """Return the message reporting that a named group called ``name`` already exists."""
+    return f'Can not create duplicate named group "{name}".'
+
+def name_not_valid(name):
+    """Return the message reporting that ``name`` is not an acceptable group name."""
+    return (
+        f'Name {name} is not valid. '
+        '(only alphanumeric characters and underscores are allowed)'
+    )
+
+def named_group_does_not_exist(name):
+    """Return the message reporting a reference to an unknown named group."""
+    return f'Named group "{name}" does not exist (create one with .named_capture()).'
+
+def invalid_total_capture_groups_index(index, total_capture_groups):
+    """Return the message reporting a back-reference index outside the known groups."""
+    return f'Invalid index #{index}. There are only {total_capture_groups} capture groups.'
+
+def must_be_positive_integer(variable_name):
+    """Return the message stating that ``variable_name`` must be a positive integer."""
+    return f'{variable_name} must be a positive integer.'
+
+def must_be_integer_greater_than_zero(variable_name):
+    """Return the message stating that ``variable_name`` must be an integer above zero."""
+    return f'{variable_name} must be an integer greater than zero.'
+
+def unable_to_quantify(quantifier, type):
+    """Return the message for setting ``quantifier`` while ``type`` is still pending."""
+    return (
+        f'Can not quantify regular expression with {quantifier}, '
+        f'because it has already been quantified with {type}.'
+    )
+
+def start_input_already_defined():
+    """Return the message for a second start-of-input definition."""
+    message = 'Regex already has a start of input.'
+    return message
+
+def cannot_define_start_after_end():
+    """Return the message for defining a start of input after an end of input."""
+    message = 'Can not define a start of input after defining an end of input.'
+    return message
+
+def end_input_already_defined():
+    """Return the message for a second end-of-input definition."""
+    message = 'Regex already has an end of input.'
+    return message
+
+def can_not_end_while_building_root_exp():
+    """Return the message for calling end() with only the root expression open."""
+    message = 'Can not end while building the root expression.'
+    return message
diff --git a/src/edify/builder/helpers/core.py b/src/edify/builder/helpers/core.py
new file mode 100644
index 0000000..fa775ff
--- /dev/null
+++ b/src/edify/builder/helpers/core.py
@@ -0,0 +1,33 @@
+import re
+def as_type(type, options=None):
+    """Return a factory producing descriptor dicts for one element type.
+
+    Args:
+        type: Name stored under the ``'type'`` key of every descriptor.
+        options: Optional dict stored under ``'options'``.  When omitted, a
+            fresh empty dict is created for this factory, so factories made
+            without options never share one dict.
+
+    Returns:
+        A function ``as_type_fn(value=None)`` returning
+        ``{'type': type, 'value': value, 'options': options}``.
+    """
+    if options is None:
+        options = {}
+
+    def as_type_fn(value=None):
+        return {
+            'type': type,
+            'value': value,
+            'options': options,
+        }
+    return as_type_fn
+
+
+def deferred_type(type, options=None):
+    """Return a descriptor whose ``'value'`` is its own factory function.
+
+    The result has the same shape as a descriptor made by ``as_type``, but
+    ``'value'`` holds the ``as_type`` factory, so the descriptor can later be
+    called through ``descriptor['value'](child)`` to wrap a child.
+
+    Args:
+        type: Name stored under the ``'type'`` key.
+        options: Optional dict stored under ``'options'``.  A fresh empty
+            dict is used when omitted, so descriptors never share one
+            default dict.
+    """
+    if options is None:
+        options = {}
+    type_fn = as_type(type, options)
+    return type_fn(type_fn)
+
+
+def create_stack_frame(type):
+    """Return a new frame of the given type with no quantifier and no elements."""
+    return dict(type=type, quantifier=None, elements=[])
+
+
+def assertion(condition, message):
+    """Raise ``Exception(message)`` unless ``condition`` is truthy."""
+    if condition:
+        return
+    raise Exception(message)
+
+def escape_special(s):
+    """Return ``s`` with regular-expression special characters escaped.
+
+    Python counterpart of the JavaScript ``escapeSpecial`` helper this port
+    is based on; the escaping itself is delegated to ``re.escape``.
+    """
+    escaped = re.escape(s)
+    return escaped
+
+
diff --git a/src/edify/builder/helpers/quantifiers.py b/src/edify/builder/helpers/quantifiers.py
new file mode 100644
index 0000000..1810658
--- /dev/null
+++ b/src/edify/builder/helpers/quantifiers.py
@@ -0,0 +1,11 @@
+# Maps a quantifier name to its regex suffix.  Fixed quantifiers map to the
+# suffix itself; counted quantifiers map to a function that builds it.  The
+# two "between" entries take a two-item sequence (lower bound, upper bound).
+quantifier_table = {
+    'one_or_more': '+',
+    'one_or_more_lazy': '+?',
+    'zero_or_more': '*',
+    'zero_or_more_lazy': '*?',
+    'optional': '?',
+    'exactly': lambda times: f'{{{times}}}',
+    'at_least': lambda times: f'{{{times},}}',
+    'between': lambda times: f'{{{times[0]},{times[1]}}}',
+    'between_lazy': lambda times: f'{{{times[0]},{times[1]}}}?',
+}
diff --git a/src/edify/builder/helpers/regex_vars.py b/src/edify/builder/helpers/regex_vars.py
new file mode 100644
index 0000000..4d7f17c
--- /dev/null
+++ b/src/edify/builder/helpers/regex_vars.py
@@ -0,0 +1 @@
+# Pattern a capture-group name must match: a letter a-z followed by any
+# number of word characters (the builder compiles it with re.I, so upper
+# case is accepted too).  The end is anchored with \Z rather than $ because
+# $ also matches just before a trailing newline and would accept "abc\n".
+named_group_regex = r"^[a-z]\w*\Z"
diff --git a/src/edify/builder/helpers/t.py b/src/edify/builder/helpers/t.py
new file mode 100644
index 0000000..8ac3d7d
--- /dev/null
+++ b/src/edify/builder/helpers/t.py
@@ -0,0 +1,48 @@
+from .core import as_type, deferred_type
+
+# Registry of element descriptors used by the builder, keyed by element name.
+#
+# Four shapes of entry appear below:
+#   * as_type(name)()            -> a finished descriptor dict whose 'value' is None
+#   * as_type(name[, options])   -> a factory; call it with a value to get a descriptor
+#   * deferred_type(name, opts)  -> a descriptor dict whose 'value' is its own factory
+#   * lambda ...: deferred_type  -> needs its arguments before yielding such a descriptor
+t = {
+ # Finished descriptors that carry no value.
+ 'root': as_type('root')(),
+ 'noop': as_type('noop')(),
+ 'start_of_input': as_type('start_of_input')(),
+ 'end_of_input': as_type('end_of_input')(),
+ 'any_char': as_type('any_char')(),
+ 'whitespace_char': as_type('whitespace_char')(),
+ 'non_whitespace_char': as_type('non_whitespace_char')(),
+ 'digit': as_type('digit')(),
+ 'non_digit': as_type('non_digit')(),
+ 'word': as_type('word')(),
+ 'non_word': as_type('non_word')(),
+ 'word_boundary': as_type('word_boundary')(),
+ 'non_word_boundary': as_type('non_word_boundary')(),
+ 'new_line': as_type('new_line')(),
+ 'carriage_return': as_type('carriage_return')(),
+ 'tab': as_type('tab')(),
+ 'null_byte': as_type('null_byte')(),
+ # Factories: call with the value to obtain the descriptor.
+ 'any_of_chars': as_type('any_of_chars'),
+ 'anything_but_string': as_type('anything_but_string'),
+ 'anything_but_chars': as_type('anything_but_chars'),
+ 'anything_but_range': as_type('anything_but_range'),
+ 'char': as_type('char'),
+ 'range': as_type('range'),
+ 'string': as_type('string', {'quantifiers_require_group': True}),
+ # Back references and frame types; those holding child elements are
+ # marked with 'contains_children'.
+ 'named_back_reference': lambda name: deferred_type('named_back_reference', {'name': name}),
+ 'back_reference': lambda index: deferred_type('back_reference', {'index': index}),
+ 'capture': deferred_type('capture', {'contains_children': True}),
+ 'sub_expression': as_type('sub_expression', {'contains_children': True, 'quantifiers_require_group': True}),
+ 'named_capture': lambda name: deferred_type('named_capture', {'name': name, 'contains_children': True}),
+ 'group': deferred_type('group', {'contains_children': True}),
+ 'any_of': deferred_type('any_of', {'contains_children': True}),
+ 'assert_ahead': deferred_type('assert_ahead', {'contains_children': True}),
+ 'assert_not_ahead': deferred_type('assert_not_ahead', {'contains_children': True}),
+ 'assert_behind': deferred_type('assert_behind', {'contains_children': True}),
+ 'assert_not_behind': deferred_type('assert_not_behind', {'contains_children': True}),
+ # Quantifiers; note the singular 'contains_child' option key here.
+ 'exactly': lambda times: deferred_type('exactly', {'times': times, 'contains_child': True}),
+ 'at_least': lambda times: deferred_type('at_least', {'times': times, 'contains_child': True}),
+ 'between': lambda x, y: deferred_type('between', {'times': [x, y], 'contains_child': True}),
+ 'between_lazy': lambda x, y: deferred_type('between_lazy', {'times': [x, y], 'contains_child': True}),
+ 'zero_or_more': deferred_type('zero_or_more', {'contains_child': True}),
+ 'zero_or_more_lazy': deferred_type('zero_or_more_lazy', {'contains_child': True}),
+ 'one_or_more': deferred_type('one_or_more', {'contains_child': True}),
+ 'one_or_more_lazy': deferred_type('one_or_more_lazy', {'contains_child': True}),
+ 'optional': deferred_type('optional', {'contains_child': True}),
+}