about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Bobby <[email protected]> 2022-09-01 17:49:26 -0400
committer: Bobby <[email protected]> 2022-09-01 17:49:26 -0400
commit: 9740e3756b7dc91808295341a02274350fb6c05d (patch)
tree: 5cde37c1f686993abd5ee31df853aa9d680ec023
parent: 0fa612e282a68b9d87d05b851d373852813ea7c5 (diff)
download: edify-9740e3756b7dc91808295341a02274350fb6c05d.tar.xz
download: edify-9740e3756b7dc91808295341a02274350fb6c05d.zip
Creating a deep copy at a different pointer location for each RegexBuilder class subroutine call
-rw-r--r-- src/edify/builder/builder.py 207
-rw-r--r-- src/edify/builder/errors.py 21
-rw-r--r-- src/edify/builder/helpers/core.py 19
3 files changed, 206 insertions, 41 deletions
diff --git a/src/edify/builder/builder.py b/src/edify/builder/builder.py
index a38877a..6213bb6 100644
--- a/src/edify/builder/builder.py
+++ b/src/edify/builder/builder.py
@@ -1,20 +1,28 @@
+from copy import deepcopy as clone
import re
+from .errors import can_not_call_se
from .errors import can_not_end_while_building_root_exp
from .errors import cannot_create_duplicate_named_group
from .errors import cannot_define_start_after_end
from .errors import end_input_already_defined
+from .errors import ignore_se
from .errors import invalid_total_capture_groups_index
from .errors import must_be_a_string
+from .errors import must_be_instance
from .errors import must_be_integer_greater_than_zero
from .errors import must_be_one_character
from .errors import must_be_positive_integer
+from .errors import must_be_single_character
+from .errors import must_have_a_smaller_value
from .errors import name_not_valid
from .errors import named_group_does_not_exist
from .errors import start_input_already_defined
from .errors import unable_to_quantify
+from .helpers.core import apply_subexpression_defaults
from .helpers.core import assertion
from .helpers.core import create_stack_frame
+from .helpers.core import deep_copy
from .helpers.core import escape_special
from .helpers.regex_vars import named_group_regex
from .helpers.t import t
@@ -44,22 +52,34 @@ class RegexBuilder:
}
def allow_multiple_matches(self):
- self.state['stack']['flags']['g'] = True
+ next = clone(self)
+ next.state['stack']['flags']['g'] = True
+ return next
def sticky(self):
- self.state['stack']['flags']['y'] = True
+ next = clone(self)
+ next.state['stack']['flags']['y'] = True
+ return next
def line_by_line(self):
- self.state['stack']['flags']['m'] = True
+ next = clone(self)
+ next.state['stack']['flags']['m'] = True
+ return next
def case_insensitive(self):
- self.state['stack']['flags']['i'] = True
+ next = clone(self)
+ next.state['stack']['flags']['i'] = True
+ return next
def unicode(self):
- self.state['stack']['flags']['u'] = True
+ next = clone(self)
+ next.state['stack']['flags']['u'] = True
+ return next
def single_line(self):
- self.state['stack']['flags']['s'] = True
+ next = clone(self)
+ next.state['stack']['flags']['s'] = True
+ return next
def get_current_frame(self):
return self.state['stack']
@@ -68,9 +88,9 @@ class RegexBuilder:
return self.get_current_frame()['elements']
def match_element(self, type_fn):
- current_element_array = self.get_current_element_array()
- current_element_array.append(self.apply_quantifier(type_fn))
- return self
+ next = clone(self)
+ next.get_current_element_array().append(next.apply_quantifier(type_fn))
+ return next
def apply_quantifier(self, element):
current_frame = self.get_current_frame()
@@ -81,9 +101,10 @@ class RegexBuilder:
return element
def frame_creating_element(self, type_fn):
+ next = clone(self)
new_frame = create_stack_frame(type_fn)
- self.state['stack'] = new_frame
- return self
+ next.state['stack'].append(new_frame)
+ return next
def tracked_named_group(self, name):
assertion(type(name) is str, must_be_a_string("Name", type(name)))
@@ -93,24 +114,27 @@ class RegexBuilder:
self.state['named_groups'].append(name)
def capture(self):
+ next = clone(self)
new_frame = create_stack_frame(t['capture'])
- self.state['stack'] = new_frame
- self.state['total_capture_groups'] += 1
- return self
+ next.state['stack'].append(new_frame)
+ next.state['total_capture_groups'] += 1
+ return next
def named_capture(self, name):
+ next = clone(self)
new_frame = create_stack_frame(t['named_capture'](name))
- self.tracked_named_group(name)
- self.state['stack'] = new_frame
- self.state['total_capture_groups'] += 1
- return self
+ next.tracked_named_group(name)
+ next.state['stack'].append(new_frame)
+ next.state['total_capture_groups'] += 1
+ return next
def quantifier_element(self, type_fn):
- current_frame = self.get_current_frame()
+ next = clone(self)
+ current_frame = next.get_current_frame()
if current_frame['quantifier'] is not None:
raise Exception(unable_to_quantify(type_fn, current_frame['quantifier']['type']))
current_frame['quantifier'] = t[type_fn]
- return self
+ return next
def any_char(self):
return self.match_element(t['any_char'])
@@ -206,56 +230,157 @@ class RegexBuilder:
def at_least(self, count):
assertion(type(count) is int and count > 0, must_be_positive_integer('count'))
- current_frame = self.get_current_frame()
+ next = clone(self)
+ current_frame = next.get_current_frame()
if current_frame['quantifier'] is not None:
raise Exception(unable_to_quantify("at_least", current_frame['quantifier']['type']))
current_frame['quantifier'] = t['at_least'](count)
- return self
+ return next
def between(self, x, y):
assertion(type(x) is int and x >= 0, must_be_integer_greater_than_zero('x'))
assertion(type(y) is int and y > 0, must_be_positive_integer('y'))
assertion(x < y, 'X must be less than Y.')
- current_frame = self.get_current_frame()
+ next = clone(self)
+ current_frame = next.get_current_frame()
if current_frame['quantifier'] is not None:
raise Exception(unable_to_quantify("between", current_frame['quantifier']['type']))
current_frame['quantifier'] = t['between'](x, y)
- return self
+ return next
def between_lazy(self, x, y):
assertion(type(x) is int and x >= 0, must_be_integer_greater_than_zero('x'))
assertion(type(y) is int and y > 0, must_be_positive_integer('y'))
assertion(x < y, 'X must be less than Y.')
- current_frame = self.get_current_frame()
+ next = clone(self)
+ current_frame = next.get_current_frame()
if current_frame['quantifier'] is not None:
raise Exception(unable_to_quantify("between_lazy", current_frame['quantifier']['type']))
current_frame['quantifier'] = t['between_lazy'](x, y)
- return self
+ return next
def start_of_input(self):
assertion(self.state['has_defined_start'] is False, start_input_already_defined())
assertion(self.state['has_defined_end'] is False, cannot_define_start_after_end())
- self.state['has_defined_start'] = True
- current_element_array = self.get_current_element_array()
- current_element_array.append(t['start_of_input'])
- return self
+ next = clone(self)
+ next.state['has_defined_start'] = True
+ next.get_current_element_array().append(t['start_of_input'])
+ return next
def end_of_input(self):
assertion(self.state['has_defined_end'] is False, end_input_already_defined())
- self.state['has_defined_end'] = True
- current_element_array = self.get_current_element_array()
- current_element_array.append(t['end_of_input'])
- return self
+ next = clone(self)
+ next.state['has_defined_end'] = True
+ next.get_current_element_array().append(t['end_of_input'])
+ return next
def any_of_chars(self, chars):
+ next = clone(self)
element_value = t['any_of_chars'](escape_special(chars))
- current_frame = self.get_current_frame()
- current_frame['elements'].append(self.apply_quantifier(element_value))
- return self
+ current_frame = next.get_current_frame()
+ current_frame['elements'].append(next.apply_quantifier(element_value))
+ return next
def end(self):
assertion(len(self.state['stack']) > 1, can_not_end_while_building_root_exp())
- old_frame = self.state['stack'].pop()
- current_frame = self.get_current_frame()
- current_frame['elements'].append(self.apply_quantifier(old_frame['type']['value'](old_frame['elements'])))
- return self
+ next = clone(self)
+ old_frame = next.state['stack'].pop()
+ current_frame = next.get_current_frame()
+ current_frame['elements'].append(next.apply_quantifier(old_frame['type']['value'](old_frame['elements'])))
+ return next
+
+ def anything_but_string(self, string):
+ assertion(type(string) is str, must_be_a_string('Value', string))
+ assertion(len(string) > 0, must_be_one_character('Value'))
+ next = clone(self)
+ element_value = t['anything_but_string'](escape_special(string))
+ current_frame = next.get_current_frame()
+ current_frame['elements'].append(next.apply_quantifier(element_value))
+ return next
+
+ def anything_but_chars(self, chars):
+ assertion(type(chars) is str, must_be_a_string('Value', chars))
+ assertion(len(chars) > 0, must_be_one_character('Value'))
+ next = clone(self)
+ element_value = t['anything_but_chars'](escape_special(chars))
+ current_frame = next.get_current_frame()
+ current_frame['elements'].append(next.apply_quantifier(element_value))
+ return next
+
+ def anything_but_range(self, a, b):
+ str_a = str(a)
+ str_b = str(b)
+ assertion(len(str_a) == 1, must_be_single_character('a', str_a))
+ assertion(len(str_b) == 1, must_be_single_character('b', str_b))
+ assertion(ord(str_a) < ord(str_b), must_have_a_smaller_value(str_a, str_b))
+ next = clone(self)
+ element_value = t['anything_but_range']([a, b])
+ current_frame = next.get_current_frame()
+ current_frame['elements'].append(next.apply_quantifier(element_value))
+ return next
+
+ def string(self, s):
+ assertion(type(s) is str, must_be_a_string('Value', s))
+ assertion(len(s) > 0, must_be_one_character('Value'))
+ next = clone(self)
+ element_value = t['string'](escape_special(s)) if len(s) > 1 else t['char'](escape_special(s))
+ current_frame = next.get_current_frame()
+ current_frame['elements'].append(next.apply_quantifier(element_value))
+ return next
+
+ def char(self, c):
+ assertion(type(c) is str, must_be_a_string('Value', c))
+ assertion(len(c) == 1, must_be_single_character('Value', c))
+ next = clone(self)
+ element_value = t['char'](escape_special(c))
+ current_frame = next.get_current_frame()
+ current_frame['elements'].append(next.apply_quantifier(element_value))
+ return next
+
+ def range(self, a, b):
+ str_a = str(a)
+ str_b = str(b)
+ assertion(len(str_a) == 1, must_be_single_character('a', str_a))
+ assertion(len(str_b) == 1, must_be_single_character('b', str_b))
+ assertion(ord(str_a) < ord(str_b), must_have_a_smaller_value(str_a, str_b))
+ next = clone(self)
+ element_value = t['range']([a, b])
+ current_frame = next.get_current_frame()
+ current_frame['elements'].append(next.apply_quantifier(element_value))
+ return next
+
+ def merge_subexpression(self, el, options, parent, increment_capture_groups):
+ next_el = deep_copy(el)
+
+ if next_el['type'] == 'back_reference':
+ next_el['index'] += parent['state']['total_capture_groups']
+ if next_el['type'] == 'capture':
+ increment_capture_groups()
+ if next_el['type'] == 'named_capture':
+ group_name = '{}{}'.format(options['namespace'], next_el['name']) if options['namespace'] else next_el['name']
+ parent['tracked_named_group'] = group_name
+ next_el['name'] = group_name
+ if next_el['type'] == 'named_back_reference':
+ next_el['name'] = '{}{}'.format(options['namespace'], next_el['name']) if options['namespace'] else next_el['name']
+ if next_el['contains_child']:
+ next_el['value'] = self.merge_subexpression(next_el['value'], options, parent, increment_capture_groups)
+ elif next_el['contains_children']:
+ next_el['value'] = next_el['value'].map(lambda e: self.merge_subexpression(e, options, parent, increment_capture_groups))
+ if next_el['type'] == 'start_of_input':
+ if options['ignore_start_and_end']:
+ return t['noop']
+ assertion(parent['state']['has_defined_start'] is False, str(start_input_already_defined()) + str(ignore_se()))
+ assertion(parent['state']['has_defined_end'] is False, str(end_input_already_defined()) + str(ignore_se()))
+ parent['state']['has_defined_start'] = True
+ if next_el['type'] == 'end_of_input':
+ if options['ignore_start_and_end']:
+ return t['noop']
+ assertion(parent['state']['has_defined_end'] is False, str(end_input_already_defined()) + str(ignore_se()))
+ parent['state']['has_defined_end'] = True
+ return next_el
+
+ def subexpression(self, expr, opts={}):
+ assertion(isinstance(expr, RegexBuilder), must_be_instance("Expression", expr, "RegexBuilder"))
+ assertion(len(expr['state']['stack']) == 1, can_not_call_se(expr.get_current_frame()['type']['type']))
+ options = apply_subexpression_defaults(opts)
+
diff --git a/src/edify/builder/errors.py b/src/edify/builder/errors.py
index c9fe99a..741d047 100644
--- a/src/edify/builder/errors.py
+++ b/src/edify/builder/errors.py
@@ -48,3 +48,24 @@ def end_input_already_defined():
def can_not_end_while_building_root_exp():
return 'Can not end while building the root expression.'
+
+
+def must_be_single_character(value, variable_name):
+ return '{} must be a single character. (got {})'.format(value, type(variable_name))
+
+
+def must_have_a_smaller_value(a, b):
+ return '{} must have a smaller character value than {}. (a = {}, b = {})'.format(a, b, ord(a), ord(b))
+
+
+def ignore_se():
+ return 'You can ignore a subexpressions startOfInput/endOfInput markers with the ignoreStartAndEnd option'
+
+
+def must_be_instance(value, variable_name, class_name):
+ return '{} must be an instance of {}. (got {})'.format(value, class_name, type(variable_name))
+
+
+def can_not_call_se(cft):
+ return "Can not call subexpression a not yet fully specified regex object. \
+ \n (Try adding a .end() call to match the {} on the subexpression)".format(cft)
diff --git a/src/edify/builder/helpers/core.py b/src/edify/builder/helpers/core.py
index 15cf9ef..0788e11 100644
--- a/src/edify/builder/helpers/core.py
+++ b/src/edify/builder/helpers/core.py
@@ -31,3 +31,22 @@ def assertion(condition, message):
def escape_special(s):
return re.escape(s)
+
+
+def deep_copy(o):
+ if isinstance(o, list):
+ return [deep_copy(e) for e in o]
+ if isinstance(o, dict):
+ return {k: deep_copy(v) for k, v in o.items()}
+ return o
+
+
+def apply_subexpression_defaults(expr):
+ out = {**expr}
+ out['namespace'] = "" if 'namespace' not in out else out['namespace']
+ out['ignore_flags'] = True if 'ignore_flags' not in out else out['ignore_flags']
+ out['ignore_start_and_end'] = True if 'ignore_start_and_end' not in out else out['ignore_start_and_end']
+ assertion(type(out['namespace']) == str, 'namespace must be a string')
+ assertion(type(out['ignore_flags']) == bool, 'ignore_flags must be a boolean')
+ assertion(type(out['ignore_start_and_end']) == bool, 'ignore_start_and_end must be a boolean')
+ return out