From 9875b355df71a0f935f85d3b082f6febcc663a4f Mon Sep 17 00:00:00 2001 From: Bobby <30593201+luciferreeves@users.noreply.github.com> Date: Sat, 3 Sep 2022 02:08:20 -0400 Subject: Added Builder Docs --- docs/index.rst | 2 +- docs/regex-builder/builder/index.rst | 934 +++++++++++++++++++++++++++++++++++ 2 files changed, 935 insertions(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index baa9582..831bf7d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,7 +3,7 @@ Contents ======== .. toctree:: - :maxdepth: 2 + :maxdepth: 3 readme regex-builder/index diff --git a/docs/regex-builder/builder/index.rst b/docs/regex-builder/builder/index.rst index 978ae2d..ebe2e3b 100644 --- a/docs/regex-builder/builder/index.rst +++ b/docs/regex-builder/builder/index.rst @@ -1,3 +1,937 @@ RegexBuilder ============ +RegexBuilder is a class that helps you build regular expressions. It is based on the `SuperExpressive <https://github.com/francisrstokes/super-expressive>`_ library. The API uses the `fluent builder pattern <https://en.wikipedia.org/wiki/Fluent_interface>`_, and is completely immutable. It is built to be discoverable and predictable. + +- Properties and methods describe what they do in plain English. +- Order matters! Quantifiers are specified before the thing they change, just like in English (e.g. ``RegexBuilder().exactly(5).digit()``). +- If you make a mistake, you'll know how to fix it. Edify will guide you towards a fix if your expression is invalid. +- ``subexpressions`` can be used to create meaningful, reusable components. + +.any_char() +----------- + +``.any_char()`` matches any single character. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('.') + expr = RegexBuilder().any_char().to_regex() + assert expr.match('a') # Matches + assert expr.match('hello') # Matches + + +.whitespace_char() +------------------ + +``.whitespace_char()`` matches any whitespace character, including the special whitespace characters: ``\r\n\t\f\v``. + +.. 
code-block:: python + + from edify import RegexBuilder + + # returns re.compile('\s') + expr = RegexBuilder().whitespace_char().to_regex() + assert expr.match(' ') # Matches + assert expr.match('\n') # Matches + assert expr.match('\t') # Matches + assert expr.match('\r') # Matches + assert expr.match('\f') # Matches + assert expr.match('\v') # Matches + assert not expr.match('a') # Doesn't match + assert not expr.match('hello') # Doesn't match + + +.non_whitespace_char() +---------------------- + +``.non_whitespace_char()`` matches any non-whitespace character, excluding also the special whitespace characters: ``\r\n\t\f\v``. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('\S') + expr = RegexBuilder().non_whitespace_char().to_regex() + assert expr.match('a') # Matches + assert expr.match('hello') # Matches + assert not expr.match(' ') # Doesn't match + assert not expr.match('\n') # Doesn't match + assert not expr.match('\t') # Doesn't match + assert not expr.match('\r') # Doesn't match + assert not expr.match('\f') # Doesn't match + assert not expr.match('\v') # Doesn't match + assert not expr.match('\u00a0') # Doesn't match + assert not expr.match('\u2000') # Doesn't match + + +.digit() +-------- + +``.digit()`` matches any digit from ``0-9``. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('\d') + expr = RegexBuilder().digit().to_regex() + assert expr.match('1') # Matches + assert expr.match('9') # Matches + assert not expr.match('a') # Doesn't match + assert not expr.match('\u00a0') # Doesn't match + + +.non_digit() +------------- + +``.non_digit()`` matches any non-digit. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('\D') + expr = RegexBuilder().non_digit().to_regex() + assert expr.match('a') # Matches + assert expr.match('\u00a0') # Matches + assert not expr.match('1') # Doesn't match + assert not expr.match('9') # Doesn't match + +.. 
_word: + +.word() +------- + + +``.word()`` matches any alphanumeric character ``(a-z, A-Z, 0-9)``, as well as ``_``. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('\w') + expr = RegexBuilder().word().to_regex() + assert expr.match('a') # Matches + assert expr.match('1') # Matches + assert expr.match('_') # Matches + assert expr.match('hello') # Matches + + +.non_word() +----------- + +``.non_word()`` matches any character that is neither alphanumeric ``(a-z, A-Z, 0-9)`` nor ``_``. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('\W') + expr = RegexBuilder().non_word().to_regex() + assert not expr.match('a') # Doesn't match + assert not expr.match('1') # Doesn't match + assert expr.match('\u00a0') # Matches + assert expr.match('\u2000') # Matches + assert not expr.match('_') # Doesn't match + assert not expr.match('hello') # Doesn't match + + +.word_boundary() +----------------- + +``.word_boundary()`` matches (without consuming any characters) immediately between a character matched by :ref:`word` and a character not matched by :ref:`word` (in either order). + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('\d\b') + expr = RegexBuilder().digit().word_boundary().to_regex() + + +.non_word_boundary() +-------------------- + +``.non_word_boundary()`` matches (without consuming any characters) at the position between two characters matched by :ref:`word`. + +.. code-block:: python + + + from edify import RegexBuilder + + # returns re.compile('\d\B') + expr = RegexBuilder().digit().non_word_boundary().to_regex() + +.new_line() +----------- + +``.new_line()`` matches the newline ``\n`` character. + +.. 
code-block:: python + + from edify import RegexBuilder + + # returns re.compile('\n') + expr = RegexBuilder().new_line().to_regex() + assert expr.match('\n') # Matches + assert not expr.match('a') # Doesn't match + assert not expr.match('hello') # Doesn't match + +.carriage_return() +------------------- + +``.carriage_return()`` matches the carriage return ``\r`` character. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('\r') + expr = RegexBuilder().carriage_return().to_regex() + assert expr.match('\r') # Matches + assert not expr.match('a') # Doesn't match + assert not expr.match('hello') # Doesn't match + + +.tab() +------ + +``.tab()`` matches the tab ``\t`` character. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('\t') + expr = RegexBuilder().tab().to_regex() + assert expr.match('\t') # Matches + assert not expr.match('a') # Doesn't match + assert not expr.match('hello') # Doesn't match + + +.null_byte() +------------ + +``.null_byte()`` matches the null byte ``\0`` character. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('\0') + expr = RegexBuilder().null_byte().to_regex() + assert expr.match('\0') # Matches + assert not expr.match('a') # Doesn't match + assert not expr.match('hello') # Doesn't match + +.. _any_of: + +.any_of() +--------- + +``.any_of()`` matches a choice between specified elements. Needs to be finalised with :ref:`end`. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('(?:hello|[a-f0-9])') + expr = ( + RegexBuilder() + .any_of() + .range('a', 'f') + .range('0', '9') + .string('hello') + .end() + .to_regex() + ) + assert expr.match('a') # Matches + assert expr.match('f') # Matches + assert expr.match('9') # Matches + assert expr.match('hello') # Matches + assert not expr.match('g') # Doesn't match + assert not expr.match('good world') # Doesn't match + +.. 
_capture: + +.capture() +----------- + +``.capture()`` creates a capture group for the following elements. Needs to be finalised with :ref:`end`. Can later be referenced with :ref:`backreference`. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('([a-f][0-9]hello)') + expr = ( + RegexBuilder() + .capture() + .range('a', 'f') + .range('0', '9') + .string('hello') + .end() + .to_regex() + ) + assert expr.match('a9hello') # Matches + assert expr.match('f0hello') # Matches + assert not expr.match('g9hello') # Doesn't match + +.. _named_capture: + +.named_capture(name) +-------------------- + +``.named_capture()`` creates a named capture group for the following elements. Needs to be finalised with :ref:`end`. Can later be referenced with :ref:`named_back_reference` or :ref:`backreference`. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('(?P<interestingStuff>[a-f][0-9]hello)') + expr = ( + RegexBuilder() + .named_capture('interestingStuff') + .range('a', 'f') + .range('0', '9') + .string('hello') + .end() + .to_regex() + ) + assert expr.match('a9hello') # Matches + assert expr.match('f0hello') # Matches + assert not expr.match('g9hello') # Doesn't match + +.. _named_back_reference: + +.named_back_reference(name) +--------------------------- + +``.named_back_reference()`` matches exactly what was previously matched by a :ref:`named_capture`. + +.. warning:: + + Python does not support named back references. If you try to call the ``to_regex()`` method on a named back reference, it will raise an exception. For that reason, ``to_regex_string()`` is provided instead. It returns a string that can be used to create a regular expression. You can try using the regular expression directly with another library like `regex <https://pypi.org/project/regex/>`_. + +.. 
code-block:: python + + from edify import RegexBuilder + + # returns /(?<interestingStuff>[a-f][0-9]hello)something else\k<interestingStuff>/ + expr = ( + RegexBuilder() + .named_capture('interestingStuff') + .range('a', 'f') + .range('0', '9') + .string('hello') + .end() + .string('something else') + .named_back_reference('interestingStuff') + .to_regex_string() + ) + +.. _backreference: + +.back_reference(index) +---------------------- + +``.back_reference()`` matches exactly what was previously matched by a :ref:`capture` or :ref:`named_capture` using a positional index. Note that regex indexes start at 1, so the first capture group has index 1. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('([a-f][0-9]hello)\\1') + expr = ( + RegexBuilder() + .capture() + .range('a', 'f') + .range('0', '9') + .string('hello') + .end() + .back_reference(1) + .to_regex() + ) + assert expr.match('a9helloa9hello') # Matches + assert not expr.match('a9helloa9hell') # Doesn't match + +.. _group: + +.group() +-------- + +``.group()`` creates a non-capturing group for the following elements. Needs to be finalised with :ref:`end`. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('(?:[a-f][0-9]hello)?') + expr = ( + RegexBuilder() + .optional().group() + .range('a', 'f') + .range('0', '9') + .string('hello') + .end() + .to_regex() + ) + assert expr.match('a9hello') # Matches + assert expr.match('') # Matches + assert expr.match('g9hello') # Matches (empty match, because the group is optional) + +.. _end: + +.end() +------ + +``.end()`` signifies the end of a ``RegexBuilder`` grouping, such as a :ref:`capture`, :ref:`group` or :ref:`any_of` element. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('((?:hello|[a-f0-9]))') + expr = ( + RegexBuilder() + .capture() + .any_of() + .range('a', 'f') + .range('0', '9') + .string('hello') + .end() + .end() + .to_regex() + ) + +.. 
_assert_ahead: + +.assert_ahead() +--------------- + +``.assert_ahead()`` asserts that the following elements are found without consuming them. Needs to be finalised with :ref:`end`. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('(?=[a-f])[a-z]') + expr = ( + RegexBuilder() + .assert_ahead() + .range('a', 'f') + .end() + .range('a', 'z') + .to_regex() + ) + assert expr.match('a') # Matches + assert expr.match('f') # Matches + assert not expr.match('g') # Doesn't match + +.. _assert_not_ahead: + +.assert_not_ahead() +------------------- + +``.assert_not_ahead()`` asserts that the following elements are not found without consuming them. Needs to be finalised with :ref:`end`. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('(?![a-f])[g-z]') + expr = ( + RegexBuilder() + .assert_not_ahead() + .range('a', 'f') + .end() + .range('g', 'z') + .to_regex() + ) + assert expr.match('g') # Matches + assert expr.match('z') # Matches + assert not expr.match('a') # Doesn't match + +.. _assert_behind: + +.assert_behind() +---------------- + +``.assert_behind()`` asserts that the elements contained within are found immediately before this point in the string. Needs to be finalised with :ref:`end`. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('(?<=hello )world') + expr = ( + RegexBuilder() + .assert_behind() + .string('hello ') + .end() + .string('world') + .to_regex() + ) + +.. _assert_not_behind: + +.assert_not_behind() +-------------------- + +``.assert_not_behind()`` asserts that the elements contained within are not found immediately before this point in the string. Needs to be finalised with :ref:`end`. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('(?