From cc7d36c42fdf4704574a583afe6d1a74157d481e Mon Sep 17 00:00:00 2001 From: Bobby <30593201+luciferreeves@users.noreply.github.com> Date: Sat, 3 Sep 2022 00:19:51 -0400 Subject: Added Flags --- docs/index.rst | 4 +- docs/installation.rst | 7 --- docs/reference/edify.rst | 9 ---- docs/reference/index.rst | 7 --- docs/regex-builder/builder/index.rst | 3 ++ docs/regex-builder/flags/index.rst | 96 ++++++++++++++++++++++++++++++++++++ docs/regex-builder/index.rst | 9 ++++ docs/usage.rst | 7 --- 8 files changed, 109 insertions(+), 33 deletions(-) delete mode 100644 docs/installation.rst delete mode 100644 docs/reference/edify.rst delete mode 100644 docs/reference/index.rst create mode 100644 docs/regex-builder/builder/index.rst create mode 100644 docs/regex-builder/flags/index.rst create mode 100644 docs/regex-builder/index.rst delete mode 100644 docs/usage.rst (limited to 'docs') diff --git a/docs/index.rst b/docs/index.rst index ad842d5..baa9582 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -6,9 +6,7 @@ Contents :maxdepth: 2 readme - installation - usage - reference/index + regex-builder/index contributing authors changelog diff --git a/docs/installation.rst b/docs/installation.rst deleted file mode 100644 index 1fb9d10..0000000 --- a/docs/installation.rst +++ /dev/null @@ -1,7 +0,0 @@ -============ -Installation -============ - -At the command line:: - - pip install edify diff --git a/docs/reference/edify.rst b/docs/reference/edify.rst deleted file mode 100644 index 6374fd4..0000000 --- a/docs/reference/edify.rst +++ /dev/null @@ -1,9 +0,0 @@ -edify -===== - -.. testsetup:: - - from edify import * - -.. automodule:: edify - :members: diff --git a/docs/reference/index.rst b/docs/reference/index.rst deleted file mode 100644 index df7e04e..0000000 --- a/docs/reference/index.rst +++ /dev/null @@ -1,7 +0,0 @@ -Reference -========= - -.. 
toctree:: - :glob: - - edify* diff --git a/docs/regex-builder/builder/index.rst b/docs/regex-builder/builder/index.rst new file mode 100644 index 0000000..978ae2d --- /dev/null +++ b/docs/regex-builder/builder/index.rst @@ -0,0 +1,3 @@ +RegexBuilder +============ + diff --git a/docs/regex-builder/flags/index.rst b/docs/regex-builder/flags/index.rst new file mode 100644 index 0000000..3afdd3f --- /dev/null +++ b/docs/regex-builder/flags/index.rst @@ -0,0 +1,96 @@ +Flags +===== + +Flags in Edify are the same as the flags in the ``re`` module. Edify supports the following flags: + - ``A``: ASCII (standard ASCII) character + - ``D``: DEBUG, returns ``re.DEBUG`` + - ``I``: Ignore Case + - ``M``: Multi Line + - ``S``: Dot All + - ``X``: Verbose + +To learn more about the flags, please refer to the ``re`` module documentation. If you wish to use the ``/g`` or any other unsupported flag, you can use the ``re.search`` or ``re.match`` methods, according to your needs. If you need to support extra flags, you can try looking at the `regex <https://pypi.org/project/regex/>`_ package. To get started, import the ``RegexBuilder`` class:: + + from edify import RegexBuilder + + +ASCII Only Matching +-------------------- + +Make ``\w``, ``\W``, ``\b``, ``\B``, ``\d``, ``\D``, ``\s``, and ``\S`` perform ASCII-only matching instead of full Unicode matching. This is only meaningful for Unicode patterns, and is ignored for byte patterns. Corresponds to the inline flag ``(?a)``. + +Example +^^^^^^^ + +.. code-block:: python + + # returns re.compile('hello', re.ASCII) + expr = RegexBuilder().ascii_only().string('hello').to_regex() + +Display Debug Information +------------------------- +Display debug information about the compiled expression. No corresponding inline flag. + +Example +^^^^^^^ + +.. 
code-block:: python + + # returns re.compile('hello', re.DEBUG) + expr = RegexBuilder().debug().string('hello').to_regex() + + +Ignore Case +------------ +Perform case-insensitive matching; expressions like ``[A-Z]`` will also match lowercase letters. Full Unicode matching (such as ``Ü`` matching ``ü``) also works unless the ``re.ASCII`` flag is used to disable non-ASCII matches. Corresponds to the inline flag ``(?i)``. + +Example +^^^^^^^ + +.. code-block:: python + + # returns re.compile('hello', re.IGNORECASE) + expr = RegexBuilder().ignore_case().string('hello').to_regex() + + +Multi Line +---------- +When specified, the pattern character ``'^'`` matches at the beginning of the string and at the beginning of each line (immediately following each newline); and the pattern character ``'$'`` matches at the end of the string and at the end of each line (immediately preceding each newline). By default, ``'^'`` matches only at the beginning of the string, and ``'$'`` only at the end of the string and immediately before the newline (if any) at the end of the string. Corresponds to the inline flag ``(?m)``. + + +Example +^^^^^^^ + +.. code-block:: python + + # returns re.compile('hello', re.MULTILINE) + expr = RegexBuilder().multi_line().string('hello').to_regex() + + +Dot All +------- + +Make the ``'.'`` special character match any character at all, including a newline; without this flag, ``'.'`` will match anything *except* a newline. Corresponds to the inline flag ``(?s)``. + + +Example +^^^^^^^ + +.. code-block:: python + + # returns re.compile('hello', re.DOTALL) + expr = RegexBuilder().dot_all().string('hello').to_regex() + + +Verbose +------- +This works the same as the ``re.VERBOSE`` flag, which allows you to write regular expressions that look nicer and are more readable by allowing you to visually separate logical sections of the pattern and add comments. 
However, this flag is of little practical use with Edify, but it is still available to keep the API consistent with the ``re`` module. Corresponds to the inline flag ``(?x)``. + + +Example +^^^^^^^ + +.. code-block:: python + + # returns re.compile('hello', re.VERBOSE) + expr = RegexBuilder().verbose().string('hello').to_regex() diff --git a/docs/regex-builder/index.rst b/docs/regex-builder/index.rst new file mode 100644 index 0000000..a52dac7 --- /dev/null +++ b/docs/regex-builder/index.rst @@ -0,0 +1,9 @@ +RegexBuilder API Reference +========================== + +.. toctree:: + :glob: + :maxdepth: 2 + + flags/index + builder/index diff --git a/docs/usage.rst b/docs/usage.rst deleted file mode 100644 index 7cce1cb..0000000 --- a/docs/usage.rst +++ /dev/null @@ -1,7 +0,0 @@ -===== -Usage -===== - -To use Edify in a project:: - - import edify -- cgit v1.2.3 From 9875b355df71a0f935f85d3b082f6febcc663a4f Mon Sep 17 00:00:00 2001 From: Bobby <30593201+luciferreeves@users.noreply.github.com> Date: Sat, 3 Sep 2022 02:08:20 -0400 Subject: Added Builder Docs --- docs/index.rst | 2 +- docs/regex-builder/builder/index.rst | 934 +++++++++++++++++++++++++++++++++++ 2 files changed, 935 insertions(+), 1 deletion(-) (limited to 'docs') diff --git a/docs/index.rst b/docs/index.rst index baa9582..831bf7d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,7 +3,7 @@ Contents ======== .. toctree:: - :maxdepth: 2 + :maxdepth: 3 readme regex-builder/index diff --git a/docs/regex-builder/builder/index.rst b/docs/regex-builder/builder/index.rst index 978ae2d..ebe2e3b 100644 --- a/docs/regex-builder/builder/index.rst +++ b/docs/regex-builder/builder/index.rst @@ -1,3 +1,937 @@ RegexBuilder ============ +RegexBuilder is a class that helps you build regular expressions. It is based on the `SuperExpressive <https://github.com/francisrstokes/super-expressive>`_ library. The API uses the `fluent builder pattern <https://en.wikipedia.org/wiki/Fluent_interface>`_, and is completely immutable. It is built to be discoverable and predictable. 
+ +- Properties and methods describe what they do in plain English. +- Order matters! Quantifiers are specified before the thing they change, just like in English (e.g. ``RegexBuilder().exactly(5).digit()``.) +- If you make a mistake, you'll know how to fix it. Edify will guide you towards a fix if your expression is invalid. +- ``subexpressions`` can be used to create meaningful, reusable components. + +.any_char() +----------- + +``.any_char()`` matches any single character. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('.') + expr = RegexBuilder().any_char().to_regex() + assert expr.match('a') # Matches + assert expr.match('hello') # Matches + + +.whitespace_char() +------------------ + +``.whitespace_char()`` matches any whitespace character, including the special whitespace characters: ``\r\n\t\f\v``. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('\s') + expr = RegexBuilder().whitespace_char().to_regex() + assert expr.match(' ') # Matches + assert expr.match('\n') # Matches + assert expr.match('\t') # Matches + assert expr.match('\r') # Matches + assert expr.match('\f') # Matches + assert expr.match('\v') # Matches + assert not expr.match('a') # Doesn't match + assert not expr.match('hello') # Doesn't match + + +.non_whitespace_char() +---------------------- + +``.non_whitespace_char()`` matches any non-whitespace character, excluding also the special whitespace characters: ``\r\n\t\f\v``. + +.. 
code-block:: python + + from edify import RegexBuilder + + # returns re.compile('\S') + expr = RegexBuilder().non_whitespace_char().to_regex() + assert expr.match('a') # Matches + assert expr.match('hello') # Matches + assert not expr.match(' ') # Doesn't match + assert not expr.match('\n') # Doesn't match + assert not expr.match('\t') # Doesn't match + assert not expr.match('\r') # Doesn't match + assert not expr.match('\f') # Doesn't match + assert not expr.match('\v') # Doesn't match + assert not expr.match('\u00a0') # Doesn't match + assert not expr.match('\u2000') # Doesn't match + + +.digit() +-------- + +``.digit()`` matches any digit from ``0-9``. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('\d') + expr = RegexBuilder().digit().to_regex() + assert expr.match('1') # Matches + assert expr.match('9') # Matches + assert not expr.match('a') # Doesn't match + assert not expr.match('\u00a0') # Doesn't match + + +.non_digit() +------------- + +``.non_digit()`` matches any non-digit. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('\D') + expr = RegexBuilder().non_digit().to_regex() + assert expr.match('a') # Matches + assert expr.match('\u00a0') # Matches + assert not expr.match('1') # Doesn't match + assert not expr.match('9') # Doesn't match + +.. _word: + +.word() +------- + + +``.word()`` matches any alpha-numeric ``(a-z, A-Z, 0-9)`` characters, as well as ``_``. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('\w') + expr = RegexBuilder().word().to_regex() + assert expr.match('a') # Matches + assert expr.match('1') # Matches + assert expr.match('_') # Matches + assert expr.match('hello') # Matches + + +.non_word() +----------- + +``.non_word()`` matches any non-alpha-numeric ``(a-z, A-Z, 0-9)`` characters, excluding ``_`` as well. + +.. 
code-block:: python + + from edify import RegexBuilder + + # returns re.compile('\W') + expr = RegexBuilder().non_word().to_regex() + assert not expr.match('a') # Doesn't match + assert not expr.match('1') # Doesn't match + assert expr.match('\u00a0') # Matches + assert expr.match('\u2000') # Matches + assert not expr.match('_') # Doesn't match + assert not expr.match('hello') # Doesn't match + + +.word_boundary() +----------------- + +``.word_boundary()`` matches (without consuming any characters) immediately between a character matched by :ref:`word` and a character not matched by :ref:`word` (in either order). + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('\d\b') + expr = RegexBuilder().digit().word_boundary().to_regex() + + +.non_word_boundary() +-------------------- + +``.non_word_boundary()`` matches (without consuming any characters) at the position between two characters matched by :ref:`word`. + +.. code-block:: python + + + from edify import RegexBuilder + + # returns re.compile('\d\B') + expr = RegexBuilder().digit().non_word_boundary().to_regex() + +.new_line() +----------- + +``.new_line()`` matches the newline ``\n`` character. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('\n') + expr = RegexBuilder().new_line().to_regex() + assert expr.match('\n') # Matches + assert not expr.match('a') # Doesn't match + assert not expr.match('hello') # Doesn't match + +.carriage_return() +------------------- + +``.carriage_return()`` matches the carriage return ``\r`` character. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('\r') + expr = RegexBuilder().carriage_return().to_regex() + assert expr.match('\r') # Matches + assert not expr.match('a') # Doesn't match + assert not expr.match('hello') # Doesn't match + + +.tab() +------ + +``.tab()`` matches the tab ``\t`` character. + +.. 
code-block:: python + + from edify import RegexBuilder + + # returns re.compile('\t') + expr = RegexBuilder().tab().to_regex() + assert expr.match('\t') # Matches + assert not expr.match('a') # Doesn't match + assert not expr.match('hello') # Doesn't match + + +.null_byte() +------------ + +``.null_byte()`` matches the null byte ``\0`` character. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('\0') + expr = RegexBuilder().null_byte().to_regex() + assert expr.match('\0') # Matches + assert not expr.match('a') # Doesn't match + assert not expr.match('hello') # Doesn't match + +.. _any_of: + +.any_of() +--------- + +``.any_of()`` matches a choice between specified elements. Needs to be finalised with :ref:`end`. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('(?:hello|[a-f0-9])') + expr = ( + RegexBuilder() + .any_of() + .range('a', 'f') + .range('0', '9') + .string('hello') + .end() + .to_regex() + ) + assert expr.match('a') # Matches + assert expr.match('f') # Matches + assert expr.match('9') # Matches + assert expr.match('hello') # Matches + assert not expr.match('g') # Doesn't match + assert not expr.match('good world') # Doesn't match + +.. _capture: + +.capture() +----------- + +``.capture()`` creates a capture group for the following elements. Needs to be finalised with :ref:`end`. Can be later referenced with :ref:`backreference`. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('([a-f][0-9]hello)') + expr = ( + RegexBuilder() + .capture() + .range('a', 'f') + .range('0', '9') + .string('hello') + .end() + .to_regex() + ) + assert expr.match('a9hello') # Matches + assert expr.match('f0hello') # Matches + assert not expr.match('g9hello') # Doesn't match + +.. _named_capture: + +.named_capture(name) +-------------------- + +``.named_capture()`` creates a named capture group for the following elements. Needs to be finalised with :ref:`end`. 
Can be later referenced with :ref:`named_back_reference` or :ref:`backreference`. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('(?P<interestingStuff>[a-f][0-9]hello)') + expr = ( + RegexBuilder() + .named_capture('interestingStuff') + .range('a', 'f') + .range('0', '9') + .string('hello') + .end() + .to_regex() + ) + assert expr.match('a9hello') # Matches + assert expr.match('f0hello') # Matches + assert not expr.match('g9hello') # Doesn't match + +.. _named_back_reference: + +.named_back_reference(name) +--------------------------- + +``.named_back_reference()`` matches exactly what was previously matched by a :ref:`named_capture`. + +.. warning:: + + Python's ``re`` module does not support the ``\k<name>`` named back reference syntax. If you try to call the ``to_regex()`` method on a named back reference, it will raise an exception. For this reason, ``to_regex_string()`` is provided instead. It returns a string that can be used to create a regular expression. You can try using the regular expression directly with another library like `regex <https://pypi.org/project/regex/>`_. + +.. code-block:: python + + from edify import RegexBuilder + + # returns /(?<interestingStuff>[a-f][0-9]hello)something else\k<interestingStuff>/ + expr = ( + RegexBuilder() + .named_capture('interestingStuff') + .range('a', 'f') + .range('0', '9') + .string('hello') + .end() + .string('something else') + .named_back_reference('interestingStuff') + .to_regex_string() + ) + +.. _backreference: + +.back_reference(index) +---------------------- + +``.back_reference()`` matches exactly what was previously matched by a :ref:`capture` or :ref:`named_capture` using a positional index. Note that regex group indexes start at 1, so the first capture group has index 1. + +.. 
code-block:: python + + from edify import RegexBuilder + + # returns re.compile('([a-f][0-9]hello)\\1') + expr = ( + RegexBuilder() + .capture() + .range('a', 'f') + .range('0', '9') + .string('hello') + .end() + .back_reference(1) + .to_regex() + ) + assert expr.match('a9helloa9hello') # Matches + assert not expr.match('a9helloa9hell') # Doesn't match + +.. _group: + +.group() +-------- + +``.group()`` creates a non-capturing group for the following elements. Needs to be finalised with :ref:`end`. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('(?:[a-f][0-9]hello)?') + expr = ( + RegexBuilder() + .optional().group() + .range('a', 'f') + .range('0', '9') + .string('hello') + .end() + .to_regex() + ) + assert expr.match('a9hello') # Matches + assert expr.match('') # Matches + assert expr.match('g9hello') # Matches the empty string, because the group is optional + +.. _end: + +.end() +------ + +``.end()`` signifies the end of a ``RegexBuilder`` grouping, such as a :ref:`capture`, :ref:`group` or :ref:`any_of` element. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('((?:hello|[a-f0-9]))') + expr = ( + RegexBuilder() + .capture() + .any_of() + .range('a', 'f') + .range('0', '9') + .string('hello') + .end() + .end() + .to_regex() + ) + +.. _assert_ahead: + +.assert_ahead() +--------------- + +``.assert_ahead()`` asserts that the following elements are found without consuming them. Needs to be finalised with :ref:`end`. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('(?=[a-f])[a-z]') + expr = ( + RegexBuilder() + .assert_ahead() + .range('a', 'f') + .end() + .range('a', 'z') + .to_regex() + ) + assert expr.match('a') # Matches + assert expr.match('f') # Matches + assert not expr.match('g') # Doesn't match + +.. _assert_not_ahead: + +.assert_not_ahead() +------------------- + +``.assert_not_ahead()`` asserts that the following elements are not found without consuming them. 
Needs to be finalised with :ref:`end`. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('(?![a-f])[g-z]') + expr = ( + RegexBuilder() + .assert_not_ahead() + .range('a', 'f') + .end() + .range('g', 'z') + .to_regex() + ) + assert expr.match('g') # Matches + assert expr.match('z') # Matches + assert not expr.match('a') # Doesn't match + +.. _assert_behind: + +.assert_behind() +---------------- + +``.assert_behind()`` asserts that the elements contained within are found immediately before this point in the string. Needs to be finalised with :ref:`end`. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('(?<=hello )world') + expr = ( + RegexBuilder() + .assert_behind() + .string('hello ') + .end() + .string('world') + .to_regex() + ) + +.. _assert_not_behind: + +.assert_not_behind() +-------------------- + +``.assert_not_behind()`` asserts that the elements contained within are not found immediately before this point in the string. Needs to be finalised with :ref:`end`. + +.. code-block:: python + + from edify import RegexBuilder + + # returns re.compile('(?