Regular Expressions 101

Save & Share

Regex Version: ver. 1
Fork Regex
ctrl+s
Go to community entry

Flavor

PCRE2 (PHP >=7.3)
PCRE (PHP <7.3)
ECMAScript (JavaScript)
Python
Golang
Java 8
.NET 7.0 (C#)
Rust
Regex Flavor Guide

Function

Match
Substitution
List
Unit Tests

Tools

Code Generator
Regex Debugger
Export Matches
Benchmark Regex

Regular Expression
No Match

xgJ

Test String

Code Generator

Language

Generated Code

# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility

import re

regex = r"""
	(?(DEFINE)
	(?<add> \s*\+\s* )
	(?<eq> \s*=\s* )
	# Remove all zeroes except the last one if the number is 0
	(?<zero> (?:0(?=\d))*+ )
	# cl: last digit of left operand being 1, cr: last digit of right operand being 1, \d(?:0|\b) check if last digit from result is 0
	# there will be carry if cl and cr are set, or cl or cr are set and the last digit from result is 0
	(?<carry> (?(cl)(?(cr)|\d(?:0|\b))|(?(cr)\d(?:0|\b)|(*F))) )
	# add carry with l1 (current digit of left operand being 1) and r1 (current digit of right operand being 1)
	# i.e. returns result of carry + l1 + r1 in Z/2Z
	(?<digitadd> (?(?= (?(?=(?(l1)(?(r1)|(*F))|(?(r1)(*F))))(?&carry)|(?!(?&carry))) )1|0) )
	# check for a single digit at the current offset whether the result is correct
	# ro: right operand out of bounds (i.e. the current digit is at a higher offset than the size of the left operand)
	# if we're out of bounds of the right operand, cr is just not set (i.e. handled as if there were leading zeroes)
	(?<recursedigit>
	# now, with the r and f, we can figure out r1 and cr at the current offset and also perform binary carry addition at that offset in the result
	  (?&add) (?&zero) (?:\d*(?:0|1(?<r1>)))? (?(ro)|(?=(?<cr>1)?))\k<r> (?&eq) \d*(?&digitadd)\k<f>\b
	# iterate through the whole left operand to find the sequences (for right operand and result) of the same length as the offset of the current digit
	| (?=\d* (?&add) (?&zero) (?:\k<r>(?<ro>)|\d*(?<r>\d\k<r>)) (?&eq) \d*(?<f>\d\k<f>)\b) \d(?&recursedigit)
	)
	# run the check, sets l1 and cl accordingly and initializes the r (right operand) and f (final result) groups to be empty
	(?<checkdigit> (?:0|1(?<l1>)) (?=(?<cl>1)?) (?<r>) (?<f>) (?&recursedigit) )
	# "trivial" increment of a binary number, i.e. a +1 is applied to the part of the right operand which exceeds the length of the left operand
	(?<carryoverflow>
	# number contains a zero, just update the part after the last zero
	  (?<x>\d+) 0 (?<y> \k<r> (?&eq) 0*\k<x>1 | 1(?&y)0 )
	# number contains only ones, add a leading 1 and replace all the ones by zeroes
	| (?<z> 1\k<r> (?&eq) 0*10 | 1(?&z)0 )
	)
	# ensure correct lengths of the final operand and handle right operands being longer than the left operand
	(?<recurseoverflow>
	# the left operand is longer than or as long as the right one. In the latter case, the final result will always be exactly one digit longer than the operands
	# in the former case, if the first non-leading zero (from the left) of the left operand is at a higher or equal offset to the length of the right operand, the final result will be one digit longer than the left operand
	  (?&add) 0*+ (?(rlast) \k<r> (?&eq) 0*(?(ro)(?(addone)1)|1)\k<f>\b
	# the right operand has a zero at the offset equal to the length of the left operand. Then just copy the leading digits to the final result
	                | (?:(?<remaining>\d+)(?=0\d* (?&eq) \d*(?=1)\k<f>\b)\k<r> (?&eq) (*PRUNE) 0*\k<remaining>\k<f>\b
	# otherwise there will be some carry which needs to be applied before copying the leading digits to the final result
	                   | (?&carryoverflow)\k<f>\b))
	# iterate through the whole left operand to find the sequences (for right operand and result) of the same length as the left operand
	| (?=\d* (?&add) 0*+ (?:\k<r>(?<ro>)|(?=(?:\d\k<r>(?&eq)(?<rlast>))?)\d*(?<r>\d\k<r>)) (?&eq) \d*(?<f>\d\k<f>)\b)
	# check - only at the first non-leading zero - whether the right operand is longer than the current offset of the iteration, or just as long and having a carry (i.e. the digit at that offset in the final result is 0)
	  (?(nullchecked)|(?=(?<addone>(?=0)(?=(?:\d(?=\d*(?&add)\d*(?&eq)\d*(?<c>\d\k<c>)\b))+(?&add))(?<longer>(?&add)0*|\d(?&longer)\d)(\d+(?&eq)|(?&eq)\d*(?=0)\k<c>))?)(?=(?<nullchecked>0)?))
	  \d(?&recurseoverflow)
	)
	(?<s>
	  (?=\d) 0*? (?<arg>[01]+)? (?&add) (?=\d) 0*? (?<arg>(?(arg)(*F))[01]+)? (?&eq) (*PRUNE) \k<arg>
	| (?&zero)
	# traverse the digits one by one and verify the correctness of each offset individually
	  (?=(?<iteratedigits> (?=(?&checkdigit))\d (?:\b|(?&iteratedigits)) ))
	# assert exact format here
	  (?=[01]+ (?&add) [01]+ (?&eq) [01]+ \b)
	# remove leading zeroes and force an additional digit on the final result in case the left operand is only ones and the right operand not longer than the left
	  0*? (?<r>) (?<f>) (?<c>) (?=(?<addone>1+(?&add))?) (?&recurseoverflow)
	# Handle 0 + x or x + 0 separately to avoid messing around in the big subpatterns
	)
	)
	\b(?&s)\b
	"""

test_str = ("- 1 + 1 = 0\n"
	"- 1 + 1 = 1\n"
	"- 0 + 1 = 11\n"
	"+ 1 + 1 = 10\n"
	"- 1 + 10 = 101\n"
	"- 0 + 11 = 101\n"
	"+ 0 + 1 = 1\n"
	"- 0 + 0 = 10\n"
	"+ 1 + 0 = 1\n"
	"- 1 + 101 = 1010\n"
	"+ 10 + 0 = 10\n"
	"+ 10 + 1 = 11\n"
	"+ 11 + 1 = 100\n"
	"+ 100 + 1 = 101\n"
	"+ 01 + 10 = 11\n"
	"+ 10 + 10 = 100\n"
	"+ 010 + 010 = 100\n"
	"+ 101 + 11 = 1000\n"
	"- 101 + 10 = 1111\n"
	"- 100 + 10 = 111\n"
	"- 101 + 10 = 101\n"
	"+ 101 + 10 = 111\n"
	"+ 11 + 101 = 1000\n"
	"+ 11 + 1011 = 1110\n"
	"+ 11 + 10101 = 11000\n"
	"+ 1 + 1111 = 10000\n"
	"+ 111 + 11 = 1010\n"
	"+ 1110 + 100 = 10010\n"
	"+ 11 + 11 = 110\n"
	"+ 1111 + 11 = 10010\n"
	"- 010 + 010 = 000\n"
	"- 100 + 100 = 1100\n"
	"+ 100 + 100 = 1000\n"
	"+ 1000 + 100 = 1100\n"
	"+ 1 + 1000 = 1001\n"
	"+ 1000 + 1 = 1001\n"
	"+ 110 + 1100 = 10010\n"
	"- 110 + 1010 = 1000\n"
	"- 10 + 1 = 11111001")

matches = re.finditer(regex, test_str, re.VERBOSE)

for matchNum, match in enumerate(matches, start=1):
    
    print ("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))
    
    for groupNum in range(0, len(match.groups())):
        groupNum = groupNum + 1
        
        print ("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))

# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.

Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html

Regular Expressions 101

Save & Share

Flavor

Function

Tools

Explanation

Match Information

Quick Reference

Regular Expression
No Match

Test String

Code Generator

Language

Generated Code

Save & Share

Flavor

Function

Tools

Explanation

Match Information

Quick Reference

Regular ExpressionNo Match

Test String

Regular Expression
No Match