# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Anchored IPv4-style validator: exactly four octets in the range 0-255.
# Built from adjacent raw-string pieces so each part can be explained;
# the concatenated pattern is identical to the single-literal form.
regex = (
    r"^(?:"                                 # anchor at the start, open the repeated unit
    r"(?:25[0-5]|(?:2[0-4]|1\d|[1-9]|)\d)"  # one octet: 250-255, or 0-249 without a leading zero
    r"\.?\b"                                # optional dot; \b rejects a trailing dot and fused octets
    r"){4}$"                                # exactly four octets, then the end anchor
)
# Sample subject text that the pattern is searched against further down:
# design notes for a series of regex exercises (task list, sample strings and
# reference solutions). The literal is runtime data assembled by implicit
# concatenation of adjacent strings, so its wording, spelling and escape
# sequences are intentionally left exactly as they are.
test_str = ("Purpose:\n"
"This is supposed to be a learning process. The first 1-9 tasks are supposed to be done with using \"standard\" regex, ie, teach the user to use \\s, \\w, \\d, aswell as backrefs and similar basic things.\n"
"Later down the road, at task 10-13 we start introducing them to lookarounds. These are fairly basic for now.\n"
"At task 14-15 we show them the true power of lookarounds and encourage them to use them properly and what can truly be done with them.\n"
"Task 16 and forward is supposed to introduce \\G and other nifty ideas. This is where its supposed to be more advanced and trickier.\n\n\n"
"Task order:\n"
" 1: Word Boundaries\n"
" 2: Capitalize i\n"
" 3: Uppercase Consonants\n"
" 4: Retreive Numbers\n"
" 5: Whitespaces\n"
" 6: Broken Keyboard\n"
" 7: Validate an IP\n"
" 8: Html Tags\n"
" 9: Validate floating numbers\n"
" 10: Followed by #\n"
" 11: Spam filter\n"
" 12: Match an E-Mail (Simplified)\n"
" 13: Not surrounded by digits\n"
" 14: Repeated words\n"
" 15: start before end\n"
" 16: Every other digit\n"
" 17: The thousands\n"
" 18: Quoted text with escapes\n"
" 19: Replace text, not code\n"
" 20: Tokenized list\n"
" 21: Replace in between - Match * inside square brackets\n"
" 22: Outermost Brackets\n\n"
"Task 1: Word Boundaries\n"
" Strings: \n"
" word valid\n"
" aworda invalid\n"
" thisisnotaword invalid\n"
" wordnot invalid\n"
" wor invalid\n"
" wOrdd invalid\n"
" WORD valid\n"
" WORDz invalid\n"
" zWORD invalid\n"
" Valid regex:\n"
" /\\bword\\b/i\n"
" \n"
"Task 2: Capitalize\n"
" Strings:\n"
" I am a cat invalid (questionable)\n"
" this is invalid invalid\n"
" i am a cat valid -> I am a cat\n"
" capitalize this i valid -> capitalize this I\n"
" abc i abc valid -> abc I abc\n"
" ii i ii valid -> ii I ii\n"
" i valid -> I\n"
" abc i abc i abc i abc valid -> abc I abc I abc I abc\n"
" Valid regsub:\n"
" /\\bi\\b/g I\n"
" \n"
"Task 3: Uppercase Consonants\n"
" Strings:\n"
" BCDFGHJKLMNPQRSTVXZ valid\n"
" Valid regex:\n"
" /[B-DF-HJ-NP-TV-Z]/g\n"
" \n"
"Task 4: Retreive Numbers\n"
" Strings:\n"
" 12345 valid -> 12345\n"
" 12abc12 valid -> 12, 12\n"
" 1a2b3 valid -> 1, 2, 3\n"
" Valid Regex:\n"
" /(\\d+)/g\n"
" \n"
"Task 5: Whitespaces\n"
" Strings:\n"
" ??\n"
" Valid regex:\n"
" /[ ]{4}/S\n"
" \n"
"Task 6: Match a e-mail (simplified)\n"
" Strings:\n"
" @someone.com invalid\n"
" a.@com invalid\n"
" .a@.com invalid\n"
" .@.com invalid\n"
" .@abc.com invalid\n"
" email@.a.com invalid\n"
" mail@mail.com valid\n"
" ()[]\\;:,<>@example.com invalid\n"
" A@b@c@example.com invalid\n"
" abc@abc.loooong invalid\n"
" abc@abc.abc.com valid\n"
" Valid regex:\n"
" /^[\\w.%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,4}$/i\n"
" \n"
"Task 7: Validate an IP\n"
" Strings:\n"
" too lazy for this one lol\n"
" Valid regex:\n"
" /^(?:(?:[01]?\\d?\\d|2[0-4]\\d|25[0-5])\\.){3}(?:[01]?\\d?\\d|2[0-4]\\d|25[0-5])$/\n"
" \n"
"Task 8: HTML Tags\n"
" Strings:\n"
" ...\n"
" Valid regex:\n"
" /<[^>]*|[^<]*>/g\n"
" \n"
"Task 9: Validate floating numbers\n"
" Strings:\n"
" 1.0 valid\n"
" 12,123 valid\n"
" +10 valid\n"
" -0.1 valid\n"
" -.1 valid\n"
" -1.5e20 valid\n"
" +50e1 valid\n"
" +20.0 valid\n"
" +1.01e10 valid\n"
" +1. invalid\n"
" +1 valid\n"
" .+1 invalid\n"
" 1.4e10 valid\n"
" 1.4.e10 invalid\n"
" . invalid\n"
" .e0 invalid\n"
" Valid regex:\n"
" /^[+-]?(?:\\d+(?:\\.(?!$))?|\\.)\\d*(?:e\\d+)?$/ need update\n"
" \n"
"Task 10: Broken keyboard\n"
" Strings:\n"
" ...\n"
" Valid regex:\n"
" /(.)\\1\\1/\n"
" \n"
"Task 11: Followed by #\n"
" Strings:\n"
" ...\n"
" Valid regex:\n"
" /(.)(?=#)/g\n\n"
"Task 12: Spam filter\n"
" Strings:\n"
" ...\n"
" Valid regex:\n"
" /^(?!.*(filter|mirc|not allowed)).*(?:http:\\/\\/|www\\.|porn|credit card)/i\n"
" \n"
"Task 13: Not surrounded by digits\n"
" Strings:\n"
" ...\n"
" Valid regex:\n"
" /(?<!\\d)\\.|\\.(?!\\d)/g\n\n"
"Task 14: Repeated words\n"
" Strings:\n"
" ...\n"
" Valid regex:\n"
" /(\\b\\w{4,}\\b)(?=(?:.*\\b\\1\\b){2})(?!(.*\\b\\1\\b){3})/ig\n\n"
"Task 15: start before end\n"
" Strings:\n"
" ...\n"
" Valid regex:\n"
" /^(?:(?!end).)*start/\n"
" \n"
"Task 16: Every other digit\n"
" Strings:\n"
" ...\n"
" Valid regsub:\n"
" /\\G((?:.\\D)*.)\\d/g \\1*\n"
" \n"
"Task 17: The thousands\n"
" Strings:\n"
" 100 invalid\n"
" 1000 valid -> 1,000\n"
" 9999 valid -> 9,999\n"
" 12345 valid -> 12,345\n"
" 9999999999999999 valid -> 9,999,999,999,999,999\n"
" Valid regsub:\n"
" /(\\d)(?=(\\d{3})+\\b)/ \\1,\n"
" \n"
"Task 18: Quoted text with escapes\n"
" Strings:\n"
" ...\n"
" Valid regex:\n"
" /^\"((?>\\\\.|[^\"])*)\"$/\n"
" \n"
"Task 21: Match * inside square brackets\n"
" Strings:\n"
" ...\n"
" Valid regex/regsub:\n"
" /^([^[]+)|([^]]+)$|(\\][^[]+\\[)|\\*/ \\1\\2\\3\n"
" /(?(?=^)[^]]*\\[\\K|(?(?=\\])..*?\\[\\K|\\*))/g\n"
" \n"
"Task 22: Outermost Brackets\n"
" Strings: \n"
" ...\n"
" Valid regex:\n"
" /(\\((?>[^()]+|(?R))+\\))/g\n"
" \n"
"Task 23: Validate string with X lower case and X upper case characters.\n\n"
"Left to document:\n"
" Match regex\n"
" Match a valid mathematical expression\n"
" Highlight text with colors (maintaining current color codes)\n"
" Palindromes\n"
" Ignoring punctuation\n\n"
"Task 26: Word Boundaries\n"
" Strings:\n"
" Check if a string contains the word word in it (case insensitive). If you have no idea, I guess you could try /word/.\n"
" Valid regex/regsub:\n"
" /\\bword\\b/i\n\n"
"Task 27: Capitalizing I\n"
" Strings:\n"
" Use substitution to replace every occurrence of the word i with the word I (uppercase, I as in me). E.g.: i'm replacing it. am i not? -> I'm replacing it. am I not?.\n"
" A regex match is replaced with the text in the Substitution field when using substitution.\n"
" Valid regex/regsub:\n"
" /\\bi\\b/I/g\n\n"
"Task 28: Uppercase Consonants\n"
" Strings:\n"
" With regex you can count the number of matches. Can you make it return the number of uppercase consonants (B,C,D,F,..,X,Y,Z) in a given string? E.g.: it should return 3 with the text ABcDeFO!. Note: Only ASCII....\n"
" Example: the regex /./g will return 3 when run against the string abc.\n"
" Valid regex/regsub:\n"
" /[^AEIOUa-z_\\W\\d]\\g\n\n"
"Task 29: Retrieve Numbers\n"
" Strings:\n"
" Count the number of integers in a given string. Integers are, for example: 1, 2, 65, 2579, etc.\n"
" Valid regex/regsub:\n"
" /\\d+/g\n\n"
"Task 30: Whitespace\n"
" Strings:\n"
" Find all occurrences of 4 or more whitespace characters in a row throughout the string.\n"
" Valid regex/regsub:\n"
" /\\s{4,}/g\n\n"
"Task 31: Broken Keyboard\n"
" Strings:\n"
" Oh no! It seems my friends spilled beer all over my keyboard last night and my keys are super sticky now. Some of the time whennn I press a key, I get two duplicates.\n"
" Can you ppplease help me fix thhhis?\n"
" Valid regex/regsub:\n"
" /(.)\\1{2}/$1/g\n\n"
"Task 32: Validate IP\n"
" Strings:\n"
" Validate an IPv4 address. The addresses are four numbered separated by three dots, and can only have a maximum value of 255 in either octet. Start by trying to validate 172.16.254.1.\n"
" Valid regex/regsub:\n"
" /^(?:(?:25[0-5]|(?:2[0-4]|1\\d|[1-9]|)\\d)\\.?\\b){4}$/\n\n"
"Task 33: HTML TAGS\n"
" Strings:\n"
" Strip all HTML tags from a string. HTML tags are enclosed in < and >.\n"
" The regex will be applied on a line-by-line basis, meaning partial tags will need to be handled by the regex. Don't worry about opening or closing tags; we just want to get rid of them all.\n"
" Note: This task is meant to be a learning exercise, and not necessarily the best way to parse HTML.\n"
" Valid regex/regsub:\n"
" /<?[^<]*>|<.*//g\n\n"
"Task 34: MATCH AN EMAIL\n"
" Strings:\n"
" Verify that a given e-mail address is valid.\n"
" We all know how complex emails are, but despite this, let's give it a try and see what we can come up with.\n"
" You could start by trying to match contact@regex101.com (denoted as <local-part>@<domain>.<top-level-domain>).\n"
" Valid regex/regsub:\n"
" /^((?!\\.)[\\w\\d](?!.*\\.\\.)[a-zA-Z0-9\\.!#$%&'*+\\-\\/=?^_`{|}~]*[^\\.\\s])@([a-zA-Z][a-zA-Z\\d-]+[a-zA-Z\\d]+\\.)+[a-zA-Z]{2,6}$/gm\n\n"
"Task 35: Followed bY\n"
" Strings:\n"
" For every occurrence of the char #, match the previous character and save it in a group (backreference).\n"
" Example: for the text \"a#bc# -#\", set backreferences with a, c and -.\n"
" You are not allowed to consume the hash character.\n"
" Valid regex/regsub:\n"
" /(\\S)(?=#)/g\n\n"
"Task 36: Validate Floating Point\n"
" Strings:\n"
" Check if a floating point number (e.g. 3.14159) is in a valid format.\n"
" Valid regex/regsub:\n"
" /^[-+]?(\\d+[,.]|\\d*[.,]?\\d+)(e[-+]?\\d+)?$/i\n\n"
"Task 37: Match any number between 0-100\n"
" Strings:\n"
" Could you help me validate my input and only match positive integers between the range of 0 and 100?\n"
" There can be several numbers in a string which I would want to retrieve.\n"
" Try out these example strings:\n"
" Sam has 200 apples. He gives Todd 20 and Mary 125.\n"
" The weather is -5 C today, but will be +5 C tomorrow.\n"
" Valid regex/regsub:\n"
" /\\b(?<!-)(?:\\d{1,2}|100)\\b/gmi\n\n"
"Task 38: Match alternating 0s 1s\n"
" Strings:\n"
" I'm trying to match bit sequences which are alternating between 1 and 0 and never have more than one 1 or 0 in a row. They can be single digits.\n"
" Try matching this: 0101010, 1010101010 or 1\n"
" Valid regex/regsub:\n"
" /\\b(?!\\d*(\\d)\\1)[10]+\\b/gmi\n\n"
"Task 39: Spam Filter\n"
" Strings:\n"
" Match a string that contains any of the following substrings: http://, www., porn, or credit card. But don't match the text if it contains one of: not allowed, filter, or mirc.\n"
" Don't use word boundaries (anywhere in the text is fine).\n"
" Valid regex/regsub:\n"
" /^(?!.*(filter|mirc|not allowed)).*(?:http:\\/\\/|www\\.|porn|credit card)/i\n\n"
"Task 40: Not surrounded by digits\n"
" Strings:\n"
" Replace every . (dot) with a - (hyphen) except when the dot is surrounded by digits. E.g.: .a.b.1.2. should become -a-b-1.2-\n"
" Valid regex/regsub:\n"
" /(?<!\\d(?=\\.\\d))\\./g\n\n"
"Task 41: Repeated Words\n"
" Strings:\n"
" I'd like to know if a text contains words with 4 characters or more which are repeated 3 or more times in the text (anywhere in the text).\n"
" If so, set one (and only one) backreference for each word.\n"
" Valid regex/regsub:\n"
" /(\\b\\w{4,}\\b)(?=(?:.*\\b\\1\\b){2})(?!(.*\\b\\1\\b){3})/ig\n\n"
"Task 42: Start before end\n"
" Strings:\n"
" Only match lines with the text start, unless text end appears prior to start. Note: end may or may not be in the string.\n"
" Match start line_end; and don't match line_end; start\n"
" Valid regex/regsub:\n"
" /^(?:(?!end).)*start/\n\n"
"Task 43: Every other digit\n"
" Strings:\n"
" Replace every other character if it's a \\d with * (only those in even positions: 2, 4, 6, etc).\n"
" Example: a1b2cde3~g45hi6 should become a*b*cde*~g4*hi6\n"
" Valid regex/regsub:\n"
" /((?:.\\D)*.)\\d/$1*/gA\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
# Look for the first place in the sample text where the pattern matches.
# No flags are passed, so the ^ and $ anchors are not applied per line.
matches = re.search(regex, test_str)

if matches:
    # Report the span and text of the overall match (group 0).
    print("Match was found at {start}-{end}: {match}".format(
        start=matches.start(), end=matches.end(), match=matches.group()))

    # Capture groups are numbered from 1, so iterate 1..N directly instead of
    # counting from 0 and adding one inside the loop.
    for group_num in range(1, len(matches.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=group_num,
            start=matches.start(group_num),
            end=matches.end(group_num),
            group=matches.group(group_num)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html