# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern for citation access dates such as "accessed 13th March 2022",
# "accessed: 30 Mar 2022" or "accessed on 30.12.2022".  The day and the year
# may each be followed/preceded by a markup tag ("<note>accessed 13</note>").
#
# Capture groups:
#   1  the "accessed" prefix, including an optional colon or " on " and whitespace
#   2  day of month 1-31 (optional leading zero, optional st/nd/rd/th suffix)
#   3  separator between day and month ('.', '-', '/' or a space)
#   4  month: English name, its short form, or a number 1-12 (optional leading zero)
#   5  separator between month and year
#   6  four-digit year 2000-2099
#
# The day alternatives are ordered longest-first and none of them can match
# the empty string, so inputs with a missing day or day "0"/"00" are rejected.
# No per-month validation is attempted: "30 February" is accepted.
regex = (
    r"(?:<[^>]*>)?"                                    # optional tag before the phrase
    r"(accessed(?:\s*:?\s*| on ))"                     # 1: prefix
    r"((?:3[01]|[12][0-9]|0?[1-9])(?:st|nd|rd|th)?)"   # 2: day 1-31 + ordinal suffix
    r"(?:</[^>]*>)?"                                   # optional closing tag after the day
    r"([.\-/ ])"                                       # 3: day/month separator
    r"(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?"
    r"|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?"
    r"|1[012]|0?[1-9])"                                # 4: month name or number 1-12
    r"([.\-/ ])"                                       # 5: month/year separator
    r"(?:<[^>]*>)?"                                    # optional tag before the year
    r"(20\d{2})"                                       # 6: year 20xx
    r"(?:</[^>]*>)?"                                   # optional closing tag after the year
)
# Sample input for the pattern: one candidate date per line, grouped by format.
# Empty entries produce the blank lines that separate the groups, and the text
# ends with eight newline characters.
test_str = "\n".join((
    # Day and year wrapped in markup tags.
    "<note>accessed 13</note> March <year>2022</year>",
    "<note>accessed 13th</note> March <year>2022</year>",
    " <note>accessed 8</note> April <year>2022</year>.",
    # Plain text with month names, including out-of-range or truncated values.
    "accessed 21st February 2020",
    "accessed 32 Febr 2022",
    "accessed 32 February 2022",
    "accessed 30 February 202",
    # Every month, as full name and as abbreviation.
    "accessed 30 January 2022",
    "accessed 30 Jan 2022",
    "accessed 30 February 2022",
    "accessed 30 Feb 2022",
    "accessed: 30 March 2022",
    "accessed on 30 Mar 2022",
    "accessed 30 April 2022",
    "accessed 30 Apr 2022",
    "accessed 30 May 2022",
    "accessed 30 June 2022",
    "accessed 30 Jun 2022",
    "accessed 30 July 2022",
    "accessed 30 Jul 2022",
    "accessed 30 August 2022",
    "accessed 30 Aug 2022",
    "accessed 30 September 2022",
    "accessed 30 Sep 2022",
    "accessed 30 October 2022",
    "accessed 30 Oct 2022",
    "accessed 30 November 2022",
    "accessed 30 Nov 2022",
    "accessed 30 December 2022",
    "accessed 30 Dec 2022",
    "",
    # Numeric dates separated by dots.
    "accessed on 30.12.2022",
    "accessed on 30.13.2022",
    "accessed on 34.12.2022",
    "accessed on 30.12.2023",
    "accessed on 30.15.2022",
    "accessed on 01.12.2022",
    "accessed on 41.12.2092",
    "",
    # Numeric dates separated by hyphens.
    "accessed on 30-12-2022",
    "accessed on 30-13-2022",
    "accessed on 34-12-2022",
    "accessed :30-12-2023",
    "accessed :30-15-2022",
    "accessed :01-12-2022",
    "accessed :41-12-2092",
    "",
    # Numeric dates separated by slashes.
    "accessed : 30/12/2022",
    "accessed : 30/13/2022",
    "accessed :34/12/2022",
    "accessed :30/12/2023",
    "accessed : 30/15/2022",
    "accessed 01/12/2022",
    "accessed 41/12/2092",
)) + "\n" * 8
# Scan the sample text and report every match together with its capture groups.
# re.MULTILINE only changes how ^ and $ behave; it is kept from the generated sample.
matches = re.finditer(regex, test_str, re.MULTILINE)

for matchNum, match in enumerate(matches, start=1):
    # Whole match: 1-based ordinal, character span within test_str, matched text.
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=matchNum, start=match.start(), end=match.end(), match=match.group()))

    # Group 0 is the whole match, so the numbered groups run from 1 up to
    # len(match.groups()) inclusive.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=groupNum, start=match.start(groupNum),
            end=match.end(groupNum), group=match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html