# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Date-matching pattern, written as one raw-string fragment per alternative.
# Adjacent string literals are joined at compile time, so the resulting
# pattern text is a single string. Alternatives are tried left to right at
# each position, so the more specific date shapes come first.
regex = (
    # Numeric month, day and year separated by "/" or "-" (named groups month, day, year).
    r"(?:\b(?P<month>0?[1-9]|0?1[012])[\/-](?P<day>0?[1-9]|[12][0-9]|3[01])[\/-](?P<year>\d{2,4})\b)"
    # Numeric month "/" four-digit year (month1, year1).
    r"|(?:(?P<month1>0?[1-9]|1[012])[\/](?P<year1>\d{4})\b)"
    # Month name (three-letter prefix plus up to six lowercase letters), day with
    # optional th/st/nd suffix, then year (month2, day2, year2).
    r"|(?:\b(?P<month2>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]{0,6}[. \/-]+(?P<day2>0?[1-9]|[12][0-9]|3[01])(?:th|st|nd)?[ ,\/-]+(?P<year2>\d{2,4})\b)"
    # Day, month name, year (day3, month3, year3).
    r"|(?:\b(?P<day3>0?[1-9]|[12][0-9]|3[01])[ ,\/-]+(?P<month3>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]{0,6}[,. \/-]+(?P<year3>\d{2,4})\b)"
    # Month name and year only (month4, year4); no leading word boundary is required.
    r"|(?:(?P<month4>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]{0,6}[,. \/-]+(?P<year4>\d{2,4})\b)"
    # Bare four-digit year starting with 1 or 2 (year5).
    r"|(?:(?P<year5>[12]\d{3})\b)"
)
# Sample text that the pattern is run against. The adjacent string literals
# inside the parentheses are concatenated into one str; every fragment except
# the last ends with a newline escape. Fragments written with "\\n" carry a
# literal backslash followed by the letter n in the text, before the newline.
test_str = ("\n"
	# Dates written in several numeric and month-name styles.
	"04/20/2009; 04/20/09; 4/20/09; 4/3/09\n"
	"Mar-20-2009; Mar 20, 2009; March 20, 2009; Mar. 20, 2009; Mar 20 2009;\n"
	"20 Mar 2009; 20 March 2009; 20 Mar. 2009; 20 March, 2009\n"
	"Mar 20th, 2009; Mar 21st, 2009; Mar 22nd, 2009\n"
	"Feb 2009; Sep 2009; Oct 2010\n"
	"6/2008; 12/2009\n"
	"2009; 2010\n"
	# Short standalone strings: digits glued to letters, partial dates, a
	# phone-like number, a misspelled month name, and a sentence with no date.
	# NOTE(review): presumably edge cases for the pattern -- confirm intent.
	"011/14/83\n"
	"16/22 \n"
	"9/22\n"
	"r1978\n"
	"n4/2004\n"
	"e12/2007\n"
	"08-810-7787\n"
	"pOct 2015\n"
	"Janaury 1993\n"
	"Married to husband for 50+ years\n\n"
	# Free-text lines numbered 0-29; all but line 19 contain a numeric date.
	"0 03/25/93 Total time of visit (in minutes):\\n\n"
	"1 6/18/85 Primary Care Doctor:\\n\n"
	"2 sshe plans to move as of 7/8/71 In-Home Servic...\n"
	"3 7 on 9/27/75 Audit C Score Current:\\n\n"
	"4 2/6/96 sleep studyPain Treatment Pain Level (N...\n"
	"5 .Per 7/06/79 Movement D/O note:\\n\n"
	"6 4, 5/18/78 Patient's thoughts about current su...\n"
	"7 10/24/89 CPT Code: 90801 - Psychiatric Diagnos...\n"
	"8 3/7/86 SOS-10 Total Score:\\n\n"
	"9 (4/10/71)Score-1Audit C Score Current:\\n\n"
	"10 (5/11/85) Crt-1.96, BUN-26; AST/ALT-16/22; WBC...\n"
	"11 4/09/75 SOS-10 Total Score:\\n\n"
	"12 8/01/98 Communication with referring physician...\n"
	"13 1/26/72 Communication with referring physician...\n"
	"14 5/24/1990 CPT Code: 90792: With medical servic...\n"
	"15 1/25/2011 CPT Code: 90792: With medical servic...\n"
	"16 4/12/82 Total time of visit (in minutes):\\n\n"
	"17 1; 10/13/1976 Audit C Score, Highest/Date:\\n\n"
	"18 4, 4/24/98 Relevant Drug History:\\n\n"
	"19 ) 59 yo unemployed w referred by Urgent Care f...\n"
	"20 7/21/98 Total time of visit (in minutes):\\n\n"
	"21 10/21/79 SOS-10 Total Score:\\n\n"
	"22 3/03/90 CPT Code: 90792: With medical services\\n\n"
	"23 2/11/76 CPT Code: 90792: With medical services\\n\n"
	"24 07/25/1984 CPT Code: 90791: No medical services\\n\n"
	"25 4-13-82 Other Child Mental Health Outcomes Sca...\n"
	"26 9/22/89 CPT Code: 90792: With medical services\\n\n"
	"27 9/02/76 CPT Code: 90791: No medical services\\n\n"
	"28 9/12/71 [report_end]\\n\n"
	"29 10/24/86 Communication with referring physicia...")
# Scan the sample text and report every match together with all of its groups.
# re.finditer yields match objects for non-overlapping matches, left to right.
matches = re.finditer(regex, test_str, re.MULTILINE)

for matchNum, match in enumerate(matches, start=1):
    # Whole-match report: 1-based match counter, character span, matched text.
    print ("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))

    # Groups are numbered from 1 (group 0 is the whole match, printed above).
    # Every group of the pattern is reported; a group that did not take part
    # in this match shows start/end of -1 and a value of None.
    for groupNum in range(1, len(match.groups()) + 1):
        print ("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html