# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern: an ordinal number (digits + st/nd/rd/th) and the whitespace after
# it, but only when the next word is "district". The lookahead keeps
# "district" itself out of the match; the trailing whitespace IS part of it.
# The explicit suffix alternation (instead of a character class) also covers
# "rd" ordinals such as "3rd" and rejects letter soup such as "5ttt".
regex = r"\d+(?:st|nd|rd|th)\s+(?=district)"

# Sample input: three lines of regex-like text followed by biography snippets
# in which the ordinal district numbers are to be located.
test_str = ("assumed office\\s(.*?\\d{4})\n"
    "\\d+th.*?(?=district)\n"
    "in office\\s(.*\\d{4})\n\n\n\n"
    " member of the u.s. house of representatives from new jersey 's 5th district incumbent assumed office january 3, 2017 preceded by scott garrett\n\n"
    " adam schiff chair of the house intelligence committee in office january 3, 2019 – january 3, 2023 preceded by devin nunes succeeded by mike turner ranking member of the house intelligence committee in office january 3, 2015 – january 3, 2019 preceded by dutch ruppersberger \n"
    "succeeded by devin nunes member of the u.s. house of representatives from california incumbent assumed office january 3, 2001 preceded by james e. rogan constituency 27th district (2001–2003) 29th district (2003–2013) 28th district (2013–2023) 30th district (2023–present)\n\n"
    " david valadao member of the u.s. house of representatives from california incumbent assumed office january 3, 2021 preceded by tj cox constituency 21st district (2021–2023) 22nd district (2023–present) in office january 3, 2013 – january 3, \n"
    "2019 preceded by devin nunes succeeded by tj cox constituency 21st district\n\n"
    " nancy pelosi official portrait, 2019 52nd speaker of the united states house of representatives in office january 3, 2019 – january 3, 2023 preceded by paul ryan succeeded by kevin mccarthy in office january 4, 2007 – january 3, 2011 preceded by dennis hastert succeeded by john boehner\n\n"
    " bill pascrell member of the u.s. house of representatives from new jersey incumbent assumed office january 3, 1997 preceded by william j. martini constituency 8th district (1997–2013) 9th district (2013–present)\n\n"
    "roy freiman member of the new jersey general assemblyfrom the 16th district incumbent assumed office january 9, 2018serving with andrew zwicker (2018-2022)sadaf jaffer (2022-present) preceded byjack ciattarelli\n\n"
    " josh gottheimer member of the u.s. house of representativesfrom new jersey's 5th district incumbent assumed office january 3, 2017\n\n\n"
    " roy freiman member of the new jersey general assemblyfrom the 16th district incumbent assumed office january 9, 2018serving with andrew zwicker (2018-2022)sadaf jaffer (2022-present) preceded byjack ciattarelli\n\n"
    "district 9,republican primary,candidates,declared billy prempeh, sales consultant, u.s. air force veteran, and nominee in 2020 and 2022[5], name: billy prempeh\n\n"
    "bonnie watson coleman member u.s. house representatives new jersey 's 12th district incumbent assumed office january 3, 2015 preceded rush holt jr.\n\n"
    " bonnie watson coleman member of the u.s. house of representatives from new jersey 's 12th district incumbent assumed office january 3, 2015 preceded by rush holt jr.\n\n"
    " tom malinowski member of the u.s. house of representatives from new jersey 's 7th district in office january 3, 2019 – january 3, 2023 preceded by leonard lance succeeded by thomas kean jr.")

# The pattern contains no ^ or $ anchors, so re.MULTILINE does not change
# which spans match; it is kept from the generated template.
matches = re.finditer(regex, test_str, re.MULTILINE)

# Report every match, then each of its capture groups (numbered from 1).
# The current pattern has no capture groups, so the inner loop is a no-op
# until one is added to the regex.
for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html