# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Demo script: scan a block of sample text for pairs of capitalised words and
# print where each pair was found.

# Two consecutive words, each made of one uppercase ASCII letter followed by
# one or more lowercase ASCII letters, separated by whitespace.
# Because `\s+` also matches newlines, a pair may span two lines
# (e.g. "Russian\nGender"). All-caps surnames and Cyrillic text do not match.
regex = r"\b[A-Z][a-z]+\b\s+\b[A-Z][a-z]+\b"

# Sample input: six numbered person entries separated by blank lines.
test_str = ("54. Mikhail Stepanovich ZUSKO\n"
    "(Михаил Степанович ЗУСЬКО)\n"
    "Function: Russian military leader – Lieutenant- General\n"
    "Chief of staff of the “West” grouping\n"
    "Former Commander of the 58th Combined Arms Army of the Southern Military District\n"
    "DOB: 24.5.1972\n"
    "POB: Vetly, Volyn region, former Ukrainian SSR (now Ukraine)\n"
    "Nationality: Russian\n"
    "Gender: male\n"
    "Associated entities: Ministry of Defence of the Russian Federation\n\n"
    "55. Mikhail Yuryevich TEPLINSKIY\n"
    "(Михаил Юрьевич ТЕПЛИНСКИЙ) \n"
    "Function: Russian military leader – Colonel-General\n"
    "Commander of Airborne Forces of the Armed Forces of the Russian Federation\n"
    "DOB: 9.1.1969\n"
    "POB: Mospino, Donetsk oblast, former Ukrainian SSR (now Ukraine)\n"
    "Nationality: Russian\n"
    "Gender: male\n"
    "Associated entities: Ministry of Defence of the Russian Federation\n\n"
    "56. Nikolay Mikhailovich PARSHIN\n"
    "(Николай Михайлович ПАРШИН) \n"
    "Function: Russian military leader – Lieutenant- General\n"
    "Head of the Main Rocket and Artillery Directorate of the Ministry of Defense of the Russian Federation\n"
    "DOB: 20.12.1962\n"
    "POB: The village of Mordovskoye- Kolomasovo, former Mordovian ASSR (now Russian Federation)\n"
    "Nationality: Russian\n"
    "Gender: male\n"
    "Associated entities: Ministry of Defence of the Russian Federation\n\n"
    "57. Oleg Yurievich TSOKOV\n"
    "(Олег Юрьевич ЦОКОВ) \n"
    "Function: Russian military leader – Major-General\n"
    "Commander of the 144th Motor Rifle Division of the 20th Combined Arms Army of the Armed Forces of the Russian Federation\n"
    "DOB: 23.9.1971\n"
    "POB: former USSR (now Russian Federation)\n"
    "Nationality: Russian\n"
    "Gender: male\n"
    "Associated entities: Ministry of Defence of the Russian Federation\n\n"
    "58. Ramil Rakhmatulovich IBATULLIN\n"
    "(Рамиль Рахматуллович ИБАТУЛЛИН)\n"
    "Function: Russian military leader – Major-General\n"
    "Commander of the 90th Guards Tank Division\n"
    "DOB: 22.10.1976\n"
    "POB: Bagishevo, Apastovsky District, Tatarstan, former USSR (now Russian Federation)\n"
    "Nationality: Russian\n"
    "Gender: male\n"
    "Associated entities: Ministry of Defence of the Russian Federation\n\n"
    "59. Sergey Viktorovich KARAKAEV\n"
    "(Сергей Викторович КАРАКАЕВ) \n"
    "Function: Russian military leader – Colonel-General\n"
    "Commander of the Strategic Missile Forces\n"
    "DOB: 4.6.1961\n"
    "POB: The village of Ivano-Slyusarevka, Krasnodar Territory, former USSR (now Russian Federation)\n"
    "Nationality: Russian\n"
    "Gender: male\n"
    "Associated entities: Ministry of Defence of the Russian Federation\n")

# re.MULTILINE only changes how ^ and $ behave; the pattern uses neither, so
# the flag has no effect on the matches and is kept for parity with the
# generated sample.
matches = re.finditer(regex, test_str, re.MULTILINE)

for matchNum, match in enumerate(matches, start=1):
    # Report the whole match together with its character offsets in test_str.
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=matchNum,
        start=match.start(),
        end=match.end(),
        match=match.group(),
    ))

    # Capture groups are numbered from 1 (group 0 is the whole match).
    # The current pattern defines no groups, so this loop is a no-op until
    # parentheses are added to `regex`.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=groupNum,
            start=match.start(groupNum),
            end=match.end(groupNum),
            group=match.group(groupNum),
        ))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html