# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern under test: exactly 20 consecutive whitespace characters that
# immediately follow a word character (the lookbehind `(?<=\w)` consumes
# nothing, so only the whitespace itself is part of the match).
# The leading `\b` is redundant: a position preceded by `\w` and followed by
# `\s` is always a word boundary.
# Note that `\s` also matches newlines, so a run may span a line break.
# NOTE(review): in test_str as written in this file, no whitespace run is
# longer than one character (single space or single "\n"), so this pattern
# finds no matches; the sample may have lost column padding -- confirm.
regex = r"\b(?<=\w)\s{20}"
# Sample text that the regex is run against. The adjacent string literals are
# concatenated into a single string; every record ends with "\n" and consists
# of a label, one space, and a sequence-like string built from letters, "-"
# and "?".
# NOTE(review): presumably taxon names followed by aligned nucleotide
# sequences, with "?" / "-" as missing-data / gap markers -- confirm.
# NOTE(review): the label "Microcebus_murinus_subsp" appears on two records;
# labels look truncated to a fixed width -- verify against the data source.
test_str = ("Ateles_geoffroyi GATGAGTGTGGCAAGGCCCAAGCGGAAGTGCCG--GTTGCT\n"
"Ateles_hybridus GATGAGTGTGGCAAGGCCCAAGCGGAAGTGCCG--GTTGCT\n"
"Ateles_fusciceps GATGAGTGTGGCAAGGCCCAAGCGGAAGTGC??????????\n"
"Ateles_chamek GATGAGTGTGGCAAGGCCCA?????????????????????\n"
"Ateles_paniscus ?????????????????????????????????????????\n"
"Brachyteles_arachnoides GATGAGTGTGGCAAGGCCCAAGCGGAAGT????????????\n"
"Brachyteles_hypoxanthus GATGAGTGTGGCAAG??????????????????????????\n"
"Lagothrix_cana GATGAGTGTGGCAAGGCCCAAGCGGAAGTGCCG--GTTGCT\n"
"Lagothrix_lagotricha GATGAGTGTGGCAAGGCCCAAGCGGAAGTGCCG--GTTGCT\n"
"Alouatta_belzebul ?????????????????????????????????????????\n"
"Alouatta_caraya ?????????????????????????????????????????\n"
"Alouatta_sara GATGAATGTGGCAAGGCCCAAGCAGAAGTGCCG--GTTGCT\n"
"Alouatta_palliata GATGAATGTGGCAAGGCCCAAGCAGAAGTGCCG--GTTGCT\n"
"Callicebus_brunneus ?????????????????????????????????????????\n"
"Callicebus_moloch GATGAGTGTGGCAAGGCCCAAGCAGAAGTGCCG--G?????\n"
"Callicebus_cupreus GATGAGTGTGGCAAGGCCCAAGCAGAAGTGCCG--GTTGCT\n"
"Callicebus_caligatus GATGAGTGTGGCAAGGCCCAAGCAGAAGTGCCG--GTTGCT\n"
"Callicebus_donacophilus GATGAGTGTGGCAAGGCCCAAGCAGAAGTGCCG--GTTGCT\n"
"Callicebus_coimbrai GATGAGTGTGGCAAGGCCCAAGCGGAAGTGCCG--GTTGCT\n"
"Callicebus_personatus GATGAGTGTGGCAAGGCCCAAGCGGAA??????????????\n"
"Callicebus_nigrifrons GATGAGTGTGGCAAGGCCCAAGCGGAAGTGCCG--GTTGCT\n"
"Cacajao_melanocephalus GATGAGTGTGGCAAAGCCCA?????????????????????\n"
"Cacajao_calvus GATGAGTGTGGCAAGGCCCAAGCGGAAGTGCCG--GTTGCT\n"
"Chiropotes_israelita GATGAGTGTGGCAAGGCCCAAGCGGAAGTGCCG--GTTGCT\n"
"Chiropotes_satanas_chiro ?????????????????????????????????????????\n"
"Pithecia_irrorata GATGAGTGTGGCAAGGCCCAAGCGGAAGTGCCG--G?????\n"
"Pithecia_pithecia GATGAGTGTGGCAAGGCCCAAGCGGAAGTGCCG--GTTGCT\n"
"Tarsius_bancanus ?????????????????????????????????????????\n"
"Tarsius_syrichta ?????????????????????????????????????????\n"
"Lepilemur_ankaranensis AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT\n"
"Lepilemur_septentrionali AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT\n"
"Lepilemur_dorsalis AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT\n"
"Lepilemur_ruficaudatus AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT\n"
"Lepilemur_jamesi AACGAGTGTGGCAAGGCCCAGG???????????????????\n"
"Microcebus_murinus_subsp AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT\n"
"Microcebus_murinus_subsp ?????????????????????????????????????????\n"
"Mirza_zaza AACGAGTGTGGCAAGGCCCAGGCAGAAGTTCCG--GTTGCT\n"
"Cheirogaleus_medius ?????????????????????????????????????????\n"
"Propithecus_verreauxi_co ?????????????????????????????????????????\n"
"Propithecus_verreauxi AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT\n"
"Propithecus_tattersalli AACGAGTGTGGCAAGGCCCAGGCTGAAGTTCCG--GTTGCT\n"
"Propithecus_diadema AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT\n"
"Propithecus_edwardsi AACGAGTGTGGCAAGGCCCAGGCG?????????????????\n"
"Avahi_laniger AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT\n"
"Eulemur_rufus AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT\n"
"Eulemur_collaris AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT\n"
"Eulemur_fulvus AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT\n"
"Eulemur_albifrons AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT\n"
"Eulemur_sanfordi AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT\n"
"Eulemur_mongoz AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT\n"
"Eulemur_macaco AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT\n"
"Eulemur_macaco_flavifron AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT\n"
"Eulemur_coronatus AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT\n"
"Eulemur_rubriventer AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT\n"
"Hapalemur_occidentalis AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT\n"
"Hapalemur_griseus AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT\n"
"Lemur_catta AACGAGTGTGGCAAGGCCCAGGCGGAAGTTCCG--GTTGCT\n"
"Varecia_variegata_varieg AACGAGTGTGGCAAGGCCCAGGCAGAAGTTCCG--GTTGCT\n"
"Varecia_variegata_rubra AACGAGTGTGGCAAGGCCCAGGCAGAAGTTCCG--GTTGCT\n"
"Daubentonia_madagascarie ?????????????????????????????????????????\n"
"Nycticebus_bengalensis AACGAGTGTGGCAAGGCCCAGGCAGAAGTTCCG--GTTGCT\n"
"Nycticebus_coucang AACGAGTGTGGCAAGGCCCAGGCAGAAGTTCCG--GTTGCT\n"
"Nycticebus_pygmaeus AACGAGTATGGCAAGGCCCAGGCAGAAGTTCCG--GTTGCT\n"
"Loris_tardigradus AACGAGTGTGGCAAGGCCCAGGCAGAAGTTCCG--GTTGCT\n"
"Arctocebus_calabarensis AACGAGTGTGGCAAGGCCCAGGCAGAAGTTCCG--GTTGCT\n"
"Perodicticus_potto AACGAGTGTGGCAAGGCCCAGGCAGAAGTTCCG--GTTGCT\n"
"Otolemur_garnetti AACGAGTGTGGCAAGGCCCAGGCAGAAGTTCCG--GTTGCT\n"
"Otolemur_crassicaudatus AACGAGTGTGGCAAGGCCCAGGCAGAAGTTCCG--GTTGCT\n"
"Galago_moholi ?????????????????????????????????????????\n"
"Galago_senegalensis ?????????????????????????????????????????\n"
"Galago_thomasi AACGAGTGTGGCAAGGCCCAGGCG?????????????????\n"
"Galeopterus_variegatus GATGAGTGTGGCAAGGCCCAAGCAGAAGTTCCG--G?????\n"
"Cynocephalus_volans GATGAGTGTGGCAAGGCCCAAGCAGAAGTCCCG--GTTGCT\n"
"Tupaia_glis ?????????????????????????????????????????\n"
"Tupaia_minor ?????????????????????????????????????????\n"
"rabbit_rabbit_rabbit ?????????????????????????????????????????\n")
# Lazily iterate over every non-overlapping occurrence of the pattern in the
# sample text. re.MULTILINE only changes how ^ and $ anchor; the flag is kept
# exactly as in the original call.
matches = re.finditer(regex, test_str, re.MULTILINE)

# Report each match, numbered from 1, together with its start/end offsets.
for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))

    # Capture groups are numbered from 1 (group 0 is the whole match, already
    # printed above), so iterate 1..N directly instead of shifting a 0-based
    # index inside the loop. With no capture groups this loop does nothing.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html