# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern under test. It is the same expression that the sample text below
# labels REGEX_6: a run of 1-9 digits, an optional "eur..." tail captured as
# group 1, then " honoraire(s) exclus".
regex = r"\d{1,9}(eur.*)? honoraires? exclus"

# Horizontal rule that frames each "[REGEX_n = ...]" header in the sample text.
_RULE = "----------------------------------------------------------------------------------"

# Sample corpus, one entry per text line. An empty entry is a blank line
# between sections. Trailing spaces inside some entries are significant and
# must be preserved, since match offsets depend on the exact text.
_SAMPLE_LINES = (
    _RULE,
    r"[REGEX_1 = hors honoraires?:? (:\s)?\d{1,9}]",
    _RULE,
    "prix hors honoraires 900 000 €. no",
    "prix hors honoraire 900 000 €. no",
    "prix hors honoraire: 900 000 €. no",
    "prix hors honoraires 168 000 € ttc.",
    "prix hors honoraires 600 000 € ht. ",
    "prix hors honoraires 125 000 €. no",
    "prix hors honoraires 2 500 000 € pa",
    "prix hors honoraires 100 000 €. no",
    "prix hors honoraires 250 000 € h",
    "prix hors honoraires: 250 000 € h",
    "prix de vente hors honoraires : 620 000 €",
    "",
    _RULE,
    r"[REGEX_2 = \d{1,9} ([a-zA-Z_\/€²]{1,12}) hors honoraires?]",
    _RULE,
    "1 050 000 € hors honoraires",
    "450 000 euros hors honoraires",
    "287600 eur hors honoraires",
    "450 ht/hc/m²/an hors honoraires",
    "450 € /m2 ht/hc/an hors honoraires",
    "450 m²/an/ ht hc et hors honoraires",
    "",
    _RULE,
    r"[REGEX_3 = \d{1,9}(eur.*)? hors honoraires?]",
    _RULE,
    "65 000 hors honoraires ",
    "400000eur hors honoraires ",
    "",
    _RULE,
    r"[REGEX_4 = honoraires? exclus:? (:\s)?\d{1,9}]",
    _RULE,
    "prix honoraires exclus : 80 000 eur",
    "prix honoraires exclus : 550 000 eur",
    "prix honoraires exclus: 550 000 eur",
    "",
    _RULE,
    r"[REGEX_5 = \d{1,9} ([a-zA-Z_\/€²]{1,12}) honoraires? exclus]",
    _RULE,
    "65 000 eur honoraires exclus",
    "400000 ht/hc/m²/an honoraires exclus",
    "",
    _RULE,
    r"[REGEX_6 = \d{1,9}(eur.*)? honoraires? exclus]",
    _RULE,
    "65 000 honoraires exclus",
    "400000eur ht/hc/m²/an honoraires exclus",
    "400000eur honoraires exclus",
    "",
    "[GOOD ONE]",
    "prix honoraires exclus ",
    "honoraires exclus ",
    "prix hors honoraires ",
    "bail 30/06/22 hors honoraires",
    "",
)

# Every line, including the final blank one, is newline-terminated.
test_str = "\n".join(_SAMPLE_LINES) + "\n"
# Scan the sample text with the pattern and report every match, followed by
# each of its capture groups. re.MULTILINE is kept from the generated code.
matches = re.finditer(regex, test_str, re.MULTILINE)

for matchNum, match in enumerate(matches, start=1):
    # Group 0 (the whole match) is reported first.
    print("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))

    # Capture groups are numbered from 1. A group that did not take part in
    # the match is reported by `re` with start/end of -1 and a value of None.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html