# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern used to spot HTML-like fragments in a string.  It is written as
# adjacent raw-string pieces (joined by the parser into one pattern), one
# piece per top-level alternative:
#   1. the literal doctype declaration "<!DOCTYPE html>";
#   2. an opening or closing tag: "<", an optional "/", optional whitespace,
#      one lowercase letter or hyphen, then everything up to the next ">";
#   3. capture group 1, which is either a character reference ("&" followed
#      by word characters, "#" + decimal digits, or "#x" + lowercase hex
#      digits, ending in ";") or a comment matched lazily up to the first
#      "-->".
# The letter classes are lowercase-only, so the pattern is case-sensitive
# unless it is compiled or searched with re.IGNORECASE.
regex = (
    r"<!DOCTYPE html>"
    r"|</?\s*[a-z-][^>]*\s*>"
    r"|(\&(?:[\w\d]+|#\d+|#x[a-f\d]+);|<!--[\s\S\n]*?-->)"
)
test_str = ("\n"
"# Above is a blank line, no match.\n\n"
"foo bar baz\n"
"this is a string\n"
"Testing\n"
"<>\n"
"Hello, World\n"
"This is less than <, this is greater than >.\n"
" a < 3 && b > 3\n"
"<<Important Text>>\n"
"# Not HTML-like.\n\n"
"<p>fizz buzz</p>\n"
"<a>this is a string</a>\n"
"this is a <b>string</b>\n"
"<p>Testing</p>\n"
"<img src=\"hello.jpg\">\n"
"<a>Foo</a>\n"
"<input type='submit' value='Ok' />\n"
"<input type='submit' value='Ok'>\n"
"<br/>\n"
"<br>\n"
"<!-- comment -- doesn't work! -->\n"
"<hr>\n"
"Foo & bar\n"
"# These one-line samples are totally HTML-like.\n\n"
"<file-upload>\n"
"<absurd example>\n"
"<closed example></closed>\n"
"# Custom tags.\n\n"
"<a>\n"
"# Not matched by others, but actually valid.\n\n"
"My < weird > string\n"
"# Not actually a false positive; this is valid HTML!\n\n"
"# Sample \"smallest complete HTML document\":\n"
"<!DOCTYPE html>\n"
"<title>testing</title>\n"
"<p>This is a test.</p>\n"
"<strange>This is strange.</strange>\n"
"# And yes, <strange> IS VALID HTML.\n\n"
"résume\n"
"résume\n"
"rÉsume\n"
"r&x00C9;sume\n"
"# Entities\n\n"
"# List Tricks\n"
"<ul><li>Foo</li\n"
"><li>Bar</li\n"
"></ul>\n\n"
"# From https://stackoverflow.com/a/51325984/211827\n"
"Hello, World\n"
"This is less than <, this is greater than >.\n"
" a < 3 && b > 3\n"
"<<Important Text>>\n"
"<a> # This actually is HTML, not a false positive.\n"
"<a>Foo</a>\n"
"<input type='submit' value='Ok' /> # XHTML, not HTML...\n"
"<br/> # XHTML again...\n"
"<br> # These didn't work with that answer.\n"
"Foo & bar\n"
"<input type='submit' value='Ok'>\n\n"
"# From https://regex101.com/r/cX0eP2/1\n"
"<a href=bla>sdfsdf</a>\n"
"<div>something</div>\n"
"<br>\n"
"<span>mayhem</div>\n"
"<hr />\n"
"<input name=bla / >\n"
"<div>some<span>thing</span>here</div>\n\n\n"
"# Prepare your eye bleach.\n"
"<p style=\"line-height:normal; margin-top:0px\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Notre spécialité : offrir de l’assistance à plus d’un million de Québécois. Nous sommes fiers d’aider! Participez vous aussi à cette mission en réalisant les rêves d’aventure, de détente et de découverte de nos membres et clients au sein de notre agence de voyages.</span></span></p>\\r\\n\\r\\n<p style=\"line-height:normal; margin-top:0px\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Joignez-vous à nous! Vous bénéficierez de nombreux avantages : </span></span></p>\\r\\n\\r\\n<ul style=\"line-height:normal\">\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Salaire fixe, et primes lorsque vous dépassez vos objectifs.</span></span></li>\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">20 jours de congé après une année.</span></span></li>\\r\\n\\t<li><span style=\"font-size:12px\"><span style=\"font-family:Arial\">Régime de retraite - CAA-Québec égale votre mise!</span></span></li>\\r\\n\\t<li><span style=\"font-size:12px\"><span style=\"font-family:Arial\">Assurance collective complète (soins médicaux et paramédicaux, invalidité, etc.).</span></span></li>\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Rabais trippants chez nos partenaires, dans nos centres Voyages et pour vos assurances.</span></span></li>\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Plus de 1,2 million de membres comme clients potentiels.</span></span></li>\\r\\n\\t<li style=\"line-height: 
normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Soutien administratif pour vous concentrer sur la vente de voyages.</span></span></li>\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Allocation généreuse pour les éducotours.</span></span></li>\\r\\n</ul>\\r\\n\\r\\n<p style=\"line-height:normal; margin-bottom:0px; margin-top:0px\"> </p>\\r\\n\\r\\n<p style=\"line-height:normal; margin-bottom:0px; margin-top:0px\"><img class=\"largeimage\" src=\"https://gestiondestalents.caaquebec.com/caa/login/caa/_attachments/news_files/activex/image%20conseiller%20en%20voyage.JPG\" style=\"line-height:normal; width:100%\" /></p>\\r\\n\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t <br/><br/>\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<p style=\"line-height:normal; margin-top:0px\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">En tant que conseiller en voyages, vos principales tâches et responsabilités seront celles-ci :</span></span></p>\\r\\n\\r\\n<ul style=\"line-height:normal\">\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Réaliser une analyse des besoins des clients et leur fournir des renseignements précis et utiles.</span></span></li>\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Effectuer les réservations et achats (forfaits, croisières, circuits, hôtels, automobiles et assurances 
voyage).</span></span></li>\\r\\n</ul>\\r\\n\\r\\n<p style=\"line-height:normal; margin-top:0px\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Vous occuperez un poste régulier à temps plein (35 heures par semaine). L’horaire sera variable et vous devrez parfois travailler le soir et la fin de semaine afin de bien servir les voyageurs.</span></span></p>\\r\\n\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t <br/><br/>\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<p style=\"line-height:normal; margin-top:0px\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Nous sommes toujours à la recherche de personnes de talent. 
Mais vous devrez avoir un profil précis pour ce poste!</span></span></p>\\r\\n\\r\\n<ul style=\"line-height:normal\">\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Diplôme d’études collégiales en tourisme ou formation d’agent de voyages.</span></span></li>\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">2 à 3 années d’expérience comme conseiller en voyages.</span></span></li>\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Certificat de conseiller en voyages de l’Office de la protection du consommateur, ou être en mesure de l’obtenir.</span></span></li>\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Connaissance des systèmes de délivrance de billets : GDS, Galileo/Apollo, PcVoyages et SIREV (un atout).</span></span></li>\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Maîtrise du français et de l’anglais.</span></span></li>\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Professionnalisme, attention aux besoins des clients et volonté d’offrir un service de qualité.</span></span></li>\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Souci d’atteindre les objectifs de vente.</span></span></li>\\r\\n</ul>\\r\\n\\r\\n<p style=\"line-height:normal; margin-top:0px\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\"><span 
style=\"line-height:normal\">Si explorer de nouveaux horizons vous passionne, et que vous aimez aider d’autres personnes à découvrir le monde, vous serez heureux à Voyages CAA-Québec. Postulez dès aujourd’hui. Nous vous attendons avec impatience!</span></span></span></p>")
# Scan `test_str` for every non-overlapping match of `regex`, in order, and
# print each match's span and text followed by the span and text of each of
# its capture groups.
matches = re.finditer(regex, test_str)

for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=matchNum,
        start=match.start(),
        end=match.end(),
        match=match.group()))

    # Capture groups are numbered from 1 (group 0 is the whole match, already
    # reported above).  A group that took no part in the match is still
    # printed: its start/end are -1 and its value is None.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=groupNum,
            start=match.start(groupNum),
            end=match.end(groupNum),
            group=match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html