import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Runs a regular expression that recognises HTML-like fragments against a fixed block of
 * sample text and prints every match followed by the value of each capture group.
 */
public class Example {

    /**
     * Source of the pattern. It has three top-level alternatives:
     * <ol>
     *   <li>the literal text {@code <!DOCTYPE html>};</li>
     *   <li>an opening or closing tag: {@code <}, an optional {@code /}, optional whitespace,
     *       one lower-case ASCII letter or hyphen, then anything up to the next {@code >};</li>
     *   <li>capture group 1: either a character reference (an ampersand, then a name, a decimal
     *       {@code #NNN} or a hexadecimal {@code #xNNN}, then a semicolon) or a comment
     *       ({@code <!--} up to the nearest {@code -->}).</li>
     * </ol>
     * No flags are passed when compiling, so matching is case-sensitive: a tag whose name starts
     * with an upper-case letter is not matched.
     */
    private static final String REGEX = "<!DOCTYPE html>|</?\\s*[a-z-][^>]*\\s*>|(\\&(?:[\\w\\d]+|#\\d+|#x[a-f\\d]+);|<!--[\\s\\S\\n]*?-->)";

    /** Compiled once; a {@link Pattern} is immutable and can be reused for every matcher. */
    private static final Pattern PATTERN = Pattern.compile(REGEX);

    /**
     * Sample input. Lines starting with {@code #} are annotations inside the sample itself.
     * The long HTML fragment at the end is written as several adjacent literals because a Java
     * string literal cannot continue across a physical line break; the pieces are joined without
     * any separator.
     */
    private static final String SAMPLE_TEXT = "\n"
            + "# Above is a blank line, no match.\n\n"
            + "foo bar baz\n"
            + "this is a string\n"
            + "Testing\n"
            + "<>\n"
            + "Hello, World\n"
            + "This is less than <, this is greater than >.\n"
            + " a < 3 && b > 3\n"
            + "<<Important Text>>\n"
            + "# Not HTML-like.\n\n"
            + "<p>fizz buzz</p>\n"
            + "<a>this is a string</a>\n"
            + "this is a <b>string</b>\n"
            + "<p>Testing</p>\n"
            + "<img src=\"hello.jpg\">\n"
            + "<a>Foo</a>\n"
            + "<input type='submit' value='Ok' />\n"
            + "<input type='submit' value='Ok'>\n"
            + "<br/>\n"
            + "<br>\n"
            + "<!-- comment -- doesn't work! -->\n"
            + "<hr>\n"
            + "Foo & bar\n"
            + "# These one-line samples are totally HTML-like.\n\n"
            + "<file-upload>\n"
            + "<absurd example>\n"
            + "<closed example></closed>\n"
            + "# Custom tags.\n\n"
            + "<a>\n"
            + "# Not matched by others, but actually valid.\n\n"
            + "My < weird > string\n"
            + "# Not actually a false positive; this is valid HTML!\n\n"
            + "# Sample \"smallest complete HTML document\":\n"
            + "<!DOCTYPE html>\n"
            + "<title>testing</title>\n"
            + "<p>This is a test.</p>\n"
            + "<strange>This is strange.</strange>\n"
            + "# And yes, <strange> IS VALID HTML.\n\n"
            + "résume\n"
            + "résume\n"
            + "rÉsume\n"
            + "r&x00C9;sume\n"
            + "# Entities\n\n"
            + "# List Tricks\n"
            + "<ul><li>Foo</li\n"
            + "><li>Bar</li\n"
            + "></ul>\n\n"
            + "# From https://stackoverflow.com/a/51325984/211827\n"
            + "Hello, World\n"
            + "This is less than <, this is greater than >.\n"
            + " a < 3 && b > 3\n"
            + "<<Important Text>>\n"
            + "<a> # This actually is HTML, not a false positive.\n"
            + "<a>Foo</a>\n"
            + "<input type='submit' value='Ok' /> # XHTML, not HTML...\n"
            + "<br/> # XHTML again...\n"
            + "<br> # These didn't work with that answer.\n"
            + "Foo & bar\n"
            + "<input type='submit' value='Ok'>\n\n"
            + "# From https://regex101.com/r/cX0eP2/1\n"
            + "<a href=bla>sdfsdf</a>\n"
            + "<div>something</div>\n"
            + "<br>\n"
            + "<span>mayhem</div>\n"
            + "<hr />\n"
            + "<input name=bla / >\n"
            + "<div>some<span>thing</span>here</div>\n\n\n"
            + "# Prepare your eye bleach.\n"
            + "<p style=\"line-height:normal; margin-top:0px\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Notre spécialité : offrir de l’assistance à plus d’un million de Québécois. Nous sommes fiers d’aider! Participez vous aussi à cette mission en réalisant les rêves d’aventure, de détente et de découverte de nos membres et clients au sein de notre agence de voyages.</span></span></p>\\r\\n\\r\\n<p style=\"line-height:normal; margin-top:0px\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Joignez-vous à nous! Vous bénéficierez de nombreux avantages : </span></span></p>\\r\\n\\r\\n<ul style=\"line-height:normal\">\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Salaire fixe, et primes lorsque vous dépassez vos objectifs.</span></span></li>\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">20 jours de congé après une année.</span></span></li>\\r\\n\\t<li><span style=\"font-size:12px\"><span style=\"font-family:Arial\">Régime de retraite - CAA-Québec égale votre mise!</span></span></li>\\r\\n\\t<li><span style=\"font-size:12px\"><span style=\"font-family:Arial\">Assurance collective complète (soins médicaux et paramédicaux, invalidité, etc.).</span></span></li>\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Rabais trippants chez nos partenaires, dans nos centres Voyages et pour vos assurances.</span></span></li>\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Plus de 1,2 million de membres comme clients potentiels.</span></span></li>\\r\\n\\t<li style=\"line-height: "
            + "normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Soutien administratif pour vous concentrer sur la vente de voyages.</span></span></li>\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Allocation généreuse pour les éducotours.</span></span></li>\\r\\n</ul>\\r\\n\\r\\n<p style=\"line-height:normal; margin-bottom:0px; margin-top:0px\"> </p>\\r\\n\\r\\n<p style=\"line-height:normal; margin-bottom:0px; margin-top:0px\"><img class=\"largeimage\" src=\"https://gestiondestalents.caaquebec.com/caa/login/caa/_attachments/news_files/activex/image%20conseiller%20en%20voyage.JPG\" style=\"line-height:normal; width:100%\" /></p>\\r\\n\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t <br/><br/>\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<p style=\"line-height:normal; margin-top:0px\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">En tant que conseiller en voyages, vos principales tâches et responsabilités seront celles-ci :</span></span></p>\\r\\n\\r\\n<ul style=\"line-height:normal\">\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Réaliser une analyse des besoins des clients et leur fournir des renseignements précis et utiles.</span></span></li>\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Effectuer les réservations et achats (forfaits, croisières, circuits, hôtels, automobiles et assurances "
            + "voyage).</span></span></li>\\r\\n</ul>\\r\\n\\r\\n<p style=\"line-height:normal; margin-top:0px\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Vous occuperez un poste régulier à temps plein (35 heures par semaine). L’horaire sera variable et vous devrez parfois travailler le soir et la fin de semaine afin de bien servir les voyageurs.</span></span></p>\\r\\n\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t\\r\\n\\t\\t <br/><br/>\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<p style=\"line-height:normal; margin-top:0px\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Nous sommes toujours à la recherche de personnes de talent. "
            + "Mais vous devrez avoir un profil précis pour ce poste!</span></span></p>\\r\\n\\r\\n<ul style=\"line-height:normal\">\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Diplôme d’études collégiales en tourisme ou formation d’agent de voyages.</span></span></li>\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">2 à 3 années d’expérience comme conseiller en voyages.</span></span></li>\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Certificat de conseiller en voyages de l’Office de la protection du consommateur, ou être en mesure de l’obtenir.</span></span></li>\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Connaissance des systèmes de délivrance de billets : GDS, Galileo/Apollo, PcVoyages et SIREV (un atout).</span></span></li>\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Maîtrise du français et de l’anglais.</span></span></li>\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Professionnalisme, attention aux besoins des clients et volonté d’offrir un service de qualité.</span></span></li>\\r\\n\\t<li style=\"line-height: normal;\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\">Souci d’atteindre les objectifs de vente.</span></span></li>\\r\\n</ul>\\r\\n\\r\\n<p style=\"line-height:normal; margin-top:0px\"><span style=\"font-size:12px; line-height:normal\"><span style=\"font-family:Arial; line-height:normal\"><span "
            + "style=\"line-height:normal\">Si explorer de nouveaux horizons vous passionne, et que vous aimez aider d’autres personnes à découvrir le monde, vous serez heureux à Voyages CAA-Québec. Postulez dès aujourd’hui. Nous vous attendons avec impatience!</span></span></span></p>";

    /**
     * Prints, for every match of the pattern in the sample text, one {@code Full match: ...} line
     * followed by one {@code Group N: ...} line per capture group.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        for (String line : describeMatches(SAMPLE_TEXT)) {
            System.out.println(line);
        }
    }

    /**
     * Scans {@code input} from left to right and builds the report lines for every match.
     *
     * <p>Each match contributes {@code "Full match: " + match}, then {@code "Group i: " + value}
     * for each capture group. A group that did not take part in the match is reported as the
     * text {@code null}, because {@link Matcher#group(int)} returns {@code null} for it; that is
     * the case for group 1 whenever the DOCTYPE or tag alternative matched.
     *
     * @param input text to search
     * @return the report lines in match order; empty when nothing matches
     */
    static List<String> describeMatches(String input) {
        final List<String> lines = new ArrayList<>();
        final Matcher matcher = PATTERN.matcher(input);
        while (matcher.find()) {
            lines.add("Full match: " + matcher.group(0));
            for (int i = 1; i <= matcher.groupCount(); i++) {
                lines.add("Group " + i + ": " + matcher.group(i));
            }
        }
        return lines;
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html