import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small demonstration that scans an HTML-like snippet with a single regular
 * expression and prints every match together with its capture groups.
 *
 * <p>The expression is an alternation of four branches, tried in order:
 * <ol>
 *   <li>an HTML comment (group 1 captures the comment body);</li>
 *   <li>a paired tag whose closing tag repeats the opening name (the name is
 *       captured by the named group {@code tag}, which is group 2);</li>
 *   <li>a self-closing tag ending in a slash (group 3 captures the name);</li>
 *   <li>any remaining run of non-whitespace characters and spaces.</li>
 * </ol>
 */
public class Example {

    /**
     * Pattern source. Java spells a named group as {@code (?<name>...)} and a
     * back-reference to it as {@code \k<name>}; the Python-style
     * {@code (?P<name>...)} and {@code (?P=name)} forms are rejected by
     * {@link Pattern#compile(String, int)} with a PatternSyntaxException.
     */
    private static final String REGEX =
            "<!--(.*?)-->"
            + "|<(?<tag>[a-z0-9]+?)[^>]*>.*?<\\/\\k<tag>>"
            + "|<([a-z0-9]+).*?\\/>"
            + "|[\\S ]+";

    /** Sample text that is scanned by {@link #main(String[])}. */
    private static final String INPUT = "<!--comment1\n"
            + " -->\n"
            + "<h1>header</h1>\n\n"
            + "<img src=\"http://example.com/img.png\" title=\"single tag\"/>\n\n"
            + "<p>long text</p>\n\n"
            + "<img src=\"http://example.com/img2.png\"\n"
            + " title=\"single tag\"/>\n\n"
            + " \n\n"
            + " \n"
            + "<!-- comment1 -->\n"
            + "<ul>\n"
            + " <li>item1</li>\n"
            + " <li>item2</li>\n"
            + " <li>item3</li>\n"
            + "</ul>\n\n"
            + " \n"
            + "some text\n"
            + " \n\n\n"
            + "<br class=\"unclosed\">\n\n";

    /**
     * Compiled once. DOTALL lets {@code .} cross line breaks so comments and
     * elements spanning several lines match as a unit; CASE_INSENSITIVE lets
     * the {@code [a-z0-9]} classes and the back-reference accept upper case.
     */
    private static final Pattern PATTERN =
            Pattern.compile(REGEX, Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    /**
     * Prints, for each match found in the sample text, the full match followed
     * by every capture group in numeric order. Groups that did not take part
     * in a match are printed as {@code null}.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        final Matcher matcher = PATTERN.matcher(INPUT);
        while (matcher.find()) {
            System.out.println("Full match: " + matcher.group(0));
            // groupCount() excludes group 0, hence the inclusive upper bound.
            for (int i = 1; i <= matcher.groupCount(); i++) {
                System.out.println("Group " + i + ": " + matcher.group(i));
            }
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html