import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demo that scans a fixed sample text for dash-like characters and for the
 * HTML entity spellings of those characters, printing every match together
 * with its capture groups.
 *
 * <p>Capture groups of the pattern:
 * <ol>
 *   <li>a literal dash-like character,</li>
 *   <li>the hex code of a {@code &#x...} entity,</li>
 *   <li>the decimal code of a {@code &#...} entity,</li>
 *   <li>the name of a named entity such as {@code &hyphen}.</li>
 * </ol>
 * The trailing {@code ;} of an entity is optional in the pattern.
 */
public class Example {

    /** Pattern source: one literal dash-like character, or a hex / decimal / named entity. */
    private static final String DASH_REGEX = "(?:([-‐‑‒–—―−⁃﹘﹣-])|(?:&(?:(?:#x(2d|201[0-5]|2212|2043|fe58|fe63|ff0d))|(?:#(45|820[89]|821[0123]|8722|8259|65112|65123|65293))|(hyphen|[nm]?dash|hybull|horbar|minus));?))";

    /** Text that is searched; its last line is a copy of the pattern source. */
    private static final String SAMPLE_TEXT = "This captures an entity even if it lacks the ';', which is commonly encountered in the wild.\n\n"
            + "kbdash - - - - -; -\n"
            + "dash ‐ ‐ ‐ ‐ ‐ ‐; ‐\n"
            + "hyphen ‑ ‐ ‑ ‑ ‑ ‑; ‑\n"
            + "figure ‒ ‒ ‒ ‒ ‒; ‒\n"
            + "em – – – – – –; –\n"
            + "en — — — — — —; —\n"
            + "horbar ― ― ― ― ― ―; ―\n"
            + "minus − − − − − −; −\n"
            + "hybull ⁃ ⁃ ⁃ ⁃ ⁃ ⁃; ⁃\n"
            + "fe58 ﹘ ﹘ ﹘ ﹘ ﹘; ﹘\n"
            + "fe63 ﹣ ﹣ ﹣ ﹣ ﹣; ﹣\n"
            + "ff0d - - - - -; -\n\n"
            + "(?:([-‐‑‒–—―−⁃﹘﹣-])|(?:&(?:(?:#x(2d|201[0-5]|2212|2043|fe58|fe63|ff0d))|(?:#(45|820[89]|821[0123]|8722|8259|65112|65123|65293))|(hyphen|[nm]?dash|hybull|horbar|minus));?))";

    /**
     * Compiles the pattern, walks over every match in the sample text and
     * prints each one to standard output.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        // Flags are kept exactly as in the generated sample. The pattern has no
        // ^ or $ anchors, and UNICODE_CASE only matters together with
        // CASE_INSENSITIVE, so neither flag changes what is matched here.
        Matcher hits = Pattern
                .compile(DASH_REGEX, Pattern.MULTILINE | Pattern.UNICODE_CASE)
                .matcher(SAMPLE_TEXT);

        for (boolean found = hits.find(); found; found = hits.find()) {
            printHit(hits);
        }
    }

    /**
     * Prints the current match of {@code hit} followed by one line per
     * capture group; a group that did not take part in the match prints as
     * {@code null}.
     */
    private static void printHit(Matcher hit) {
        System.out.println("Full match: " + hit.group(0));

        int groupTotal = hit.groupCount();
        int groupIndex = 1;
        while (groupIndex <= groupTotal) {
            System.out.println("Group " + groupIndex + ": " + hit.group(groupIndex));
            groupIndex++;
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not
// guaranteed to work. If you find any syntax errors, feel free to submit a bug report.
// For a full regex reference for Java, please visit:
// https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html