/**
 * Matches one hyphen/dash-like character, written either literally or as an
 * HTML character reference. The trailing `;` of a reference is optional.
 *
 * Capture groups (exactly one is defined per match):
 *   1. literal character: U+002D, U+2010-U+2015, U+2212, U+2043, U+FE58,
 *      U+FE63, U+FF0D
 *   2. hex code of a `&#x...` reference, without leading zeros
 *   3. decimal code of a `&#...` reference, without leading zeros
 *   4. entity name: hyphen, dash, ndash, mdash, hybull, horbar, minus
 *
 * Numeric references accept `x`/`X`, either hex-digit case, and zero padding.
 * The lookaheads after groups 2 and 3 reject a code that is only the prefix
 * of a longer number (`&#456;` is not `&#45` followed by `6`).
 * Entity names stay case-sensitive and have no trailing boundary, so a name
 * is still matched as the prefix of a longer one (e.g. `&dash` in `&dashv;`).
 *
 * The regex is global: `exec`/`test` resume from `regex.lastIndex`.
 */
const regex = /(?:([\-\u2010-\u2015\u2212\u2043\uFE58\uFE63\uFF0D])|(?:&(?:(?:#[xX]0*(2[dD]|201[0-5]|2212|2043|[fF][eE]58|[fF][eE]63|[fF][fF]0[dD])(?![0-9a-fA-F]))|(?:#0*(45|820[89]|821[0-3]|8722|8259|65112|65123|65293)(?![0-9]))|(hyphen|[nm]?dash|hybull|horbar|minus));?))/gmu;
// Sample input for the matcher: an introductory sentence, one labelled row per
// dash-like character (label, several copies of the character, one copy
// followed by `;`), and finally the pattern's own source text.
// NOTE(review): the rows show only literal characters, no `&...;` references,
// so the entity branches are presumably not exercised by this sample - confirm
// the text was not entity-decoded somewhere upstream.
// NOTE(review): the `em` row appears to hold en dashes and the `en` row em
// dashes - confirm whether the labels are swapped.
const str = `This captures an entity even if it lacks the ';', which is commonly encountered in the wild.
kbdash - - - - -; -
dash ‐ ‐ ‐ ‐ ‐ ‐; ‐
hyphen ‑ ‐ ‑ ‑ ‑ ‑; ‑
figure ‒ ‒ ‒ ‒ ‒; ‒
em – – – – – –; –
en — — — — — —; —
horbar ― ― ― ― ― ―; ―
minus − − − − − −; −
hybull ⁃ ⁃ ⁃ ⁃ ⁃ ⁃; ⁃
fe58 ﹘ ﹘ ﹘ ﹘ ﹘; ﹘
fe63 ﹣ ﹣ ﹣ ﹣ ﹣; ﹣
ff0d - - - - -; -
(?:([-‐‑‒–—―−⁃﹘﹣-])|(?:&(?:(?:#x(2d|201[0-5]|2212|2043|fe58|fe63|ff0d))|(?:#(45|820[89]|821[0123]|8722|8259|65112|65123|65293))|(hyphen|[nm]?dash|hybull|horbar|minus));?))`;
// Walk every match of `regex` in `str` and print the full match plus each
// capture group. `exec` resumes from `regex.lastIndex`, so set that to 0
// beforehand if this regex object has already been used elsewhere.
let m = regex.exec(str);
while (m !== null) {
  // A zero-width match leaves `lastIndex` where it was; step past it so the
  // next `exec` call cannot return the same position forever.
  if (regex.lastIndex === m.index) {
    regex.lastIndex += 1;
  }
  // Index 0 is the whole match; higher indices are the capture groups
  // (printed as "undefined" when a group did not participate).
  for (let groupIndex = 0; groupIndex < m.length; groupIndex += 1) {
    const match = m[groupIndex];
    console.log(`Found match, group ${groupIndex}: ${match}`);
  }
  m = regex.exec(str);
}
// Please keep in mind that these code samples are automatically generated and
// are not guaranteed to work. If you find any syntax errors, feel free to
// submit a bug report. For a full regex reference for JavaScript, please visit:
// https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions