// JavaScript regular expressions have no free-spacing (`x`) mode: a regex
// literal cannot span several lines, and `#` does not start a comment inside
// a pattern. The pattern is therefore assembled from documented string pieces
// and compiled with the RegExp constructor.

// Elements whose symbol is a single letter.
const singleLetterElements = '[BCFHIKNOPSUVWY]';

// Elements whose symbol is two letters, grouped by first letter.
const twoLetterElements = [
  'A[cglmrstu]', 'B[aehikr]', 'C[adeflmnorsu]', 'D[bsy]',
  'E[rsu]', 'F[elmr]', 'G[ade]', 'H[efgos]', 'I[nr]',
  'K[r]', 'L[airuv]', 'M[cdgnot]', 'N[abdehiop]',
  'O[gs]', 'P[abdmortu]', 'R[abefghnu]', 'S[bcegimnr]',
  'T[abcehilms]', 'X[e]', 'Yb', 'Z[nr]',
].join('|');

// "Smushed" trigrams: two two-letter symbols overlapping on their middle
// letter (e.g. Aga = Ag + Ga). No need to go past trigrams because two twos
// make four.
const smushedTrigrams = [
  'A(?:ga|gd|ge|la|lr|md|mg|mt|ra|re|rg|ta|te|tl|tm)',
  'E(?:ra|re|rg)',
  'G(?:ag|al|am|ar|at|er)',
  'L(?:ag|al|am|ar|at|ra|re|rg)',
  'M(?:ga|gd|ge|ta|te|tl|tm)',
  'R(?:ag|al|am|ar|at|er|ga|gd|ge)',
  'T(?:ag|al|am|ar|at|er|la|lr|md|mg|mt)',
  'X(?:er)',
  'Z(?:ra|re|rg)',
].join('|');

/**
 * Matches a whole line that can be written as a chain of spaces, one-letter
 * symbols, two-letter symbols and smushed trigrams.
 *
 * Flags: `g` (find every matching line), `i` (case-insensitive) and
 * `m` (`^`/`$` anchor at line boundaries).
 * Capture group 1 holds the last chunk consumed by the repetition.
 */
const regex = new RegExp(
  [
    '^',
    // To allow prefix continuation (second letter of a dangling match),
    // insert '([abcdefghiklmnoprstuvy ])?' here. Doing so shifts the group
    // numbers of everything after it.
    //
    // The alternatives are tried in this order: space, single letter,
    // two letters, trigram; the whole group repeats any number of times.
    `( |(?:${singleLetterElements})|(?:${twoLetterElements})|(?:${smushedTrigrams}))+`,
    // To allow a last letter that is a potential first letter of the next
    // word, insert '([ADEGLMRTXZ] *)?' here.
    '$',
  ].join(''),
  'gim',
);
// Sample input the regex is executed against: a multi-line block of notes and
// test words. It is a template literal, so `\\` below yields a single
// backslash and `\$` yields a plain `$` in the resulting string.
const str = `**(See bottom for Javascript/one-line version)**
If you want continuation -- to look for "fez" "rave" "rites" chains, strike the second and second-to-last lines. Latex needs a suffix and ear needs a prefix.
latex
ear
latexear
fez
rave
rites
fezraverites
if you don't want overlap, comment out the big group with (?<=..) tests. Walter and lag will not match: lag is La + Ag but can't be L+Ag or La+G; walter is W+Al+Te+Er but there's no single A, L, T, E or R to make it a strict chain of elements.
Walter
aga
McLvinandfezrobHogwartsWizrdsofMoney
## Match:
McLvIn
McLvinandfezrobHogwartsWizrdsofMoney
zr
McLvinand
ergo
Orgasmicallabkitscashflow
update
Organicfurbies
Babkes
WalterWhite
That
ibexesnogladybirds
Xenophobic
picnicforgus
snapes
siriusblack
## match until last letter, and last letter potentially starts a new word (allows continuation)
fez
McLvinandfez
bobatea
# La + At + Te
late
# the x awaits an e in the next word
latex
# N + Nd + Dy + Y + Yb
ndyb
# the a is a potential continuation
andy
# ...but the x can't be a second letter
xndyb
## Don't Match at some interior point in line
Jemmamead
## Don't Match anywhere in line
JemmaQmead
### For Javascript:
^([abcdefghiklmnoprstuvy\\s])?(\\ |(?:[BCFHIKNOPSUVWY])|(?:(?:A[cglmrstu])|(?:B[aehikr])|(?:C[adeflmnorsu])|(?:D[bsy])|(?:E[rsu])|(?:F[elmr])|(?:G[ade])|(?:H[efgos])|(?:I[nr])|(?:K[r])|(?:L[airuv])|(?:M[cdgnot])|(?:N[abdehiop])|(?:O[gs])|(?:P[abdmortu])|(?:R[abefghnu])|(?:S[bcegimnr])|(?:T[abcehilms])|(?:X[e])|(?:Yb)|(?:Z[nr]))|(?:(?:A(?:ga|gd|ge|la|lr|md|mg|mt|ra|re|rg|ta|te|tl|tm))|(?:E(?:ra|re|rg))|(?:G(?:ag|al|am|ar|at|er))|(?:L(?:ag|al|am|ar|at|ra|re|rg))|(?:M(?:ga|gd|ge|ta|te|tl|tm))|(?:R(?:ag|al|am|ar|at|er|ga|gd|ge))|(?:T(?:ag|al|am|ar|at|er|la|lr|md|mg|mt))|(?:X(?:er))|(?:Z(?:ra|re|rg))))+([ADEGLMRTXZ]\\ *)?\$`;
// `matchAll` works on a copy of the regex that starts from the current
// `regex.lastIndex`, so reset it first if this regex is defined globally:
// regex.lastIndex = 0;
// The iterator also steps past zero-width matches on its own, so no manual
// `lastIndex` bump is needed to avoid an infinite loop.
for (const found of str.matchAll(regex)) {
  // Index 0 is the full match; the following indices are the capture groups.
  for (const [groupIndex, text] of found.entries()) {
    console.log(`Found match, group ${groupIndex}: ${text}`);
  }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for JavaScript, please visit: https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions