import Foundation
// Free-spacing (extended-mode) pattern that accepts a line only when the whole
// line can be spelled as a chain of the tokens listed below:
//   * a space,
//   * a single-letter element symbol,
//   * a two-letter element symbol,
//   * a "smushed trigram": three letters formed by two overlapping two-letter
//     symbols (e.g. `Aga` = `Ag` + `Ga`).
// The literal is a raw string (`##"""`), so backslashes reach the regex engine
// unchanged. Whitespace and `#` comments inside it are ignored by the engine
// because `.allowCommentsAndWhitespace` is passed below.
//
// The space alternative is written as `\x20` rather than backslash-space: an
// escaped space at the end of a line is lost as soon as trailing whitespace is
// stripped, leaving a backslash that escapes the line break instead (so the
// group would match a newline, not a space).
let pattern = ##"""
^
# Uncomment to allow prefix continuation (second letter of a dangling match)
# ([abcdefghiklmnoprstuvy\ ])?
( \x20
|(?: # Single-letter elements
[BCFHIKNOPSUVWY]
)
|(?: # Two-letter elements.
(?:A[cglmrstu])|(?:B[aehikr])|(?:C[adeflmnorsu])|(?:D[bsy])
| (?:E[rsu])|(?:F[elmr])|(?:G[ade])|(?:H[efgos])|(?:I[nr])
| (?:K[r])|(?:L[airuv])|(?:M[cdgnot])|(?:N[abdehiop])
| (?:O[gs])|(?:P[abdmortu])|(?:R[abefghnu])|(?:S[bcegimnr])
| (?:T[abcehilms])|(?:X[e])|(?:Yb)|(?:Z[nr])
)
|(?: # Smushed trigrams. No need to go past trigrams because two twos make four
(?:A(?:ga|gd|ge|la|lr|md|mg|mt|ra|re|rg|ta|te|tl|tm))
|(?:E(?:ra|re|rg))
|(?:G(?:ag|al|am|ar|at|er))
|(?:L(?:ag|al|am|ar|at|ra|re|rg))
|(?:M(?:ga|gd|ge|ta|te|tl|tm))
|(?:R(?:ag|al|am|ar|at|er|ga|gd|ge))
|(?:T(?:ag|al|am|ar|at|er|la|lr|md|mg|mt))
|(?:X(?:er))
|(?:Z(?:ra|re|rg))
)
# repeated any number of times
)+
# Uncomment to allow last letter if it's a potential first letter of the next word
# ([ADEGLMRTXZ]\ *)?
$
"""##

// Options:
//   .anchorsMatchLines           - `^` and `$` anchor at every line boundary.
//   .caseInsensitive             - symbols match regardless of letter case.
//   .allowCommentsAndWhitespace  - required for the multi-line, commented layout above.
// `try!` is deliberate: the pattern is a compile-time constant, so a failure
// here is a programming error that should stop the script immediately.
let regex = try! NSRegularExpression(
    pattern: pattern,
    options: [.anchorsMatchLines, .caseInsensitive, .allowCommentsAndWhitespace]
)
// Fixture text that `regex` is run against. This is a raw multi-line literal
// (`####"""` ... `"""####`), so every backslash, `#`, and quote inside it is
// taken literally. The lines that look like notes or headings are part of the
// searched input, not source comments.
let testString = ####"""
**(See bottom for Javascript/one-line version)**
If you want continuation -- to look for "fez" "rave" "rites" chains, strike the second and second-to-last lines. Latex needs a suffix and ear needs a prefix.
latex
ear
latexear
fez
rave
rites
fezraverites
if you don't want overlap, comment out the big group with (?<=..) tests. Walter and lag will not match: lag is La + Ag but can't be L+Ag or La+G; walter is W+Al+Te+Er but there's no single A, L, T, E or R to make it a strict chain of elements.
Walter
aga
McLvinandfezrobHogwartsWizrdsofMoney
## Match:
McLvIn
McLvinandfezrobHogwartsWizrdsofMoney
zr
McLvinand
ergo
Orgasmicallabkitscashflow
update
Organicfurbies
Babkes
WalterWhite
That
ibexesnogladybirds
Xenophobic
picnicforgus
snapes
siriusblack
## match until last letter, and last letter potentially starts a new word (allows continuation)
fez
McLvinandfez
bobatea
# La + At + Te
late
# the x awaits an e in the next word
latex
# N + Nd + Dy + Y + Yb
ndyb
# the a is a potential continuation
andy
# ...but the x can't be a second letter
xndyb
## Don't Match at some interior point in line
Jemmamead
## Don't Match anywhere in line
JemmaQmead
### For Javascript:
^([abcdefghiklmnoprstuvy\s])?(\ |(?:[BCFHIKNOPSUVWY])|(?:(?:A[cglmrstu])|(?:B[aehikr])|(?:C[adeflmnorsu])|(?:D[bsy])|(?:E[rsu])|(?:F[elmr])|(?:G[ade])|(?:H[efgos])|(?:I[nr])|(?:K[r])|(?:L[airuv])|(?:M[cdgnot])|(?:N[abdehiop])|(?:O[gs])|(?:P[abdmortu])|(?:R[abefghnu])|(?:S[bcegimnr])|(?:T[abcehilms])|(?:X[e])|(?:Yb)|(?:Z[nr]))|(?:(?:A(?:ga|gd|ge|la|lr|md|mg|mt|ra|re|rg|ta|te|tl|tm))|(?:E(?:ra|re|rg))|(?:G(?:ag|al|am|ar|at|er))|(?:L(?:ag|al|am|ar|at|ra|re|rg))|(?:M(?:ga|gd|ge|ta|te|tl|tm))|(?:R(?:ag|al|am|ar|at|er|ga|gd|ge))|(?:T(?:ag|al|am|ar|at|er|la|lr|md|mg|mt))|(?:X(?:er))|(?:Z(?:ra|re|rg))))+([ADEGLMRTXZ]\ *)?$
"""####
// Search the whole fixture. NSRegularExpression addresses text in UTF-16 code
// units, so the range is expressed as an NSRange over the string's full extent.
let stringRange = NSRange(testString.startIndex..., in: testString)
let matches = regex.matches(in: testString, range: stringRange)

// For every match, gather the text of each capture group that participated
// (index 0, the overall match, is skipped). Matches that yield no captured
// text at all are left out of the result.
var result: [[String]] = matches.compactMap { match -> [String]? in
    let source = testString as NSString
    let captured = (1 ..< match.numberOfRanges)
        .map { match.range(at: $0) }
        // {NSNotFound, 0} is how a group that did not participate is reported.
        .filter { $0.location != NSNotFound || $0.length != 0 }
        .map { source.substring(with: $0) }
    return captured.isEmpty ? nil : captured
}
print(result)
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Swift 5.2, please visit: https://developer.apple.com/documentation/foundation/nsregularexpression