// Code to generate the PO box regex:
// Get the string version of each regex that we need to match PO boxes.
// NOTE: a literal space inside any pattern string below is a placeholder. When PO_BOX_REGEX is
// compiled, every space is swapped for "zero or more separator characters".
// The `i` flags on the literals are for readability only: `.source` does not carry flags, so
// case-insensitivity comes from the "i" flag passed to the RegExp constructor at the bottom.
const separator = /[ ./\-_]/.source; // Any of these characters can separate the PO box words (space, period, slash, dash, underscore)
const postalGroup = /(P(?:ost(?:al)?)?)/i.source; // "P", "Post", "Postal"
const officeGroup = /(O(?:ff(?:ice)?)?)/i.source; // "O", "Off", "Office"
const boxGroup = /(b(?:o?x|in)?)/i.source; // "b", "bx", "box", "bin"
const fullBoxGroup = /(box|bin)/i.source; // "box" or "bin" (full word only)
const ruralRouteGroup = /(r(?:ural)? r(?:oute)?)/i.source; // "rr", "r r", "ruralr", "rroute", "rural route", etc. (space will be replaced with full separators later)
const numTextGroup = /(n\.?(?:o|um(?:ber)?)?\.?)/i.source; // "n", "no", "num", "number", "n.", "n.o.", "no.", "num.", etc.
const digitsGroup = /(\d+)/.source; // 1 or more digits

// Construct the number part of the PO box regex string
const poBoxNumberGroup = `(${numTextGroup}? #* ${digitsGroup})`; // Match "number # 123", "no 123", "# 123", "123", etc.

// Construct the PO box words regex strings
const poBoxWordsOfficeOptional = `(?:${postalGroup} ${officeGroup}? ${boxGroup})`; // Match stuff like "post box", "post office box", or "P B" (where the office part is optional)
const poBoxWordsBoxOptional = `(?:${postalGroup} ${officeGroup} ${boxGroup}?)`; // Match stuff like "post office", "post office box", or "PO" (where the box part is optional)
// The negative lookbehind rejects a bare "box"/"bin" that directly follows a rural route
// (optionally with its number), e.g. the "Box" in "RR 2 Box 123".
const poBoxWordsBoxOnly = `(?:(?<!${ruralRouteGroup} ${poBoxNumberGroup}? )${fullBoxGroup})`; // Match just "box" or "bin" (unless preceded by a rural route)
const poBoxWordsGroup = `(${poBoxWordsOfficeOptional}|${poBoxWordsBoxOptional}|${poBoxWordsBoxOnly})`; // Match any of the above

// Construct the whole PO box regex string (with word boundaries, but still excluding the separators).
// Capture group 1 wraps the words plus the number, i.e. the entire PO box text.
const wholePOBoxGroup = `\\b(${poBoxWordsGroup} ${poBoxNumberGroup})\\b`;

// Construct the final PO box regex
const PO_BOX_REGEX = new RegExp(
  // Replace every placeholder space with a pattern matching any number of the separators.
  // split/join swaps all occurrences literally (the inserted text is not re-scanned, so the
  // space inside the separator class is safe) and does not need the ES2021 `replaceAll`.
  wholePOBoxGroup.split(" ").join(`${separator}*`),
  "i"
);
/**
 * Check whether an address contains a PO box.
 *
 * @param addressString - Address text to inspect. `null` and `undefined` are
 *   treated as "no PO box".
 * @returns `true` when `PO_BOX_REGEX` matches somewhere in the string,
 *   otherwise `false`.
 */
export function isPOBox(addressString: string | null | undefined): boolean {
  // Guard explicitly instead of letting RegExp.prototype.test coerce a missing
  // value to the literal strings "null" / "undefined".
  if (addressString == null) {
    return false;
  }
  return PO_BOX_REGEX.test(addressString);
}
/**
 * Extract the PO box part from an address.
 *
 * @param addressString - Address text to search. `null` and `undefined` are
 *   accepted and yield `null`.
 * @returns The text of the regex's first capture group for the first match of
 *   `PO_BOX_REGEX`, or `null` when there is no match.
 */
export function getPOBox(addressString: string | null | undefined): string | null {
  const match = addressString?.match(PO_BOX_REGEX);
  // Optional chaining covers both "no input" (undefined) and "no match" (null).
  return match?.[1] ?? null;
}