# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Capture group 1 is a run of one or more ASCII letters and/or spaces that is
# immediately followed by a comma. The comma is part of the overall match but
# not of the group. Newlines are not in the character class, so a match never
# spans more than one line of input.
regex = r"([a-zA-Z ]+),"
# Sample input for the pattern: each state-name heading is followed by an empty
# line and then "City, ST" entries, one per line. Some lines hold only a single
# space. The adjacent string literals inside the parentheses are concatenated
# into one string; the final entry has no trailing newline.
test_str = ("California\n\n"
"Alameda, CA\n"
"Albany, CA\n"
"Altadena, CA\n"
"Alviso, CA\n"
"Atherton, CA\n"
"Belmont, CA\n"
"Berkeley, CA\n"
"Beverly Hills, CA\n"
"Burbank, CA\n"
"Burlingame, CA\n"
"Campbell, CA\n"
"Culver City, CA\n"
"Cupertino, CA\n"
"El Cerrito, CA\n"
"Emeryville, CA\n"
"Encino, CA\n"
"Glendale, CA\n"
"La Canada Flintridge, CA\n"
"Los Altos, CA\n"
" \n"
"Los Angeles, CA\n"
"Los Gatos, CA\n"
"Marina Del Rey, CA\n"
"Menlo Park, CA\n"
"Millbrae, CA\n"
"Mountain View, CA\n"
"North Hollywood, CA\n"
"Oakland, CA\n"
"Pacific Palisades, CA\n"
"Palo Alto, CA\n"
"Pasadena, CA\n"
"Playa Del Rey, CA\n"
"Portola Valley, CA\n"
"Redwood City, CA\n"
"Reseda, CA\n"
"San Bruno, CA\n"
"San Carlos, CA\n"
"San Francisco, CA\n"
"San Jose, CA\n"
" \n"
"San Leandro, CA\n"
"San Marino, CA\n"
"San Mateo, CA\n"
"Santa Clara, CA\n"
"Santa Monica, CA\n"
"Saratoga, CA\n"
"Sherman Oaks, CA\n"
"South Pasadena, CA\n"
"South San Francisco, CA\n"
"Stanford, CA\n"
"Studio City, CA\n"
"Sunnyvale, CA\n"
"Tarzana, CA\n"
"Valley Village, CA\n"
"Van Nuys, CA\n"
"Venice, CA\n"
"West Hollywood, CA\n"
"Colorado\n\n"
"Boulder, CO\n"
"Denver, CO\n"
" \n"
"Englewood, CO\n"
"Lafayette, CO\n"
" \n"
"Louisville, CO\n"
"Connecticut\n\n"
"Canaan, CT\n"
"East Canaan, CT\n"
"Falls Village, CT\n"
" \n"
"Lakeville, CT\n"
"Norfolk, CT\n"
"Salisbury, CT\n"
" \n"
"Sharon, CT\n"
"Stamford, CT\n"
"West Cornwall, CT\n"
"District of Columbia\n\n"
"Washington, DC\n"
"Florida\n\n"
"Key Biscayne, FL\n"
"Miami Beach, FL\n"
" \n"
"Miami, FL\n"
"North Miami Beach, FL\n"
"Georgia\n\n"
"Alpharetta, GA\n"
"Atlanta, GA\n"
"Avondale Estates, GA\n"
" \n"
"Decatur, GA\n"
"Marietta, GA\n"
"Roswell, GA\n"
" \n"
"Scottdale, GA\n"
"Smyrna, GA\n"
"Illinois\n\n"
"Aurora, IL\n"
"Buffalo Grove, IL\n"
"Chicago, IL\n"
"Clarendon Hills, IL\n"
"Deerfield, IL\n"
"Downers Grove, IL\n"
"Elmhurst, IL\n"
"Evanston, IL\n"
"Fort Sheridan, IL\n"
"Glencoe, IL\n"
"Glenview Nas, IL\n"
"Glenview, IL\n"
"Grayslake, IL\n"
" \n"
"Highland Park, IL\n"
"Highwood, IL\n"
"Hinsdale, IL\n"
"Kenilworth, IL\n"
"Lake Bluff, IL\n"
"Lake Forest, IL\n"
"Lake Zurich, IL\n"
"Libertyville, IL\n"
"Lincolnshire, IL\n"
"Lincolnwood, IL\n"
"Lisle, IL\n"
"Mundelein, IL\n"
"Naperville, IL\n"
" \n"
"Northbrook, IL\n"
"Oak Brook, IL\n"
"Prospect Heights, IL\n"
"Skokie, IL\n"
"Vernon Hills, IL\n"
"Villa Park, IL\n"
"Warrenville, IL\n"
"Westmont, IL\n"
"Wheeling, IL\n"
"Wilmette, IL\n"
"Winnetka, IL\n"
"Indiana\n\n"
"Carmel, IN\n"
" \n"
"Fishers, IN\n"
" \n"
"Indianapolis, IN\n"
"Maryland\n\n"
"Bethesda, MD\n"
"Cabin John, MD\n"
"Chevy Chase, MD\n"
"Gaithersburg, MD\n"
" \n"
"Garrett Park, MD\n"
"Kensington, MD\n"
"Potomac, MD\n"
"Rockville, MD\n"
" \n"
"Silver Spring, MD\n"
"Takoma Park, MD\n"
"Massachusetts\n\n"
"Allston, MA\n"
"Arlington, MA\n"
"Ashley Falls, MA\n"
"Auburndale, MA\n"
"Babson Park, MA\n"
"Belmont, MA\n"
"Boston, MA\n"
"Brighton, MA\n"
"Brookline, MA\n"
"Cambridge, MA\n"
"Charlestown, MA\n"
" \n"
"Chestnut Hill, MA\n"
"Jamaica Plain, MA\n"
"Medford, MA\n"
"Needham, MA\n"
"New Town, MA\n"
"Newton Center, MA\n"
"Newton Highlands, MA\n"
"Newton Lower Falls, MA\n"
"Newton Upper Falls, MA\n"
"Newton, MA\n"
"Newtonville, MA\n"
" \n"
"Roslindale, MA\n"
"Sheffield, MA\n"
"Somerville, MA\n"
"South Egremont, MA\n"
"Waban, MA\n"
"Waltham, MA\n"
"Watertown, MA\n"
"Wellesley Hills, MA\n"
"Wellesley, MA\n"
"West Newton, MA\n"
"Michigan\n\n"
"Ann Arbor, MI\n"
"Birmingham, MI\n"
"Bloomfield Hills, MI\n"
"Chelsea, MI\n"
"Commerce Township, MI\n"
"Dexter, MI\n"
"Farmington, MI\n"
" \n"
"Franklin, MI\n"
"Keego Harbor, MI\n"
"Novi, MI\n"
"Plymouth, MI\n"
"Rochester, MI\n"
"Royal Oak, MI\n"
"Saline, MI\n"
" \n"
"Southfield, MI\n"
"Troy, MI\n"
"Walled Lake, MI\n"
"Waterford, MI\n"
"West Bloomfield, MI\n"
"Ypsilanti, MI\n"
"Minnesota\n\n"
"Hopkins, MN\n"
" \n"
"Minneapolis, MN\n"
" \n"
"Saint Paul, MN\n"
"New Jersey\n\n"
"Bloomfield, NJ\n"
"Caldwell, NJ\n"
"Cedar Grove, NJ\n"
"Clifton, NJ\n"
"Elmwood Park, NJ\n"
"Fair Lawn, NJ\n"
"Fairfield, NJ\n"
"Garfield, NJ\n"
" \n"
"Haledon, NJ\n"
"Lincoln Park, NJ\n"
"Little Falls, NJ\n"
"Montclair, NJ\n"
"New York, NJ\n"
"Nutley, NJ\n"
"Passaic, NJ\n"
"Paterson, NJ\n"
" \n"
"Pequannock, NJ\n"
"Rutherford, NJ\n"
"Saddle Brook, NJ\n"
"Totowa, NJ\n"
"Verona, NJ\n"
"Wayne, NJ\n"
"New York\n\n"
"Amenia, NY\n"
"Ancramdale, NY\n"
"Arverne, NY\n"
"Astoria, NY\n"
"Bayside, NY\n"
"Bellerose, NY\n"
"Breezy Point, NY\n"
"Brooklyn, NY\n"
"Cambria Heights, NY\n"
"College Point, NY\n"
"Copake, NY\n"
"Corona, NY\n"
"East Elmhurst, NY\n"
"Elmhurst, NY\n"
"Far Rockaway, NY\n"
"Flushing, NY\n"
" \n"
"Forest Hills, NY\n"
"Fresh Meadows, NY\n"
"Hollis, NY\n"
"Howard Beach, NY\n"
"Jackson Heights, NY\n"
"Jamaica, NY\n"
"Kew Gardens, NY\n"
"Little Neck, NY\n"
"Long Island City, NY\n"
"Maspeth, NY\n"
"Middle Village, NY\n"
"Millerton, NY\n"
"New York, NY\n"
"Oakland Gardens, NY\n"
"Ozone Park, NY\n"
"Pine Plains, NY\n"
" \n"
"Queens Village, NY\n"
"Rego Park, NY\n"
"Richmond Hill, NY\n"
"Ridgewood, NY\n"
"Rockaway Park, NY\n"
"Rosedale, NY\n"
"Saint Albans, NY\n"
"South Ozone Park, NY\n"
"South Richmond Hill, NY\n"
"Springfield Gardens, NY\n"
"Sunnyside, NY\n"
"Whitestone, NY\n"
"Woodhaven, NY\n"
"Woodside, NY\n"
"Oregon\n\n"
"Beaverton, OR\n"
"Hillsboro, OR\n"
" \n"
"Lake Oswego, OR\n"
"Portland, OR\n"
" \n"
"Tualatin, OR\n"
"West Linn, OR\n"
"Pennsylvania\n\n"
"Abington, PA\n"
"Ambler, PA\n"
"Ardmore, PA\n"
"Bala Cynwyd, PA\n"
"Bensalem, PA\n"
"Berwyn, PA\n"
"Blue Bell, PA\n"
"Bridgeport, PA\n"
"Bryn Athyn, PA\n"
"Bryn Mawr, PA\n"
"Cheltenham, PA\n"
"Collegeville, PA\n"
"Conshohocken, PA\n"
"Devon, PA\n"
"Dresher, PA\n"
"Drexel Hill, PA\n"
"Elkins Park, PA\n"
" \n"
"Flourtown, PA\n"
"Fort Washington, PA\n"
"Gladwyne, PA\n"
"Glenside, PA\n"
"Gwynedd Valley, PA\n"
"Gwynedd, PA\n"
"Haverford, PA\n"
"Havertown, PA\n"
"Huntingdon Valley, PA\n"
"Jenkintown, PA\n"
"King Of Prussia, PA\n"
"Lafayette Hill, PA\n"
"Lansdale, PA\n"
"Lansdowne, PA\n"
"Malvern, PA\n"
"Merion Station, PA\n"
"Mont Clare, PA\n"
" \n"
"Montgomeryville, PA\n"
"Narberth, PA\n"
"Norristown, PA\n"
"North Wales, PA\n"
"Oreland, PA\n"
"Paoli, PA\n"
"Philadelphia, PA\n"
"Phoenixville, PA\n"
"Plymouth Meeting, PA\n"
"Upper Darby, PA\n"
"Villanova, PA\n"
"Wayne, PA\n"
"Willow Grove, PA\n"
"Wyncote, PA\n"
"Wynnewood, PA\n"
"Texas\n\n"
"Austin, TX\n"
"Bellaire, TX\n"
"Cedar Park, TX\n"
"Conroe, TX\n"
" \n"
"Houston, TX\n"
"Katy, TX\n"
"Pflugerville, TX\n"
"Round Rock, TX\n"
" \n"
"Sandy, TX\n"
"Spring, TX\n"
"Virginia\n\n"
"Alexandria, VA\n"
"Arlington, VA\n"
" \n"
"Dunn Loring, VA\n"
"Falls Church, VA\n"
" \n"
"Ft Myer, VA\n"
"McLean, VA\n"
"Washington\n\n"
"Bellevue, WA\n"
"Kirkland, WA\n"
"Lynnwood, WA\n"
" \n"
"Medina, WA\n"
"Mercer Island, WA\n"
"Mountlake Terrace, WA")
# Scan the sample text lazily and report every match of the pattern together
# with the span and text of each of its capture groups.
matches = re.finditer(regex, test_str)

for match_num, match in enumerate(matches, start=1):
    # Span and text of the whole match (group 0).
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=match_num,
        start=match.start(),
        end=match.end(),
        match=match.group()))

    # Capture groups are numbered from 1, so iterate over 1..N directly
    # rather than starting at 0 and bumping the loop variable.
    for group_num in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=group_num,
            start=match.start(group_num),
            end=match.end(group_num),
            group=match.group(group_num)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html