/
# Finds URL-like text that begins with www or with one of http:, https:, ftp:, ftps:
# Group 1 captures the URL-like text itself; group 13 captures a few optional
# trailing characters that are matched after it but kept out of group 1.
# No case-insensitive flag is set, so a-z and A-Z below are case-sensitive.
# The pattern contains no line anchors, so the m flag has no visible effect here.
( # 1: start; the URL-like text (header plus all repeated segments)
( # 2: start; the header, one of the two alternatives below
( # 3: start; scheme together with its colon
(ht|f)tps?\: # 4: "ht" or "f"; followed by "tp", an optional "s" and ":" this gives http:, https:, ftp:, ftps:
)| # 3: end. Otherwise:
(www) # 5: the literal text www (the dot after it is matched by group 6 below)
) # 2: end; done with header.
(?: # non-capturing repeating group: punctuation followed by one text segment
([.\/:?=&-]+) # 6: one or more of these punctuation characters: dot, slash, colon, question mark, equals, ampersand, hyphen
( # 7: start; wrapper around the segment text
(((\s?)([a-z0-9]+) # 8: start of alternation; 9: first alternative; 10: one optional whitespace char; 11: one or more lowercase letters or digits
)| # 9: end. Otherwise:
(\w+) # 12: one or more word characters (this alternative allows no leading whitespace)
) # 8: end.
) # 7: end.
)+ # end repeating group; it must match at least once. Groups 6 to 12 sit inside the repeat, so they typically hold only the last repetition.
) # end capture group 1
(\/?\.?\s?[A-Z]?) # 13: optional slash, optional dot, optional whitespace char, optional uppercase letter; every part is optional, so this may be empty. It is consumed by the match but lies outside group 1.
/
gmx