use strict;
use warnings;
use utf8;    # the sample data below contains literal non-ASCII characters

# Print decoded characters as UTF-8; without this layer, matches that contain
# non-ASCII characters trigger "Wide character in print" warnings.
binmode STDOUT, ':encoding(UTF-8)';

# Test corpus: one candidate URL per line. The "// Match" and "// Fail" lines
# are part of the data itself (they look like section labels for the samples
# that follow them); they are not Perl comments.
my $str = '// Match
http://userid:password@example.com/file1/file2?test=1&test=2#here
http://foo.com/blah_blah
http://foo.com/blah_blah/
http://foo.com/blah_blah_(wikipedia)
http://foo.com/blah_blah_(wikipedia)_(again)
http://www.example.com/wpstyle/?p=364
https://www.example.com/foo/?bar=baz&inga=42&quux
http://✪df.ws/123
http://userid:password@example.com:8080
http://userid:password@example.com:8080/
http://userid@example.com
http://userid@example.com/
http://userid@example.com:8080
http://userid@example.com:8080/
http://userid:password@example.com
http://userid:password@example.com/
http://➡.ws/䨹
http://⌘.ws
http://⌘.ws/
http://foo.com/blah_(wikipedia)#cite-1
http://foo.com/blah_(wikipedia)_blah#cite-1
http://foo.com/unicode_(✪)_in_parens
http://foo.com/(something)?after=parens
http://☺.damowmow.com/
http://code.google.com/events/#&product=browser
http://j.mp
ftp://foo.bar/baz
http://foo.bar/?q=Test%20URL-encoded%20stuff
http://مثال.إختبار
http://例子.测试
http://उदाहरण.परीक्षा
http://-.~_!$&\'()*+,;=:%40:80%2f::::::@example.com
http://1337.net
http://a.b-c.de
http://a.b--c.de/
ftps://foo.bar/
http://-error-.invalid/
http://-a.b.co
http://a.b-.co
http://0.0.0.0
http://10.1.1.0
http://10.1.1.255
http://224.1.1.1
http://10.1.1.1
http://10.1.1.254
https://142.42.1.1/
ftp://142.42.1.1:8080/
ftps://223.255.255.254
// Fail
http://
http://.
http://..
http://../
http://?
http://??
http://??/
http://#
http://##
http://##/
http://foo.bar?q=Spaces should be encoded
//
//a
///a
///
http:///a
foo.com
rdar://1234
h://test
http:// shouldfail.com
:// should fail
http://foo.bar/foo(bar)baz quux
http://356.354.165.654
http://1.1.1.1.1
http://123.123.123
http://3628126748
http://.www.foo.bar/
http://www.foo.bar./';

# URL pattern with named captures: protocol, user, password, domain (either a
# dotted host name ending in top_level_domain, or a dotted-quad ip), port,
# directory, query and anchor.
#
# Brace delimiters are used on purpose: the inline comment (?#user/password)
# contains a "/", which would terminate a slash-delimited qr/.../ early and
# make the file fail to compile.
#
# Flags: /u Unicode semantics, /i case-insensitive, /m so that ^ and $ anchor
# at every line of the corpus, /p to make ${^MATCH} and friends available.
my $regex = qr{^(?#protocol)(?<protocol>(?:ht|f)tps?)\:(?:\/\/)?(?#user/password)(?:(?<user>\S+?)(?::(?<password>\S+))?@)?(?#domaine)(?<domain>(?:(?:[a-z\x{00a1}-\x{ffff}0-9-]++\.)+(?#top_level_domain)(?<top_level_domain>[a-z\x{00a1}-\x{ffff}]{2,}))|(?<ip>(?:1?\d{1,2}|2[0-4]\d|25[0-5])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){3}))(?#port)(?::(?<port>\d{1,5}))?(?#directory)(?<directory>\/(?:[^?#\s])*)?(?#query)(?:\?(?<query>[^#\s]*))?(?#anchor)(?:#(?<anchor>[^\s]*))?$}uimp;

# In scalar context each /g match resumes where the previous one ended, so a
# while loop visits every matching line; an "if" would report only the first.
# /p is repeated on the match operator so ${^MATCH} is also set on Perls that
# predate 5.20.
while ( $str =~ /$regex/gp ) {
    print "Whole match is ${^MATCH} and its start/end positions can be obtained via \$-[0] and \$+[0]\n";
    # print "Capture Group 1 is $1 and its start/end positions can be obtained via \$-[1] and \$+[1]\n";
    # print "Capture Group 2 is $2 ... and so on\n";
}
# ${^POSTMATCH} and ${^PREMATCH} are also available with the use of '/p'
# Named capture groups can be called via $+{name}
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Perl, please visit:
# http://perldoc.perl.org/perlre.html