use strict;
use warnings;

# Sample input: one candidate URL per line. Lines that are not valid URLs
# (free text, the IPv6 literal, the line containing a non-ASCII character in
# its query) are present on purpose and must NOT match.
my $str = 'https://en.wikipedia.org/wiki/Uniform_Resource_Identifier#Syntax
(↑ assuming URL, not URI; the only difference is that the host is mandatory)
https://regex101.com/
https://e./
https://org.
https://elearning.tgm.ac.at/pluginfile.php/297744/mod_resource/content/0/SEW5_UML_Einfuehrung.pdf
https://elearning.tgm.ac.at/mod/page/view.php?id%40_-ae=212356
https://elearning.tgm.ac.at/mod/assign/view.php?id=212239
https://elearning.tgm.ac.at/course/view.php?id=5454§ion=4
nonstandardscheme://is.still.valid
http://he.e/?
https://hy.a?a
http://h.?a&b&c=a
https://e.o.a.
http://0riedler:0%20tab-a_sco@riedler.a.wien/
http://emptypasswd:@riedler.wien/
http://a.e:33/
https://de.wikipedia.org/wiki/Wikipedia:Hauptseite
https://de.wikipedia.org/wiki/Preiselbeersauce#Zubereitung
https://toptoptoptoptoptoptoptoptoptoptoptoptoptoptoptoptoptoptoptop.top/
https://9.9.9.9/ipv4
https://2620:fe::fe/ipv6
ipv6 addresses are their own source of evil, not implementing those for now';

# Percent-encoded octet: "%" followed by two HEX digits (so %2F is accepted,
# not only all-decimal escapes such as %20).
my $pct_encoded = qr/%[0-9A-Fa-f]{2}/;

# One host label: a letter followed by letters, digits or hyphens.
my $host_label = qr/[a-zA-Z][a-zA-Z0-9-]*/;

# Line-anchored URL matcher (/m makes ^ and $ match at every line boundary).
# Named captures: scheme, username, passwd, host, port (includes the leading
# colon), path, query (includes the leading question mark) and fragment
# (includes the leading hash). IPv6 hosts are intentionally not supported.
#
# Brace delimiters are used because the pattern itself contains slashes.
# The host part is written as "labels separated by dots" rather than a
# repeated group with an optional dot, which accepts the same strings but
# cannot backtrack exponentially on long host names.
my $regex = qr{
    ^
    (?<scheme> [a-zA-Z][a-zA-Z0-9+.-]* )
    ://
    (?:                                 # optional userinfo part
        (?<username> (?: [\w-] | $pct_encoded )+ )
        (?: : (?<passwd> (?: [\w-] | $pct_encoded )* ) )?
        \@
    )?
    (?<host>
        (?: $host_label \. )* $host_label \.?
      | \d{1,3} \. \d{1,3} \. \d{1,3} \. \d{1,3}
    )
    (?<port> : \d+ )?
    (?<path>
        (?: / (?: [\w.:-] | $pct_encoded )+ )+ /?
      | /
    )?
    (?<query>
        \?
        (?: (?: [\w-] | $pct_encoded )+ (?: = \w+ )? (?: & | $ ) )*
    )?
    (?<fragment> \# .* )?
    $
}xmp;

# Iterate over every matching line; a plain "if" with /g would only report
# the first match. /p is repeated on the match operator so that ${^MATCH}
# is populated on perls older than 5.20 as well.
while ( $str =~ /$regex/gp ) {
    print "Whole match is ${^MATCH} and its start/end positions can be obtained via \$-[0] and \$+[0]\n";
    # print "Capture Group 1 is $1 and its start/end positions can be obtained via \$-[1] and \$+[1]\n";
    # print "Capture Group 2 is $2 ... and so on\n";
}
# ${^POSTMATCH} and ${^PREMATCH} are also available with the use of '/p'
# Named capture groups can be called via $+{name}
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Perl, please visit: http://perldoc.perl.org/perlre.html