// include the latest version of the regex crate in your Cargo.toml
extern crate regex;
use regex::Regex;
fn main() {
let regex = Regex::new(r"(?m)([\w+]+\:\/\/)?([\w\d-]+\.)*[\w-]+[\(\)\#\.\:]\w+([\(\)\/\?\=\&\#\.]?[\w-]+)*\/?[\(\-\w\)\?\=\#_]*").unwrap();
let string = "https://daringfireball.net/2010/07/improved_regex_for_matching_urls
Some of the advantages of the new pattern, compared to the previous one:
It no longer uses the [:punct:] named character class. I thought this was universally supported in modern regex engines, but apparently it is not.
It does a better job with URLs containing literal parentheses, correctly matching the following URLs that the previous pattern did not:
.http://foo.com/more_(than)_one_(parens)
http://foo.com/blah_(wikipedia)#cite-1.
http://foo.com/blah_(wikipedia)_blah#cite-1.
http://foo.com/unicode_(✪)_in_parens,
http://foo.com/(something)?after=parens.
It now matches mailto: URLs.
It correctly guesses that things like “bit.ly/foo” and “is.gd/foo/” are URLs. Basically: something-dot-something-slash-something.
teste www.google.com teste
https://www.facebook.com.
https://app-1.number123.com.
http://facebook.com.
ftp://facebook.com
http://localhost:3000
localhost:3000/
unitedkingdomurl.co.uk
this.is.a.url.com/its/still=going?wow
shop.facebook.org
app.number123.com
app1.number123.com
app-1.numbEr123.com
app.dashes-dash.com
www.facebook.com
facebook.com
fb.com/hello_123
fb.com/hel-lo
fb.com/hello/goodbye
fb.com/hello/goodbye?okay
fb.com/hello/goodbye?okay=alright
Hello www.google.com World http://yahoo.com
https://www.google.com.tr/admin/subPage?qs1=sss1&qs2=sss2&qs3=sss3#Services
https://google.com.tr/test/subPage?qs1=sss1&qs2=sss2&qs3=sss3#Services
http://google.com/test/subPage?qs1=sss1&qs2=sss2&qs3=sss3#Services
ftp://google.com/test/subPage?qs1=sss1&qs2=sss2&qs3=sss3#Services
www.google.com.tr/test/subPage?qs1=sss1&qs2=sss2&qs3=sss3#Services
www.google.com/test/subPage?qs1=sss1&qs2=sss2&qs3=sss3#Services
drive.google.com/test/subPage?qs1=sss1&qs2=sss2&qs3=sss3#Services
https://www.example.pl
http://www.example.com
www.example.pl
example.com
http://blog.example.com
http://www.example.com/product
http://www.example.com/products?id=1&page=2
http://www.example.com#up
http://255.255.255.255
255.255.255.255
shop.facebook.org/derf.html
Hello www.google.com World http://yahoo.com
https://www.google.com.tr/admin/subPage?qs1=sss1&qs2=sss2&qs3=sss3#Services
https://google.com.tr/test/subPage?qs1=sss1&qs2=sss2&qs3=sss3#Services
http://google.com/test/subPage?qs1=sss1&qs2=sss2&qs3=sss3#Services
ftp://google.com/test/subPage?qs1=sss1&qs2=sss2&qs3=sss3#Services
www.google.com.tr/test/subPage?qs1=sss1&qs2=sss2&qs3=sss3#Services
www.google.com/test/subPage?qs1=sss1&qs2=sss2&qs3=sss3#Services
drive.google.com/test/subPage?qs1=sss1&qs2=sss2&qs3=sss3#Services
Will match the following cases
http://www.foufos.gr
https://www.foufos.gr
http://foufos.gr
http://www.foufos.gr/kino
http://werer.gr
www.foufos.gr
www.mp3.com
www.t.co
http://t.co
http://www.t.co
https://www.t.co
www.aa.com
http://aa.com
http://www.aa.com
https://www.aa.com
Will NOT match the following
www.foufos
www.foufos-.gr
www.-foufos.gr
foufos.gr
http://www.foufos
http://foufos
www.mp3#.com
";
// result will be an iterator over tuples containing the start and end indices for each match in the string
let result = regex.captures_iter(string);
for mat in result {
println!("{:?}", mat);
}
}
Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Rust, please visit: https://docs.rs/regex/latest/regex/