// include the latest version of the regex crate in your Cargo.toml
extern crate regex;
use regex::Regex;
/// Compiles an XML-matching pattern, runs it over a sample appointments
/// document, and prints the `Debug` form of every set of captures found.
///
/// If the pattern cannot be compiled, the error is written to stderr and the
/// process exits with status 1 instead of panicking.
fn main() {
    // The pattern must stay on a single physical line: a line break inside the
    // raw string becomes part of the pattern and would split escapes such as
    // `\x09` in two.
    //
    // NOTE(review): the pattern relies on look-around (`(?!...)`, `(?<!...)`),
    // a named backreference (`(?P=TagName)`) and subroutine calls
    // (`(?P>Element)`). The `regex` crate documents that it does not support
    // look-around or backreferences, so compilation is expected to fail and
    // take the error branch below; a backtracking engine would be needed to
    // run this pattern as written.
    let pattern = r#"(?m)(?P<Element><(?P<TagName>[:_A-z][-.0-9:_A-z\xB7]*)(?:[\x09\x0A\x0D\x20]+[:_A-z][-.0-9:_A-z\xB7]*[\x09\x0A\x0D\x20]*=[\x09\x0A\x0D\x20]*(?:"(?:[^<&"]|&(?:[:_A-z][-.0-9:_A-z\xB7]*|#(?:[0-9]+|x[0-9a-fA-F]+));)*"|'(?:[^<&']|&(?:[:_A-z][-.0-9:_A-z\xB7]*|#(?:[0-9]+|x[0-9a-fA-F]+));)*'))*[\x09\x0A\x0D\x20]*(?:>(?:(?:[^<&\]]|](?!]>))*(?:(?:(?P>Element)|&(?:[:_A-z][-.0-9:_A-z\xB7]*|#(?:[0-9]+|x[0-9a-fA-F]+));|<!\[CDATA\[(?:[^\x01-\x08\x0B\x0C\x0E-\x1F\x5D]|](?!]>))*]]>|<\?[:_A-z][-.0-9:_A-z\xB7]*(?<!(?i:\?xml))(?:[\x09\x0A\x0D\x20]+(?:[^\x01-\x08\x0B\x0C\x0E-\x1F\x3F]|\?(?!>))*)?\?>|<!--(?:[^\x01-\x08\x0B\x0C\x0E-\x1F\x2D]|-(?!-))*-->)(?:[^<&\]]|](?!]>))*)*)<\/(?P=TagName)[\x09\x0A\x0D\x20]*|\/)>)|&(?:[:_A-z][-.0-9:_A-z\xB7]*|#(?:[0-9]+|x[0-9a-fA-F]+));|<!\[CDATA\[(?:[^\x01-\x08\x0B\x0C\x0E-\x1F\x5D]|](?!]>))*]]>|<\?[:_A-z][-.0-9:_A-z\xB7]*(?<!(?i:\?xml))(?:[\x09\x0A\x0D\x20]+(?:[^\x01-\x08\x0B\x0C\x0E-\x1F\x3F]|\?(?!>))*)?\?>|<!--(?:[^\x01-\x08\x0B\x0C\x0E-\x1F\x2D]|-(?!-))*-->|<!DOCTYPE[\x09\x0A\x0D\x20]+[:_A-z][-.0-9:_A-z\xB7]*(?:[\x09\x0A\x0D\x20]+(?:SYSTEM[\x09\x0A\x0D\x20]+(?:"[^"]*"|'[^']*')|PUBLIC[\x09\x0A\x0D\x20]+(?:"[\x0A\x0D\x20\x21\x23-\x25\x27-\x2F\x3A\x3B\x3D\x3F\x40_0-9A-z]*"|'[\x0A\x0D\x20\x21\x23-\x25\x28-\x2F\x3A\x3B\x3D\x3F\x40_0-9A-z]*')[\x09\x0A\x0D\x20]+(?:"[^"]*"|'[^']*')))?[\x09\x0A\x0D\x20]*(?:\[(?:(?:<!ELEMENT[\x09\x0A\x0D\x20]+[:_A-z][-.0-9:_A-z\xB7]*[\x09\x0A\x0D\x20]+(?:EMPTY|ANY|\([\x09\x0A\x0D\x20]*#PCDATA(?:(?:[\x09\x0A\x0D\x20]*\|[\x09\x0A\x0D\x20]*[:_A-z][-.0-9:_A-z\xB7]*)*[\x09\x0A\x0D\x20]*\)\*|[\x09\x0A\x0D\x20]*\))|(?:(?P<choice>\([\x09\x0A\x0D\x20]*(?:[:_A-z][-.0-9:_A-z\xB7]*|(?P>choice)|(?P>seq))[?*+]?(?:[\x09\x0A\x0D\x20]*\|[\x09\x0A\x0D\x20]*(?:[:_A-z][-.0-9:_A-z\xB7]*|(?P>choice)|(?P>seq))[?*+]?)+[\x09\x0A\x0D\x20]*\))|(?P<seq>\([\x09\x0A\x0D\x20]*(?:[:_A-z][-.0-9:_A-z\xB7]*|(?P>choice)|(?P>seq))[?*+]?(?:[\x09\x0A\x0D\x20]*,[\x09\x0A\x0D\x20]*(?:[:_A-z][-.0-9:_A-z\xB7]*|(?P>choice)|(?P>seq))[?*+]?)*[\x09\x0A\x0D\x20]*\)))[?*+]?)[\x09\x0A\x0D\x20]*>|<!ATTLIST[\x09\x0A\x0D\x20]+[:_A-z][-.0-9:_A-z\xB7]*(?:[\x09\x0A\x0D\x20]+[:_A-z][-.0-9:_A-z\xB7]*[\x09\x0A\x0D\x20]+(?:CDATA|(?:ID(?:REFS?)?|ENTIT(?:Y|IES)|NMTOKENS?)|(?:NOTATION[\x09\x0A\x0D\x20]+\([\x09\x0A\x0D\x20]*[:_A-z][-.0-9:_A-z\xB7]*(?:[\x09\x0A\x0D\x20]*\|[\x09\x0A\x0D\x20]*[:_A-z][-.0-9:_A-z\xB7]*)*[\x09\x0A\x0D\x20]*\)|\([\x09\x0A\x0D\x20]*(?:[-.0-9:_A-z\xB7])+(?:[\x09\x0A\x0D\x20]*\|[\x09\x0A\x0D\x20]*(?:[-.0-9:_A-z\xB7])+)*[\x09\x0A\x0D\x20]*\)))[\x09\x0A\x0D\x20]+(?:#(?:REQUIRED|IMPLIED)|(?:#FIXED[\x09\x0A\x0D\x20]+)?(?:"(?:[^<&"]|&(?:[:_A-z][-.0-9:_A-z\xB7]*|#(?:[0-9]+|x[0-9a-fA-F]+));)*"|'(?:[^<&']|&(?:[:_A-z][-.0-9:_A-z\xB7]*|#(?:[0-9]+|x[0-9a-fA-F]+));)*')))*[\x09\x0A\x0D\x20]*>|(?:<!ENTITY[\x09\x0A\x0D\x20]+[:_A-z][-.0-9:_A-z\xB7]*[\x09\x0A\x0D\x20]+(?:(?:"(?:[^%&"]|%[:_A-z][-.0-9:_A-z\xB7]*;|&(?:[:_A-z][-.0-9:_A-z\xB7]*|#(?:[0-9]+|x[0-9a-fA-F]+));)*"|'(?:[^%&']|%[:_A-z][-.0-9:_A-z\xB7]*;|&(?:[:_A-z][-.0-9:_A-z\xB7]*|#(?:[0-9]+|x[0-9a-fA-F]+));)*')|(?:SYSTEM[\x09\x0A\x0D\x20]+(?:"[^"]*"|'[^']*')|PUBLIC[\x09\x0A\x0D\x20]+(?:"[\x0A\x0D\x20\x21\x23-\x25\x27-\x2F\x3A\x3B\x3D\x3F\x40_0-9A-z]*"|'[\x0A\x0D\x20\x21\x23-\x25\x28-\x2F\x3A\x3B\x3D\x3F\x40_0-9A-z]*')[\x09\x0A\x0D\x20]+(?:"[^"]*"|'[^']*'))(?:[\x09\x0A\x0D\x20]+NDATA[\x09\x0A\x0D\x20]+[:_A-z][-.0-9:_A-z\xB7]*)?)[\x09\x0A\x0D\x20]*>|<!ENTITY[\x09\x0A\x0D\x20]+%[\x09\x0A\x0D\x20]+[:_A-z][-.0-9:_A-z\xB7]*[\x09\x0A\x0D\x20]+(?:(?:"(?:[^%&"]|%[:_A-z][-.0-9:_A-z\xB7]*;|&(?:[:_A-z][-.0-9:_A-z\xB7]*|#(?:[0-9]+|x[0-9a-fA-F]+));)*"|'(?:[^%&']|%[:_A-z][-.0-9:_A-z\xB7]*;|&(?:[:_A-z][-.0-9:_A-z\xB7]*|#(?:[0-9]+|x[0-9a-fA-F]+));)*')|(?:SYSTEM[\x09\x0A\x0D\x20]+(?:"[^"]*"|'[^']*')|PUBLIC[\x09\x0A\x0D\x20]+(?:"[\x0A\x0D\x20\x21\x23-\x25\x27-\x2F\x3A\x3B\x3D\x3F\x40_0-9A-z]*"|'[\x0A\x0D\x20\x21\x23-\x25\x28-\x2F\x3A\x3B\x3D\x3F\x40_0-9A-z]*')[\x09\x0A\x0D\x20]+(?:"[^"]*"|'[^']*')))[\x09\x0A\x0D\x20]*>)|<!NOTATION[\x09\x0A\x0D\x20]+[:_A-z][-.0-9:_A-z\xB7]*[\x09\x0A\x0D\x20]+(?:(?:SYSTEM[\x09\x0A\x0D\x20]+(?:"[^"]*"|'[^']*')|PUBLIC[\x09\x0A\x0D\x20]+(?:"[\x0A\x0D\x20\x21\x23-\x25\x27-\x2F\x3A\x3B\x3D\x3F\x40_0-9A-z]*"|'[\x0A\x0D\x20\x21\x23-\x25\x28-\x2F\x3A\x3B\x3D\x3F\x40_0-9A-z]*')[\x09\x0A\x0D\x20]+(?:"[^"]*"|'[^']*'))|PUBLIC[\x09\x0A\x0D\x20]+(?:"[\x0A\x0D\x20\x21\x23-\x25\x27-\x2F\x3A\x3B\x3D\x3F\x40_0-9A-z]*"|'[\x0A\x0D\x20\x21\x23-\x25\x28-\x2F\x3A\x3B\x3D\x3F\x40_0-9A-z]*'))[\x09\x0A\x0D\x20]*>|<\?[:_A-z][-.0-9:_A-z\xB7]*(?<!(?i:\?xml))(?:[\x09\x0A\x0D\x20]+(?:[^\x01-\x08\x0B\x0C\x0E-\x1F\x3F]|\?(?!>))*)?\?>|<!--(?:[^\x01-\x08\x0B\x0C\x0E-\x1F\x2D]|-(?!-))*-->)|(?:%[:_A-z][-.0-9:_A-z\xB7]*;|[\x09\x0A\x0D\x20]+))*][\x09\x0A\x0D\x20]*)?>"#;

    // Surface a compile failure as a readable message and a non-zero exit
    // status rather than an `unwrap` panic.
    let regex = match Regex::new(pattern) {
        Ok(compiled) => compiled,
        Err(err) => {
            eprintln!("failed to compile regex: {}", err);
            std::process::exit(1);
        }
    };

    // Sample input. The continuation lines start at column 0 on purpose:
    // any indentation here would become part of the string.
    let string = "<appointments>
<event date=\"03-05-02\" start-time=\"09:00\" end-time=\"10:00\">
<type>Meeting</type>
<title>Staff Meeting</title>
<description>Weekly staff meeting</description>
<location>Conference Room</location>
<reminder status=\"no\"/>
</event>
<event date=\"03-06-02\" start-time=\"14:00\" end-time=\"15:00\">
<type>Interview</type>
<title>Developer Interview</title>
<description>Interview new developer candidate.</description>
<location>Office</location>
<reminder status=\"yes\" interval=\"15-min\" method=\"ICQ\"/>
</event>
<event date=\"03-15-02\" start-time=\"13:45\" end-time=\"15:00\">
<type>Dentist</type>
<title>Root Canal</title>
<description>Root canal on lower left molar.</description>
<location>Dr. Scrivello's Office</location>
<reminder status=\"yes\" interval=\"1-day\" method=\"e-mail\"/>
</event>
</appointments>";

    // `captures_iter` yields one `Captures` value per non-overlapping match;
    // each is printed with its `Debug` representation.
    for caps in regex.captures_iter(string) {
        println!("{:?}", caps);
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Rust, please visit: https://docs.rs/regex/latest/regex/