# frozen_string_literal: true

# Builds the regular expression that recognises one XML construct: an element
# (with arbitrarily nested children), an entity/character reference, a CDATA
# section, a processing instruction, a comment, or a DOCTYPE declaration with
# an optional internal subset.
#
# The pattern is written in Ruby (Onigmo) syntax:
# * named groups are `(?<name>...)`,
# * recursive calls are `\g<name>`,
# * the closing tag uses `\k<TagName+0>`, a backreference bound to the current
#   recursion level, so each close tag is compared with its own open tag even
#   after nested elements have re-captured `TagName`,
# * the middle dot is written `\u00B7` (a lone `\xB7` byte is not valid UTF-8).
#
# Fragment names follow the XML 1.0 productions they approximate.
#
# NOTE(review): the `A-z` ranges also cover the ASCII punctuation that sits
# between `Z` and `a` ([ \ ] ^ _ `); they are kept exactly as in the original
# pattern.
#
# @return [Regexp] pattern with the named groups Element, TagName, choice, seq
def build_xml_token_regexp
  # --- lexical building blocks -------------------------------------------
  s         = /[\x09\x0A\x0D\x20]/.source
  name      = /[:_A-z][-.0-9:_A-z\u00B7]*/.source
  nmtoken   = /[-.0-9:_A-z\u00B7]+/.source
  reference = /&(?:#{name}|#(?:[0-9]+|x[0-9a-fA-F]+));/.source
  att_value = /(?:"(?:[^<&"]|#{reference})*"|'(?:[^<&']|#{reference})*')/.source
  # Character data may contain `]` as long as it does not start `]]>`.
  char_data = /(?:[^<&\]]|\](?!\]>))*/.source
  cdata     = /<!\[CDATA\[(?:[^\x01-\x08\x0B\x0C\x0E-\x1F\x5D]|\](?!\]>))*\]\]>/.source
  # The lookbehind rejects a target spelled "xml" in any letter case.
  proc_instr = /<\?#{name}(?<!(?i:\?xml))(?:#{s}+(?:[^\x01-\x08\x0B\x0C\x0E-\x1F\x3F]|\?(?!>))*)?\?>/.source
  comment    = /<!--(?:[^\x01-\x08\x0B\x0C\x0E-\x1F\x2D]|-(?!-))*-->/.source

  # --- element (recursive) -----------------------------------------------
  attribute = /#{s}+#{name}#{s}*=#{s}*#{att_value}/.source
  # Plain strings here: `\g<Element>` cannot be compiled outside the group.
  markup    = "\\g<Element>|#{reference}|#{cdata}|#{proc_instr}|#{comment}"
  content   = "#{char_data}(?:(?:#{markup})#{char_data})*"
  element   = /(?<Element><(?<TagName>#{name})(?:#{attribute})*#{s}*(?:>(?:#{content})<\/\k<TagName+0>#{s}*|\/)>)/.source

  # --- DOCTYPE and its internal subset -----------------------------------
  system_literal = /(?:"[^"]*"|'[^']*')/.source
  # The single-quoted form starts its range at \x28 so it cannot contain `'`.
  pubid_literal  = /(?:"[\x0A\x0D\x20\x21\x23-\x25\x27-\x2F\x3A\x3B\x3D\x3F\x40_0-9A-z]*"|'[\x0A\x0D\x20\x21\x23-\x25\x28-\x2F\x3A\x3B\x3D\x3F\x40_0-9A-z]*')/.source
  external_id    = /(?:SYSTEM#{s}+#{system_literal}|PUBLIC#{s}+#{pubid_literal}#{s}+#{system_literal})/.source
  pe_reference   = /%#{name};/.source

  # Content particle: a name or a nested choice/sequence, optionally repeated.
  cp       = "(?:#{name}|\\g<choice>|\\g<seq>)[?*+]?"
  children = /(?:(?<choice>\(#{s}*#{cp}(?:#{s}*\|#{s}*#{cp})+#{s}*\))|(?<seq>\(#{s}*#{cp}(?:#{s}*,#{s}*#{cp})*#{s}*\)))[?*+]?/.source
  mixed    = /\(#{s}*#PCDATA(?:(?:#{s}*\|#{s}*#{name})*#{s}*\)\*|#{s}*\))/.source
  element_decl = /<!ELEMENT#{s}+#{name}#{s}+(?:EMPTY|ANY|#{mixed}|#{children})#{s}*>/.source

  notation_type = /NOTATION#{s}+\(#{s}*#{name}(?:#{s}*\|#{s}*#{name})*#{s}*\)/.source
  enumeration   = /\(#{s}*#{nmtoken}(?:#{s}*\|#{s}*#{nmtoken})*#{s}*\)/.source
  att_type      = /(?:CDATA|(?:ID(?:REFS?)?|ENTIT(?:Y|IES)|NMTOKENS?)|(?:#{notation_type}|#{enumeration}))/.source
  default_decl  = /(?:#(?:REQUIRED|IMPLIED)|(?:#FIXED#{s}+)?#{att_value})/.source
  attlist_decl  = /<!ATTLIST#{s}+#{name}(?:#{s}+#{name}#{s}+#{att_type}#{s}+#{default_decl})*#{s}*>/.source

  entity_value  = /(?:"(?:[^%&"]|#{pe_reference}|#{reference})*"|'(?:[^%&']|#{pe_reference}|#{reference})*')/.source
  ge_decl       = /<!ENTITY#{s}+#{name}#{s}+(?:#{entity_value}|#{external_id}(?:#{s}+NDATA#{s}+#{name})?)#{s}*>/.source
  pe_decl       = /<!ENTITY#{s}+%#{s}+#{name}#{s}+(?:#{entity_value}|#{external_id})#{s}*>/.source
  notation_decl = /<!NOTATION#{s}+#{name}#{s}+(?:#{external_id}|PUBLIC#{s}+#{pubid_literal})#{s}*>/.source

  markup_decl = /(?:#{element_decl}|#{attlist_decl}|#{ge_decl}|#{pe_decl}|#{notation_decl}|#{proc_instr}|#{comment})/.source
  int_subset  = /\[(?:#{markup_decl}|#{pe_reference}|#{s}+)*\]#{s}*/.source
  doctype     = /<!DOCTYPE#{s}+#{name}(?:#{s}+#{external_id})?#{s}*(?:#{int_subset})?>/.source

  /#{element}|#{reference}|#{cdata}|#{proc_instr}|#{comment}|#{doctype}/m
end

re = build_xml_token_regexp
# Sample XML document used as the subject string. A raw (non-interpolating)
# heredoc keeps the markup verbatim; `chomp` drops the newline the heredoc
# appends after the last line.
str = <<~'XML'.chomp
  <appointments>
  <event date="03-05-02" start-time="09:00" end-time="10:00">
  <type>Meeting</type>
  <title>Staff Meeting</title>
  <description>Weekly staff meeting</description>
  <location>Conference Room</location>
  <reminder status="no"/>
  </event>
  <event date="03-06-02" start-time="14:00" end-time="15:00">
  <type>Interview</type>
  <title>Developer Interview</title>
  <description>Interview new developer candidate.</description>
  <location>Office</location>
  <reminder status="yes" interval="15-min" method="ICQ"/>
  </event>
  <event date="03-15-02" start-time="13:45" end-time="15:00">
  <type>Dentist</type>
  <title>Root Canal</title>
  <description>Root canal on lower left molar.</description>
  <location>Dr. Scrivello's Office</location>
  <reminder status="yes" interval="1-day" method="e-mail"/>
  </event>
  </appointments>
XML
# Print the full text of every match found in str.
#
# The pattern contains capture groups, so String#scan yields an array of the
# group captures instead of the matched text. For matches that go through a
# branch without groups that array holds only nils. Regexp.last_match(0) is
# the complete matched text of the current iteration.
str.scan(re) { puts Regexp.last_match(0) }
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Ruby, please visit:
# http://ruby-doc.org/core-2.2.0/Regexp.html