# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern for printf-style format directives such as "%v", "%+v" or "%9.2f".
# It is assembled from adjacent raw-string pieces so that every component can
# carry its own note; Python joins them into one single pattern string.
# Capturing groups in numeric order: 1 flag, 2 the unnamed wrapper around the
# width/precision alternatives, 3 width, 4 precision, 5 widthDefaultPrecision,
# 6 widthZeroPrecison (spelling is part of the group name), 7
# precisionDefaultWidth, 8 verb.
regex = (
    r"%"                                       # literal percent sign
    r"(?P<flag>\#|\+|\-| |0)?"                 # optional flag: '#', '+', '-', ' ' or '0'
    r"("                                       # start of optional width/precision part
    r"(?P<width>[1-9])\.(?P<precision>[1-9])"  # one digit, a period, one digit
    r"|(?P<widthDefaultPrecision>[1-9])"       # one digit, no period
    r"|(?P<widthZeroPrecison>[1-9])\."         # one digit followed by a bare period
    r"|\.(?P<precisionDefaultWidth>[1-9])"     # a period followed by one digit
    r")?"                                      # end of optional width/precision part
    r"(?P<verb>\w{1,9})"                       # verb: one to nine word characters
)
test_str = ("// Copyright 2009 The Go Authors. All rights reserved.\n"
"// Use of this source code is governed by a BSD-style\n"
"// license that can be found in the LICENSE file.\n\n"
"/*\n"
" Package fmt implements formatted I/O with functions analogous\n"
" to C's printf and scanf. The format 'verbs' are derived from C's but\n"
" are simpler.\n\n\n"
" Printing\n\n"
" The verbs:\n\n"
" General:\n"
" %v the value in a default format\n"
" when printing structs, the plus flag (%+v) adds field names\n"
" %#v a Go-syntax representation of the value\n"
" %T a Go-syntax representation of the type of the value\n"
" %% a literal percent sign; consumes no value\n\n"
" Boolean:\n"
" %t the word true or false\n"
" Integer:\n"
" %b base 2\n"
" %c the character represented by the corresponding Unicode code point\n"
" %d base 10\n"
" %o base 8\n"
" %O base 8 with 0o prefix\n"
" %q a single-quoted character literal safely escaped with Go syntax.\n"
" %x base 16, with lower-case letters for a-f\n"
" %X base 16, with upper-case letters for A-F\n"
" %U Unicode format: U+1234; same as \"U+%04X\"\n"
" Floating-point and complex constituents:\n"
" %b decimalless scientific notation with exponent a power of two,\n"
" in the manner of strconv.FormatFloat with the 'b' format,\n"
" e.g. -123456p-78\n"
" %e scientific notation, e.g. -1.234456e+78\n"
" %E scientific notation, e.g. -1.234456E+78\n"
" %f decimal point but no exponent, e.g. 123.456\n"
" %F synonym for %f\n"
" %g %e for large exponents, %f otherwise. Precision is discussed below.\n"
" %G %E for large exponents, %F otherwise\n"
" %x hexadecimal notation (with decimal power of two exponent), e.g. -0x1.23abcp+20\n"
" %X upper-case hexadecimal notation, e.g. -0X1.23ABCP+20\n"
" String and slice of bytes (treated equivalently with these verbs):\n"
" %s the uninterpreted bytes of the string or slice\n"
" %q a double-quoted string safely escaped with Go syntax\n"
" %x base 16, lower-case, two characters per byte\n"
" %X base 16, upper-case, two characters per byte\n"
" Slice:\n"
" %p address of 0th element in base 16 notation, with leading 0x\n"
" Pointer:\n"
" %p base 16 notation, with leading 0x\n"
" The %b, %d, %o, %x and %X verbs also work with pointers,\n"
" formatting the value exactly as if it were an integer.\n\n"
" The default format for %v is:\n"
" bool: %t\n"
" int, int8 etc.: %d\n"
" uint, uint8 etc.: %d, %#x if printed with %#v\n"
" float32, complex64, etc: %g\n"
" string: %s\n"
" chan: %p\n"
" pointer: %p\n"
" For compound objects, the elements are printed using these rules, recursively,\n"
" laid out like this:\n"
" struct: {field0 field1 ...}\n"
" array, slice: [elem0 elem1 ...]\n"
" maps: map[key1:value1 key2:value2 ...]\n"
" pointer to above: &{}, &[], &map[]\n\n"
" Width is specified by an optional decimal number immediately preceding the verb.\n"
" If absent, the width is whatever is necessary to represent the value.\n"
" Precision is specified after the (optional) width by a period followed by a\n"
" decimal number. If no period is present, a default precision is used.\n"
" A period with no following number specifies a precision of zero.\n"
" Examples:\n"
" %f default width, default precision\n"
" %9f width 9, default precision\n"
" %.2f default width, precision 2\n"
" %9.2f width 9, precision 2\n"
" %9.f width 9, precision 0\n\n"
" Width and precision are measured in units of Unicode code points,\n"
" that is, runes. (This differs from C's printf where the\n"
" units are always measured in bytes.) Either or both of the flags\n"
" may be replaced with the character '*', causing their values to be\n"
" obtained from the next operand (preceding the one to format),\n"
" which must be of type int.\n\n"
" For most values, width is the minimum number of runes to output,\n"
" padding the formatted form with spaces if necessary.\n\n"
" For strings, byte slices and byte arrays, however, precision\n"
" limits the length of the input to be formatted (not the size of\n"
" the output), truncating if necessary. Normally it is measured in\n"
" runes, but for these types when formatted with the %x or %X format\n"
" it is measured in bytes.\n\n"
" For floating-point values, width sets the minimum width of the field and\n"
" precision sets the number of places after the decimal, if appropriate,\n"
" except that for %g/%G precision sets the maximum number of significant\n"
" digits (trailing zeros are removed). For example, given 12.345 the format\n"
" %6.3f prints 12.345 while %.3g prints 12.3. The default precision for %e, %f\n"
" and %#g is 6; for %g it is the smallest number of digits necessary to identify\n"
" the value uniquely.\n\n"
" For complex numbers, the width and precision apply to the two\n"
" components independently and the result is parenthesized, so %f applied\n"
" to 1.2+3.4i produces (1.200000+3.400000i).\n\n"
" Other flags:\n"
" + always print a sign for numeric values;\n"
" guarantee ASCII-only output for %q (%+q)\n"
" - pad with spaces on the right rather than the left (left-justify the field)\n"
" # alternate format: add leading 0b for binary (%#b), 0 for octal (%#o),\n"
" 0x or 0X for hex (%#x or %#X); suppress 0x for %p (%#p);\n"
" for %q, print a raw (backquoted) string if strconv.CanBackquote\n"
" returns true;\n"
" always print a decimal point for %e, %E, %f, %F, %g and %G;\n"
" do not remove trailing zeros for %g and %G;\n"
" write e.g. U+0078 'x' if the character is printable for %U (%#U).\n"
" ' ' (space) leave a space for elided sign in numbers (% d);\n"
" put spaces between bytes printing strings or slices in hex (% x, % X)\n"
" 0 pad with leading zeros rather than spaces;\n"
" for numbers, this moves the padding after the sign\n\n"
" Flags are ignored by verbs that do not expect them.\n"
" For example there is no alternate decimal format, so %#d and %d\n"
" behave identically.\n\n"
" For each Printf-like function, there is also a Print function\n"
" that takes no format and is equivalent to saying %v for every\n"
" operand. Another variant Println inserts blanks between\n"
" operands and appends a newline.\n\n"
" Regardless of the verb, if an operand is an interface value,\n"
" the internal concrete value is used, not the interface itself.\n"
" Thus:\n"
" var i interface{} = 23\n"
" fmt.Printf(\"%v\\n\", i)\n"
" will print 23.\n\n"
" Except when printed using the verbs %T and %p, special\n"
" formatting considerations apply for operands that implement\n"
" certain interfaces. In order of application:\n\n"
" 1. If the operand is a reflect.Value, the operand is replaced by the\n"
" concrete value that it holds, and printing continues with the next rule.\n\n"
" 2. If an operand implements the Formatter interface, it will\n"
" be invoked. Formatter provides fine control of formatting.\n\n"
" 3. If the %v verb is used with the # flag (%#v) and the operand\n"
" implements the GoStringer interface, that will be invoked.\n\n"
" If the format (which is implicitly %v for Println etc.) is valid\n"
" for a string (%s %q %v %x %X), the following two rules apply:\n\n"
" 4. If an operand implements the error interface, the Error method\n"
" will be invoked to convert the object to a string, which will then\n"
" be formatted as required by the verb (if any).\n\n"
" 5. If an operand implements method String() string, that method\n"
" will be invoked to convert the object to a string, which will then\n"
" be formatted as required by the verb (if any).\n\n"
" For compound operands such as slices and structs, the format\n"
" applies to the elements of each operand, recursively, not to the\n"
" operand as a whole. Thus %q will quote each element of a slice\n"
" of strings, and %6.2f will control formatting for each element\n"
" of a floating-point array.\n\n"
" However, when printing a byte slice with a string-like verb\n"
" (%s %q %x %X), it is treated identically to a string, as a single item.\n\n"
" To avoid recursion in cases such as\n"
" type X string\n"
" func (x X) String() string { return Sprintf(\"<%s>\", x) }\n"
" convert the value before recurring:\n"
" func (x X) String() string { return Sprintf(\"<%s>\", string(x)) }\n"
" Infinite recursion can also be triggered by self-referential data\n"
" structures, such as a slice that contains itself as an element, if\n"
" that type has a String method. Such pathologies are rare, however,\n"
" and the package does not protect against them.\n\n"
" When printing a struct, fmt cannot and therefore does not invoke\n"
" formatting methods such as Error or String on unexported fields.\n\n"
" Explicit argument indexes:\n\n"
" In Printf, Sprintf, and Fprintf, the default behavior is for each\n"
" formatting verb to format successive arguments passed in the call.\n"
" However, the notation [n] immediately before the verb indicates that the\n"
" nth one-indexed argument is to be formatted instead. The same notation\n"
" before a '*' for a width or precision selects the argument index holding\n"
" the value. After processing a bracketed expression [n], subsequent verbs\n"
" will use arguments n+1, n+2, etc. unless otherwise directed.\n\n"
" For example,\n"
" fmt.Sprintf(\"%[2]d %[1]d\\n\", 11, 22)\n"
" will yield \"22 11\", while\n"
" fmt.Sprintf(\"%[3]*.[2]*[1]f\", 12.0, 2, 6)\n"
" equivalent to\n"
" fmt.Sprintf(\"%6.2f\", 12.0)\n"
" will yield \" 12.00\". Because an explicit index affects subsequent verbs,\n"
" this notation can be used to print the same values multiple times\n"
" by resetting the index for the first argument to be repeated:\n"
" fmt.Sprintf(\"%d %d %#[1]x %#x\", 16, 17)\n"
" will yield \"16 17 0x10 0x11\".\n\n"
" Format errors:\n\n"
" If an invalid argument is given for a verb, such as providing\n"
" a string to %d, the generated string will contain a\n"
" description of the problem, as in these examples:\n\n"
" Wrong type or unknown verb: %!verb(type=value)\n"
" Printf(\"%d\", \"hi\"): %!d(string=hi)\n"
" Too many arguments: %!(EXTRA type=value)\n"
" Printf(\"hi\", \"guys\"): hi%!(EXTRA string=guys)\n"
" Too few arguments: %!verb(MISSING)\n"
" Printf(\"hi%d\"): hi%!d(MISSING)\n"
" Non-int for width or precision: %!(BADWIDTH) or %!(BADPREC)\n"
" Printf(\"%*s\", 4.5, \"hi\"): %!(BADWIDTH)hi\n"
" Printf(\"%.*s\", 4.5, \"hi\"): %!(BADPREC)hi\n"
" Invalid or invalid use of argument index: %!(BADINDEX)\n"
" Printf(\"%*[2]d\", 7): %!d(BADINDEX)\n"
" Printf(\"%.[2]d\", 7): %!d(BADINDEX)\n\n"
" All errors begin with the string \"%!\" followed sometimes\n"
" by a single character (the verb) and end with a parenthesized\n"
" description.\n\n"
" If an Error or String method triggers a panic when called by a\n"
" print routine, the fmt package reformats the error message\n"
" from the panic, decorating it with an indication that it came\n"
" through the fmt package. For example, if a String method\n"
" calls panic(\"bad\"), the resulting formatted message will look\n"
" like\n"
" %!s(PANIC=bad)\n\n"
" The %!s just shows the print verb in use when the failure\n"
" occurred. If the panic is caused by a nil receiver to an Error\n"
" or String method, however, the output is the undecorated\n"
" string, \"<nil>\".\n\n"
" Scanning\n\n"
" An analogous set of functions scans formatted text to yield\n"
" values. Scan, Scanf and Scanln read from os.Stdin; Fscan,\n"
" Fscanf and Fscanln read from a specified io.Reader; Sscan,\n"
" Sscanf and Sscanln read from an argument string.\n\n"
" Scan, Fscan, Sscan treat newlines in the input as spaces.\n\n"
" Scanln, Fscanln and Sscanln stop scanning at a newline and\n"
" require that the items be followed by a newline or EOF.\n\n"
" Scanf, Fscanf, and Sscanf parse the arguments according to a\n"
" format string, analogous to that of Printf. In the text that\n"
" follows, 'space' means any Unicode whitespace character\n"
" except newline.\n\n"
" In the format string, a verb introduced by the % character\n"
" consumes and parses input; these verbs are described in more\n"
" detail below. A character other than %, space, or newline in\n"
" the format consumes exactly that input character, which must\n"
" be present. A newline with zero or more spaces before it in\n"
" the format string consumes zero or more spaces in the input\n"
" followed by a single newline or the end of the input. A space\n"
" following a newline in the format string consumes zero or more\n"
" spaces in the input. Otherwise, any run of one or more spaces\n"
" in the format string consumes as many spaces as possible in\n"
" the input. Unless the run of spaces in the format string\n"
" appears adjacent to a newline, the run must consume at least\n"
" one space from the input or find the end of the input.\n\n"
" The handling of spaces and newlines differs from that of C's\n"
" scanf family: in C, newlines are treated as any other space,\n"
" and it is never an error when a run of spaces in the format\n"
" string finds no spaces to consume in the input.\n\n"
" The verbs behave analogously to those of Printf.\n"
" For example, %x will scan an integer as a hexadecimal number,\n"
" and %v will scan the default representation format for the value.\n"
" The Printf verbs %p and %T and the flags # and + are not implemented.\n"
" For floating-point and complex values, all valid formatting verbs\n"
" (%b %e %E %f %F %g %G %x %X and %v) are equivalent and accept\n"
" both decimal and hexadecimal notation (for example: \"2.3e+7\", \"0x4.5p-8\")\n"
" and digit-separating underscores (for example: \"3.14159_26535_89793\").\n\n"
" Input processed by verbs is implicitly space-delimited: the\n"
" implementation of every verb except %c starts by discarding\n"
" leading spaces from the remaining input, and the %s verb\n"
" (and %v reading into a string) stops consuming input at the first\n"
" space or newline character.\n\n"
" The familiar base-setting prefixes 0b (binary), 0o and 0 (octal),\n"
" and 0x (hexadecimal) are accepted when scanning integers\n"
" without a format or with the %v verb, as are digit-separating\n"
" underscores.\n\n"
" Width is interpreted in the input text but there is no\n"
" syntax for scanning with a precision (no %5.2f, just %5f).\n"
" If width is provided, it applies after leading spaces are\n"
" trimmed and specifies the maximum number of runes to read\n"
" to satisfy the verb. For example,\n"
" Sscanf(\" 1234567 \", \"%5s%d\", &s, &i)\n"
" will set s to \"12345\" and i to 67 while\n"
" Sscanf(\" 12 34 567 \", \"%5s%d\", &s, &i)\n"
" will set s to \"12\" and i to 34.\n\n"
" In all the scanning functions, a carriage return followed\n"
" immediately by a newline is treated as a plain newline\n"
" (\\r\\n means the same as \\n).\n\n"
" In all the scanning functions, if an operand implements method\n"
" Scan (that is, it implements the Scanner interface) that\n"
" method will be used to scan the text for that operand. Also,\n"
" if the number of arguments scanned is less than the number of\n"
" arguments provided, an error is returned.\n\n"
" All arguments to be scanned must be either pointers to basic\n"
" types or implementations of the Scanner interface.\n\n"
" Like Scanf and Fscanf, Sscanf need not consume its entire input.\n"
" There is no way to recover how much of the input string Sscanf used.\n\n"
" Note: Fscan etc. can read one character (rune) past the input\n"
" they return, which means that a loop calling a scan routine\n"
" may skip some of the input. This is usually a problem only\n"
" when there is no space between input values. If the reader\n"
" provided to Fscan implements ReadRune, that method will be used\n"
" to read characters. If the reader also implements UnreadRune,\n"
" that method will be used to save the character and successive\n"
" calls will not lose data. To attach ReadRune and UnreadRune\n"
" methods to a reader without that capability, use\n"
" bufio.NewReader.\n"
"*/\n"
"package fmt")
def print_matches(pattern, text, flags=re.MULTILINE):
    """Print every match of ``pattern`` in ``text`` together with its groups.

    For each match (numbered from 1) a "Match ..." line is printed with the
    match's start/end offsets and the matched text, followed by one
    "Group ..." line per capturing group (numbered from 1).  A group that did
    not take part in the match is reported with the values ``re`` returns for
    it: offsets ``-1--1`` and text ``None``.

    Args:
        pattern: Regular-expression source string passed to ``re.finditer``.
        text: The string to scan.
        flags: ``re`` flags used for the scan; defaults to ``re.MULTILINE``.
    """
    # str.format is used instead of f-strings so the script stays runnable on
    # Python 2.x, which the file header says it is meant to support.
    for match_num, match in enumerate(re.finditer(pattern, text, flags), start=1):
        print("Match {matchNum} was found at {start}-{end}: {match}".format(
            matchNum=match_num,
            start=match.start(),
            end=match.end(),
            match=match.group()))
        # Group 0 is the whole match, already printed above, so start at 1.
        for group_num in range(1, len(match.groups()) + 1):
            print("Group {groupNum} found at {start}-{end}: {group}".format(
                groupNum=group_num,
                start=match.start(group_num),
                end=match.end(group_num),
                group=match.group(group_num)))


if __name__ == "__main__":
    # Run the sample pattern against the sample text when executed as a script.
    print_matches(regex, test_str)
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and
# are not guaranteed to work. If you find any syntax errors, feel free to
# submit a bug report. For a full regex reference for Python, please visit:
# https://docs.python.org/3/library/re.html