# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern for DOI-like tokens: the literal prefix "10.", a registrant code of
# at least four digits, optional dotted numeric sub-parts, a slash, and then a
# suffix of non-whitespace characters that excludes double quotes, ampersands
# and single quotes. The surrounding \b anchors make the match start and end
# on a word boundary, so trailing punctuation such as "]" is left out.
regex = r"\b(10\.[0-9]{4,}(?:\.[0-9]+)*\/(?:(?![\"&\'])\S)+)\b"

# Sample citations containing DOIs in several notations (resolver URLs,
# "doi:" prefixes, and bracketed references).
test_str = ("Herbst, D. M., Griffith, N. R., & Slama, K. M. (2014). Rodeo cowboys: Conforming to masculine norms and help-seeking behaviors for depression. Journal of Rural Mental Health, 38, 20–35. http://dx.doi.org/10.1037/rmh0000008\n\n"
    "Herbst, D. M., Griffith, N. R., & Slama, K. M. (2014). Rodeo cowboys: Conforming to masculine norms and help- seeking behaviors for depression. Journal of Rural Mental Health, 38, 20–35. doi:10.1037/rmh0000008\n\n"
    "Example: https://doi.org/10.5468/ogs.2016.59.1.1\n\n"
    "\"Quantum tomography: Measured measurement\", Markus Aspelmeyer, nature physics January 2009, Volume 5, No 1, pp11-12; [doi:10.10.1038/nphys1170]\n\n"
    "Held, Gilbert. Internetworking LANs and WANs (Second Edition), John Wiley & Sons, 1998, Published Online 05 Oct 2001. Chapter 1, Network Concepts (p 1-30) [doi:10.1002/0470841559.ch1]\n\n"
    "Irino, T; Tada, R (2009): Chemical and mineral compositions of sediments from ODP Site 127-797. Geological Institute, University of Tokyo. [doi:10.1594/PANGAEA.726855]\n"
    "Earthquake Event, Authored by Automated System:\n\n"
    "Geofon operator (2009): GEOFON event gfz2009kciu (NW Balkan Region) GeoForschungsZentrum Potsdam(GFZ). [doi:10.1594/GFZ.GEOFON.gfz2009kciu]\n\n"
    "Kraus, Stefan; del Valle, Rodolfo (2008): Geological map of Potter Peninsula (King George Island, South Shetland Islands, Antarctic Peninsula). Instituto Antártico Chileno, Punta Arenas, Chile & Instituto Antártico Argentino, Buenos Aires, Argentina. [doi:10.1594/PANGAEA.667386]\n\n"
    "B. Kirchhof (2009) Silicone oil bubbles entrapped in the vitreous base during silicone oil removal, Video Journal of Vitreoretinal Surgery. [doi: 10.3207/2959859860]")

# finditer yields match objects lazily, in order of appearance in test_str.
matches = re.finditer(regex, test_str, re.IGNORECASE | re.MULTILINE)

for matchNum, match in enumerate(matches, start=1):
    # Report the whole match first, then every capturing group within it.
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=matchNum,
        start=match.start(),
        end=match.end(),
        match=match.group(),
    ))

    # Group numbers are 1-based; group 0 is the whole match reported above.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=groupNum,
            start=match.start(groupNum),
            end=match.end(groupNum),
            group=match.group(groupNum),
        ))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html