# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern for a DOI reference embedded in a PubMed citation string.
# Group 1 spans: one whitespace character, the literal "doi: 10.", 1-50
# digits, a "/", a run of non-whitespace characters, then a literal "."
# followed by exactly one more (non-newline) character.  The captured text
# therefore includes the leading whitespace and the single character that
# follows the final dot.
regex = r"(\sdoi: 10\.\d{1,50}/\S+\..)"

# Sample input: fragments of PubMed CSV export rows, one DOI per line.
test_str = (
    "Reliability and validity of the French version of the global physical activity questionnaire.\",\"/pubmed/30356654\",\"Rivière F, Widad FZ, Speyer E, Erpelding ML, Escalon H, Vuillemin A.\",\"J Sport Health Sci. 2018 Jul;7(3):339-345. doi: 10.1016/j.jshs.2016.08.004. Epub 2016 Aug 15.\",\"J Sport Health Sci. 2018\",\"PubMed\",\"citation\",\"PMID:30356654 | PMCID:PMC6189276\",\"pubmed\",\"30356654\",\"create date:2018/10/26 | first author:Rivière F\n"
    "jbfkjezfkzjbkzvf doi: 10.3390/pharmaceutics8020011.\",\"Pharmaceutics.\n"
    "khfkefkzefhzfe doi: 10.18632/oncotarget.8210.\",\"Oncotarget.\n"
    "jkkgkg doi: 10.3390/molecules21091123.\",\"Molecules.\n"
    "(3). pii: E27. doi: 10.3390/pharmacy4030027.\",\"Pharmacy (Basel). 2016\",\"PubMed\",\"citation\",\"PMID:28970400 | PMCID:PMC5419365\",\"pubmed\",\"28970400\",\"create date:2017/10/04 | first author:Atkinson J\",\n"
    "63(2):175-183. doi: 10.1093/cz/zow048. Epub 2016 Apr 22.\",\"Curr Zool. 2017\",\"PubMed\",\"citation\",\"PMID:29491975 | PMCID:PMC5804161\",\"pubmed\",\"29491975\",\"create date:2018/03/02 | first author:Colchen T\",\n"
    "51:209-216. doi: 10.1515/hukin-2015-0184. eCollection 2016 Jun 1.\",\"J Hum Kinet. 2016\",\"PubMed\",\"citation\",\"PMID:28149384 | PMCID:PMC5260564\",\"pubmed\",\"28149384\",\"create date:2017/02/06 | first author:Rouis M\",\n"
    "11(12):e0168349. doi: 10.1371/journal.pone.0168349. eCollection 2016.\",\"PLoS One. 2016\",\"PubMed\",\"citation\",\"PMID:28036335 | PMCID:PMC5201304\",\"pubmed\",\"28036335\",\"create date:2016/12/31 | first author:Huttin O\",\n"
    "14(9):1124-1137. doi: 10.1080/15476286.2016.1251543. Epub 2016 Oct 28. Review.\",\"RNA Biol. 2017\",\"PubMed\",\"citation\",\"PMID:27791472 | PMCID:PMC5699547\",\"pubmed\",\"27791472\",\"create date:2016/10/30 | first author:Schwartz S\",\n"
    "66(1):39-47. doi: 10.1016/j.jhep.2016.08.021. Epub 2016 Sep 10.\",\"J Hepatol. 2017\",\"PubMed\",\"citation\",\"PMID:27622858\",\"pubmed\",\"27622858\",\"create date:2016/09/14 | first author:Pol S\","
)

# re.MULTILINE is kept from the generated sample; the pattern contains no
# ^ or $ anchors, so the flag does not change what is matched.
matches = re.finditer(regex, test_str, re.MULTILINE)

# Report every match, then each of its capture groups (numbered from 1).
for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=matchNum,
        start=match.start(),
        end=match.end(),
        match=match.group(),
    ))

    # Group 0 is the whole match (already printed above), so start at 1.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=groupNum,
            start=match.start(groupNum),
            end=match.end(groupNum),
            group=match.group(groupNum),
        ))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html