# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern for one "INFO SYSTEM: Command <name> returned: <output>" log entry.
#   group 1: timestamp (YYYY-MM-DD HH:MM:SS)
#   group 2: milliseconds
#   group 3: command name
#   group 4: command output, possibly spanning many lines
# The inline (?ms) flags make the pattern self-contained: DOTALL lets group 4
# cross newlines, MULTILINE lets ^ match at every line start.
# Both the entry itself and the "next entry" lookahead are anchored to the
# start of a line, so timestamp-shaped text that appears in the middle of a
# line of command output no longer cuts group 4 short.
regex = (
    r"(?ms)"
    r"^(\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}),(\d{3})"
    r"\sINFO\sSYSTEM:\s+Command\s+(\w+)\s+returned:\s+"
    r"(.*?)"
    # Stop (without consuming) at the next line that starts with a timestamp,
    # or at the very end of the input.
    r"(?=^\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d{3}|\Z)"
)
# Sample log text the pattern is run against. The adjacent string literals are
# concatenated into one string containing, in order: a NEXT ACTION line, a
# "SYSTEM: Command google returned:" entry whose JSON-like list output spans
# many lines, then THOUGHTS, REASONING and PLAN lines.
test_str = ("2023-04-21 22:35:47,125 INFO NEXT ACTION: COMMAND = google ARGUMENTS = {'query': 'predictive-analytics-lab Protein-DNA-structure-prediction'}\n"
"2023-04-21 22:35:50,404 INFO SYSTEM: Command google returned: [\n"
" {\n"
" \"title\": \"Protein-structure prediction revolutionized\",\n"
" \"href\": \"https://www.nature.com/articles/d41586-021-02265-4\",\n"
" \"body\": \"Protein-structure prediction revolutionized. The full might of a world-leading artificial-intelligence laboratory has been brought to bear on protein-structure prediction. The resulting method ...\"\n"
" },\n"
" {\n"
" \"title\": \"Improved protein structure prediction using predicted ... - PNAS\",\n"
" \"href\": \"https://www.pnas.org/doi/10.1073/pnas.1914677117\",\n"
" \"body\": \"Protein structure prediction is a longstanding challenge in computational biology. Through extension of deep learning-based prediction to interresidue orientations in addition to distances, and the development of a constrained optimization by Rosetta, we show that more accurate models can be generated. Results on a set of 18 de novo-designed ...\"\n"
" },\n"
" {\n"
" \"title\": \"Method of the Year 2021: Protein structure prediction\",\n"
" \"href\": \"https://www.nature.com/articles/s41592-021-01380-4\",\n"
" \"body\": \"For these remarkable achievements, we have chosen protein structure prediction as the Method of the Year 2021. The 3D shape of a protein dictates its biological function and provides vital ...\"\n"
" },\n"
" {\n"
" \"title\": \"Artificial Intelligence Accurately Predicts Protein Folding\",\n"
" \"href\": \"https://directorsblog.nih.gov/2021/07/27/artificial-intelligence-accurately-predicts-protein-folding/\",\n"
" \"body\": \"But the ability to predict a protein's precise structure or shape from its sequence alone had proven to be a difficult problem to solve despite decades of effort. In search of a solution, research teams from around the world have come together every two years since 1994 at the Critical Assessment of Structure Prediction (CASP) meetings.\"\n"
" },\n"
" {\n"
" \"title\": \"Deep learning methods in protein structure prediction - PubMed\",\n"
" \"href\": \"https://pubmed.ncbi.nlm.nih.gov/32612753/\",\n"
" \"body\": \"Protein Structure Prediction is a central topic in Structural Bioinformatics. Since the '60s statistical methods, followed by increasingly complex Machine Learning and recently Deep Learning methods, have been employed to predict protein structural information at various levels of detail. In this re …\"\n"
" },\n"
" {\n"
" \"title\": \"PredictProtein - Predicting Protein Structure and Function for 29 Years ...\",\n"
" \"href\": \"https://academic.oup.com/nar/article/49/W1/W535/6276913\",\n"
" \"body\": \"Abstract. Since 1992 PredictProtein (https://predictprotein.org) is a one-stop online resource for protein sequence analysis with its main site hosted at the Luxembourg Centre for Systems Biomedicine (LCSB) and queried monthly by over 3,000 users in 2020. PredictProtein was the first Internet server for protein predictions. It pioneered combining evolutionary information and machine learning.\"\n"
" },\n"
" {\n"
" \"title\": \"Protein Structure Prediction | DNASTAR\",\n"
" \"href\": \"https://www.dnastar.com/workflows/protein-structure-prediction/\",\n"
" \"body\": \"CALL 866.511.5090. Create highly accurate protein models, unattainable through standard modeling methodologies. Protein 3D structure prediction from a unique amino acid sequence is so important because having a protein structure model provides a greater level of understanding of how a protein works, which can allow us to create hypotheses about ...\"\n"
" },\n"
" {\n"
" \"title\": \"Epitope Prediction Using Protean 3D | DNASTAR\",\n"
" \"href\": \"https://www.dnastar.com/workflows/epitope-prediction/\",\n"
" \"body\": \"CALL 866.511.5090. Use Protean 3D to accurately perform B-cell epitope prediction using only sequence-based information. B-cell epitopes-the part of an antigen recognized by an antibody-are conformational in nature; however, accurate epitope prediction is difficult for proteins without a known 3D structure.\"\n"
" }\n"
"]\n"
"2023-04-21 22:19:51,704 INFO THOUGHTS: Based on the search, I would like to analyze the performance of the existing methods, and see which ones could be improved upon. That way we can decide which method to use as a starting point for our novel method\n"
"2023-04-21 22:19:52,273 INFO REASONING: Comparing the performance of the existing methods can give us insight into their limitations and strengths, helping us decide how to design a better method\n"
"2023-04-21 22:19:52,761 INFO PLAN: ")
# Find every entry in test_str that matches the pattern and report it.
# re.DOTALL lets "." cross newlines so a multi-line command output is captured
# as one group; re.MULTILINE makes any ^/$ anchors in the pattern line-based.
matches = re.finditer(regex, test_str, re.MULTILINE | re.DOTALL)
for matchNum, match in enumerate(matches, start=1):
    # Whole match (group 0) with its character offsets in test_str.
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=matchNum,
        start=match.start(),
        end=match.end(),
        match=match.group(),
    ))
    # Capture groups are numbered from 1; group 0 was printed above.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=groupNum,
            start=match.start(groupNum),
            end=match.end(groupNum),
            group=match.group(groupNum),
        ))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html