# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
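# The pattern captures runs of one or more ASCII letters, commas, periods, colons,
# semicolons, or whitespace characters into a single group.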
regex = r"([a-zA-Z,.:;\s]+)"
test_str = (" Cross, Kumaraswamy Layout Bangalore-560078, India \n"
"Date of Birth: 17 January, 1990 Phone: 9916970905 Email: neelmani.nm@gmail.com \n"
" \n"
"Seeking Data Scientist position \n"
"SYNOPSIS: * \n"
" Proficient in programming, statistical analysis, data visualization in R \n"
" On-time delivery with utmost quality \n"
" Data driven solution for business problems \n"
" Remarkable ability to identify, extract, analyze and disseminate statistical analysis \n"
" Highly motivated to learn and grow in Data Science \n"
" \n"
"PROFESSIONAL EXPERIENCE \n"
" \n"
"ORGANIZATION: Flutura Data \n"
"Science and Analytics \n"
" \n"
"Category: Service provider \n"
"Domain: Energy, Online Search \n"
"Designation: Data Scientist (Apr 2015 – July 2015) \n"
" \n"
"Role and Responsibilities \n"
" Understanding project requirements \n"
" Getting and Cleaning the data from different data sources using R and Python \n"
" Exploratory data analysis, Data Mining, Data Modelling \n"
" Insight generation and recommendations \n"
" \n"
"ORGANIZATION: Trianz Holding \n"
"Pvt. Ltd. \n"
" \n"
"Category: Service provider \n"
"Domain: e-Governance, Security and \n"
"Protection, Maritime Logistics \n"
"Designation: Software Engineer (July 2012 – Sep 2014) \n"
" \n"
"Role and Responsibilities \n"
" Understanding and gathering project requirements \n"
" Gathering information regarding different data sources \n"
" Identification and conversion of data for analytical modeling \n"
" Report development on the insights of the data \n"
" Migration of data between different data storage location \n"
" Insight generation and recommendations \n"
" \n"
" \n"
"EDUCATION \n"
" \n"
"Dayananda Sagar College of Engineering, \n"
"Bangalore \n"
"Bachelor of Engineering (B.E), June 2012 \n"
"Major: Telecom., 64.5% \n"
"CBSE – 2006 (XIIth ) – Guru Govind Singh Public School, Bokaro - 73% \n"
"CBSE – 2004 (Xth ) – Jeevan Deep Public School, Nawada - 84% \n"
" \n"
" \n"
" \n"
" \n"
" \n"
" \n"
" SKILLS \n"
" \n"
"Proficient in R, SQL, MS EXCEL, Pentaho and analytical techniques like linear regression, logistic regression, Hypothesis \n"
"testing, chi square test, ANOVA etc \n"
"Exposer of Python, Qlikview, Tableau \n"
" \n"
" \n"
" CERTIFICATIONS \n"
" \n"
" Foundation of Data Science, University of Texas (MOOC Programme) \n"
" The Data Scientist’s Toolbox, Jhons Hopkins University (Coursera verified certificate) \n"
" R Programming, Jhons Hopkins University (Coursera verified certificate) \n"
" Getting and Cleaning Data, Jhons Hopkins University (Coursera verified certificate) \n"
" Exploratory Data Analysis, Jhons Hopkins University (Coursera verified certificate) \n"
" Reproducibility of Research, Jhons Hopkins University (Coursera verified certificate) \n"
" Statistical Inference, Jhons Hopkins University (Coursera verified certificate) \n"
" Certification in ITIL Foundation \n"
" \n"
"PROJECTS \n"
" \n"
"Online Search:- \n"
"Reports and dashboard for analyzing the user preference for using online search engine. Reports have to be generated for regular \n"
"time interval using R to analyze the trend. \n"
"Energy:- \n"
"Analyzed churn problem by using data from different data sources and logistic regression. A statistical model for predicting the \n"
"future churn. Analyzed the problem and the retail energy domain in US which helped to come up with a statistical model. \n"
"Marine Industry:- \n"
"Gather data from different data sources involving different information regarding the vessel. On top of which analytical insights \n"
"for the vessel performance. \n"
" \n"
"Security and Protection:- \n"
"Fortune 500 company in the securities and protection. Using different data sources centralized reporting system has been \n"
"developed \n"
" \n"
"e-Governance:- \n"
"e-Govenance is a data migration project which is meant to migrate the different department data of the government to a common \n"
"data model. \n")
matches = re.finditer(regex, test_str)

for matchNum, match in enumerate(matches, start=1):
    # Report the full match with its position in the test string
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=matchNum, start=match.start(), end=match.end(), match=match.group()))

    # Report each capturing group of the current match (groups are numbered from 1)
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=groupNum, start=match.start(groupNum), end=match.end(groupNum),
            group=match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
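# A minimal sketch of that Python 2.7 form (same pattern, with a shortened test string used
# purely for illustration; this snippet is an added example and is kept commented out
# because the ur"" prefix is not valid syntax in Python 3):
#     regex = ur"([a-zA-Z,.:;\s]+)"
#     test_str = u"Seeking Data Scientist position"
#     for match in re.finditer(regex, test_str):
#         print(match.group())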
# Full regex reference for Python: https://docs.python.org/3/library/re.html