using System;
using System.Text.RegularExpressions;
/// <summary>
/// Console sample that runs one regular expression over an embedded XHTML
/// table-of-contents document and prints every match with its start index.
/// </summary>
public class Example
{
    /// <summary>
    /// Builds the pattern and the sample input, then writes one line per match
    /// to standard output in the form <c>'{value}' found at index {index}.</c>
    /// </summary>
    public static void Main()
    {
        // The pattern is an alternation of three branches:
        //   1. an opening tag from a fixed list, at least three characters of content
        //      (lazy), and then a closing tag of the same name or a <figure>/<img> tag;
        //   2. the same opening tag + content, but ended by a lookahead for the next '<';
        //   3. at least three characters of text that sit between a '>' and a "</div>".
        //
        // .NET spells named groups as (?<name>...) and named backreferences as \k<name>.
        // The Python/PCRE spellings (?P<name>...) and (?P=name) are rejected by the .NET
        // engine with an ArgumentException, so the .NET forms are used here.
        string pattern = @"(((<(?<tag>(title|table|li|p|h\d|td|li|pre|div|i|a|b|strong))\b[^>]*?>(((?!<\k<tag>|<pre|<figure|<img|<h\d|<li\b)[\d\D])){3,}?)(<(\/\k<tag>|figure|img)[^>]*?>))|((<(?<tag2>(title|li|p|h\d|td|li|pre|div|i|a|b|strong))\b[^>]*?>(((?!<\k<tag2>|<pre|<figure|<img|<h\d|<li\b)[\d\D])){3,}?)(?=<))|((?<=>)[^<]{3,}(?=<\/div>)))";

        // Verbatim string: the following lines are part of the literal and must stay
        // flush-left, otherwise the leading whitespace would become part of the input.
        string input = @"<?xml version=""1.0"" encoding=""UTF-8""?>
<!DOCTYPE html>
<html lang=""en"" xmlns=""http://www.w3.org/1999/xhtml"" xmlns:epub=""http://www.idpf.org/2007/ops"">
<head>
<title>Python Text Mining: Perform Text Processing, Word Embedding, Text Classification and Machine Translation</title>
<meta http-equiv=""default-style"" content=""application/xhtml+xml; charset=utf-8"" />
<style>ol{list-style-type:none;}a{text-decoration:none;}</style>
</head>
<body>
<nav id=""toc"" epub:type=""toc"">
<h1>Table of Contents</h1>
<div>jkas<div>fbksafbjksa</div>dfdfdfd</div>
<ol epub:type=""list"">
<li><a href=""cvi.xhtml#cvi"">Cover Page</a></li>
<li><a href=""tp.xhtml#s1"">Title
Page</a></li>
<li><a href=""cop.xhtml"">Copyright Page</a></li>
<li><a href=""ded.xhtml"">Dedication Page</a></li>
<li><a href=""ata.xhtml"">About the Author</a></li>
<li><a href=""fm.xhtml"">About the Reviewer</a></li>
<li><a href=""ack.xhtml"">Acknowledgement</a></li>
<li><a href=""pre.xhtml"">Preface</a></li>
<li><a href=""fm1.xhtml"">Errata</a></li>
<li><a href=""toc.xhtml"">Table of Contents</a></li>
<li><a href=""c01.xhtml"">1. Basic Text Processing Techniques</a>
<ol epub:type=""list"">
<li><a href=""c01.xhtml#s1"">Introduction</a></li>
<li><a href=""c01.xhtml#s2"">Structure</a></li>
<li><a href=""c01.xhtml#s3"">Objectives</a></li>
<li><a href=""c01.xhtml#s4"">Data preparation</a></li>
<li><a href=""c01.xhtml#s5"">Project 1: Twitter data analysis</a>
<ol epub:type=""list"">
<li><a href=""c01.xhtml#s6"">Scraping the data</a></li>
<li><a href=""c01.xhtml#s7"">Data pre-processing</a></li>
<li><a href=""c01.xhtml#s8"">Importing necessary packages</a></li>
<li><a href=""c01.xhtml#s9"">HTML parsing</a></li>
<li><a href=""c01.xhtml#s10"">Removing accented characters</a></li>
<li><a href=""c01.xhtml#s11"">Expanding contractions</a></li>
<li><a href=""c01.xhtml#s12"">Lemmetization and stemming</a>
<ol epub:type=""list"">
<li><a href=""c01.xhtml#s13"">Fail case</a></li></ol></li>
<li><a href=""c01.xhtml#s14"">Removing special characters</a></li>
<li><a href=""c01.xhtml#s15"">Removing stop words</a></li>
<li><a href=""c01.xhtml#s16"">Handling emojis or emoticons</a></li>
<li><a href=""c01.xhtml#s17"">Emoji removal</a></li>
<li><a href=""c01.xhtml#s18"">Text acronym abbreviation</a></li>
<li><a href=""c01.xhtml#s19"">Twitter data processing</a></li>
<li><a href=""c01.xhtml#s20"">Extracting usertags and hashtags</a></li></ol></li>
<li><a href=""c01.xhtml#s21"">Project 2: In-shots data pre-processing</a>
<ol epub:type=""list"">
<li><a href=""c01.xhtml#s22"">Importing the necessary packages</a></li>
<li><a href=""c01.xhtml#s23"">Setting the urls for data extraction</a></li>
<li><a href=""c01.xhtml#s24"">Function to scrape data from the urls</a></li>
<li><a href=""c01.xhtml#s25"">Importing packages</a></li></ol></li>
<li><a href=""c01.xhtml#s26"">Conclusion</a></li>
<li><a href=""c01.xhtml#s27"">Questions</a></li>
<li><a href=""c01.xhtml#s28"">Multiple choice questions</a>
<ol epub:type=""list"">
<li><a href=""c01.xhtml#s29"">Answers</a></li></ol></li></ol></li>
<li><a href=""c02.xhtml"">2. Text to Numbers</a>
<ol epub:type=""list"">
<li><a href=""c02.xhtml#s30"">Introduction</a></li>
<li><a href=""c02.xhtml#s31"">Structure</a></li>
<li><a href=""c02.xhtml#s32"">Objectives</a></li>
<li><a href=""c02.xhtml#s33"">Feature encoding or engineering</a></li>
<li><a href=""c02.xhtml#s34"">One-hot encoding</a>
<ol epub:type=""list"">
<li><a href=""c02.xhtml#s35"">Corpus</a></li>
<li><a href=""c02.xhtml#s36"">Code</a></li>
<li><a href=""c02.xhtml#s37"">Creating the text corpus</a></li>
<li><a href=""c02.xhtml#s38"">Some basic pre-processings</a>
<ol epub:type=""list"">
<li><a href=""c02.xhtml#s39"">Min_df</a></li>
<li><a href=""c02.xhtml#s40"">Max_df</a></li></ol></li>
<li><a href=""c02.xhtml#s41"">Limitations</a></li></ol></li>
<li><a href=""c02.xhtml#s42"">Bag of words</a>
<ol epub:type=""list"">
<li><a href=""c02.xhtml#s43"">Code</a></li>
<li><a href=""c02.xhtml#s44"">Performing bag-of-words using sklearn</a></li>
<li><a href=""c02.xhtml#s45"">Difference between one-hot encoding and bag of words</a></li>
<li><a href=""c02.xhtml#s46"">Limitations</a></li></ol></li>
<li><a href=""c02.xhtml#s47"">N-gram model</a>
<ol epub:type=""list"">
<li><a href=""c02.xhtml#s48"">Limitations</a></li></ol></li>
<li><a href=""c02.xhtml#s49"">TF-IDF</a>
<ol epub:type=""list"">
<li><a href=""c02.xhtml#s50"">Code</a></li>
<li><a href=""c02.xhtml#s51"">Performing TF-IDF using sklearn</a></li></ol></li>
<li><a href=""c02.xhtml#s52"">Project -1</a></li>
<li><a href=""c02.xhtml#s53"">Solution</a>
<ol epub:type=""list"">
<li><a href=""c02.xhtml#s54"">Loading the dataset</a></li>
<li><a href=""c02.xhtml#s55"">Some basic pre-processings</a></li>
<li><a href=""c02.xhtml#s56"">One-hot encoding</a></li>
<li><a href=""c02.xhtml#s57"">Bag of words</a></li>
<li><a href=""c02.xhtml#s58"">Bag of N-grams model</a></li></ol></li>
<li><a href=""c02.xhtml#s59"">Project -2</a>
<ol epub:type=""list"">
<li><a href=""c02.xhtml#s60"">Loading the dataset</a></li>
<li><a href=""c02.xhtml#s61"">Some basic pre-processings</a></li>
<li><a href=""c02.xhtml#s62"">TF-IDF</a></li>
<li><a href=""c02.xhtml#s63"">Comparison of One-Hot, BOW, and TF-IDF</a></li></ol></li>
<li><a href=""c02.xhtml#s64"">Conclusion</a></li>
<li><a href=""c02.xhtml#s65"">Questions</a></li>
<li><a href=""c02.xhtml#s66"">Multiple choice questions</a>
<ol epub:type=""list"">
<li><a href=""c02.xhtml#s67"">Answers</a></li></ol></li></ol></li>
<li><a href=""c03.xhtml"">3. Word Embeddings</a>
<ol epub:type=""list"">
<li><a href=""c03.xhtml#s68"">Introduction</a></li>
<li><a href=""c03.xhtml#s69"">Structure</a></li>
<li><a href=""c03.xhtml#s70"">Objective</a></li>
<li><a href=""c03.xhtml#s71"">Word vectors or word embeddings</a>
<ol epub:type=""list"">
<li><a href=""c03.xhtml#s72"">Difference between word embeddings and TF-IDF</a></li></ol></li>
<li><a href=""c03.xhtml#s73"">Feature engineering with word embeddings</a></li>
<li><a href=""c03.xhtml#s74"">Word2Vec</a>
<ol epub:type=""list"">
<li><a href=""c03.xhtml#s75"">Code</a></li>
<li><a href=""c03.xhtml#s76"">t-SNE</a></li>
<li><a href=""c03.xhtml#s77"">Word similarity dataframe</a></li></ol></li>
<li><a href=""c03.xhtml#s78"">Global Vector (GloVe) Model</a>
<ol epub:type=""list"">
<li><a href=""c03.xhtml#s79"">The GloVe Model using Spacy</a></li>
<li><a href=""c03.xhtml#s80"">Loading the downloaded vector model</a></li>
<li><a href=""c03.xhtml#s81"">Word vector dataframe</a></li>
<li><a href=""c03.xhtml#s82"">t-SNE visualization</a></li>
<li><a href=""c03.xhtml#s83"">Word similarity dataframe</a></li></ol></li>
<li><a href=""c03.xhtml#s84"">fastText</a>
<ol epub:type=""list"">
<li><a href=""c03.xhtml#s85"">fastText using Gensim</a></li>
<li><a href=""c03.xhtml#s86"">t-SNE visualization</a></li>
<li><a href=""c03.xhtml#s87"">Finding Odd word out using FastText</a></li></ol></li>
<li><a href=""c03.xhtml#s88"">Difference between Word2Vec, GloVe, and FastText</a></li>
<li><a href=""c03.xhtml#s89"">Using pre-trained word embeddings</a>
<ol epub:type=""list"">
<li><a href=""c03.xhtml#s90"">Importing necessary libraries</a></li>
<li><a href=""c03.xhtml#s91"">Loading the Word2Vec model</a></li>
<li><a href=""c03.xhtml#s92"">Sample data initialization</a></li>
<li><a href=""c03.xhtml#s93"">Pre-processings and word tokenizations</a></li>
<li><a href=""c03.xhtml#s94"">Extracting list of unique words</a></li>
<li><a href=""c03.xhtml#s95"">t-SNE visualization</a></li></ol></li>
<li><a href=""c03.xhtml#s96"">Project</a>
<ol epub:type=""list"">
<li><a href=""c03.xhtml#s97"">Solution</a></li>
<li><a href=""c03.xhtml#s98"">Importing necessary libraries</a></li>
<li><a href=""c03.xhtml#s99"">Loading the Word2Vec model</a></li>
<li><a href=""c03.xhtml#s100"">Scrapping data from inshots</a></li>
<li><a href=""c03.xhtml#s101"">Pre-processings and word tokenizations</a></li>
<li><a href=""c03.xhtml#s102"">Extracting list of unique words</a></li>
<li><a href=""c03.xhtml#s103"">Removing words not in vocab</a></li>
<li><a href=""c03.xhtml#s104"">t-SNE visualization</a></li></ol></li>
<li><a href=""c03.xhtml#s105"">Conclusion</a>
<ol epub:type=""list"">
<li><a href=""c03.xhtml#s106"">Project</a></li></ol></li></ol></li>
<li><a href=""c04.xhtml"">4. Topic Modeling</a>
<ol epub:type=""list"">
<li><a href=""c04.xhtml#s107"">Introduction</a></li>
<li><a href=""c04.xhtml#s108"">Structure</a></li>
<li><a href=""c04.xhtml#s109"">Objectives</a></li>
<li><a href=""c04.xhtml#s110"">Topic modeling</a>
<ol epub:type=""list"">
<li><a href=""c04.xhtml#s111"">Identity a matrix</a></li>
<li><a href=""c04.xhtml#s112"">Unitary matrix</a></li>
<li><a href=""c04.xhtml#s113"">Eigen values and Eigen vectors</a></li>
<li><a href=""c04.xhtml#s114"">Singular value decomposition</a></li>
<li><a href=""c04.xhtml#s115"">Latent semantic indexing</a></li>
<li><a href=""c04.xhtml#s116"">TF-IDF vectorization</a></li>
<li><a href=""c04.xhtml#s117"">Building an SVD model</a></li>
<li><a href=""c04.xhtml#s118"">Looking at the topics and the words contributing to the topic</a></li>
<li><a href=""c04.xhtml#s119"">Advantages and disadvantages of LSI</a></li></ol></li>
<li><a href=""c04.xhtml#s120"">Latent Dirichlet Allocation</a>
<ol epub:type=""list"">
<li><a href=""c04.xhtml#s121"">Introduction</a></li>
<li><a href=""c04.xhtml#s122"">Working</a></li>
<li><a href=""c04.xhtml#s123"">About the data</a></li>
<li><a href=""c04.xhtml#s124"">Some pre-processing</a></li>
<li><a href=""c04.xhtml#s125"">Looking at the top 20 frequently used words</a></li>
<li><a href=""c04.xhtml#s126"">Some EDA</a></li>
<li><a href=""c04.xhtml#s127"">Generating Bi-grams (BoW)</a></li>
<li><a href=""c04.xhtml#s128"">LDA model fitting</a></li>
<li><a href=""c04.xhtml#s129"">LDA using Gensim and its visualization</a></li>
<li><a href=""c04.xhtml#s130"">Importing the data</a></li>
<li><a href=""c04.xhtml#s131"">Some pre-processing</a></li>
<li><a href=""c04.xhtml#s132"">Extending stop words and building ngram models</a></li>
<li><a href=""c04.xhtml#s133"">Creating term document frequency and the LDA model</a></li>
<li><a href=""c04.xhtml#s134"">Dominant topic identification</a></li>
<li><a href=""c04.xhtml#s135"">PyLDAvis</a></li>
<li><a href=""c04.xhtml#s136"">Disadvantages of LDA</a></li></ol></li>
<li><a href=""c04.xhtml#s137"">Non-Negative Matrix Factorization (NMF)</a>
<ol epub:type=""list"">
<li><a href=""c04.xhtml#s138"">Importing necessary libraries</a></li>
<li><a href=""c04.xhtml#s139"">Some pre-processing</a></li>
<li><a href=""c04.xhtml#s140"">Looking at the top 20 frequently used words</a></li>
<li><a href=""c04.xhtml#s141"">Some EDA</a></li>
<li><a href=""c04.xhtml#s142"">Generating Bi-grams (BoW)</a></li>
<li><a href=""c04.xhtml#s143"">Building TF-IDF vectorizer</a></li>
<li><a href=""c04.xhtml#s144"">Visualizing ranks with the TF-IDF weights</a></li>
<li><a href=""c04.xhtml#s145"">NMF modelling</a></li>
<li><a href=""c04.xhtml#s146"">Disadvantages of NMF</a></li></ol></li>
<li><a href=""c04.xhtml#s147"">Conclusion</a></li>
<li><a href=""c04.xhtml#s148"">Questions</a>
<ol epub:type=""list"">
<li><a href=""c04.xhtml#s149"">Answers</a></li></ol></li>
<li><a href=""c04.xhtml#s150"">Projects</a></li></ol></li>
<li><a href=""c05.xhtml"">5. Unsupervised Sentiment Classification</a>
<ol epub:type=""list"">
<li><a href=""c05.xhtml#s151"">Introduction</a></li>
<li><a href=""c05.xhtml#s152"">Structure</a></li>
<li><a href=""c05.xhtml#s153"">Objective</a></li>
<li><a href=""c05.xhtml#s154"">Lexicon-based approach</a>
<ol epub:type=""list"">
<li><a href=""c05.xhtml#s155"">About the dataset</a></li>
<li><a href=""c05.xhtml#s156"">Loading necessary libraries</a></li>
<li><a href=""c05.xhtml#s157"">Importing the dataset</a></li>
<li><a href=""c05.xhtml#s158"">Some pre-processings</a></li>
<li><a href=""c05.xhtml#s159"">Defining a function to perform the following</a></li></ol></li>
<li><a href=""c05.xhtml#s160"">Opinion lexicon</a>
<ol epub:type=""list"">
<li><a href=""c05.xhtml#s161"">Importing the opinion lexicon</a></li>
<li><a href=""c05.xhtml#s162"">Tokenize the reviews into a sentence and form the sentence and review the ID</a></li>
<li><a href=""c05.xhtml#s163"">Sentiment classification</a></li>
<li><a href=""c05.xhtml#s164"">Converting the sentiments to a review level</a></li>
<li><a href=""c05.xhtml#s165"">Converting the sentiment codes from the dataset to sentiments</a></li></ol></li>
<li><a href=""c05.xhtml#s166"">Senti WordNet lexicon</a>
<ol epub:type=""list"">
<li><a href=""c05.xhtml#s167"">Function to perform SentiWordNet</a></li>
<li><a href=""c05.xhtml#s168"">Sentiment classification</a></li>
<li><a href=""c05.xhtml#s169"">Evaluation</a></li></ol></li>
<li><a href=""c05.xhtml#s170"">TextBlob</a>
<ol epub:type=""list"">
<li><a href=""c05.xhtml#s171"">Importing libraries</a></li>
<li><a href=""c05.xhtml#s172"">Predicting a sentiment of sample reviews</a></li>
<li><a href=""c05.xhtml#s173"">Prediction and evaluation</a></li></ol></li>
<li><a href=""c05.xhtml#s174"">AFINN</a>
<ol epub:type=""list"">
<li><a href=""c05.xhtml#s175"">Importing necessary libraries</a></li>
<li><a href=""c05.xhtml#s176"">Sentiment classification and evaluation</a></li></ol></li>
<li><a href=""c05.xhtml#s177"">VADER</a>
<ol epub:type=""list"">
<li><a href=""c05.xhtml#s178"">Importing necessary libraries</a></li>
<li><a href=""c05.xhtml#s179"">Sentiment classification and evaluation</a></li>
<li><a href=""c05.xhtml#s180"">Sample prediction</a></li>
<li><a href=""c05.xhtml#s181"">Drawbacks of lexicon-based sentiment classification</a></li></ol></li>
<li><a href=""c05.xhtml#s182"">Conclusion</a></li>
<li><a href=""c05.xhtml#s183"">Questions</a>
<ol epub:type=""list"">
<li><a href=""c05.xhtml#s184"">Answers</a></li></ol></li></ol></li>
<li><a href=""c06.xhtml"">6. Text Classification Using ML</a>
<ol epub:type=""list"">
<li><a href=""c06.xhtml#s185"">Introduction</a></li>
<li><a href=""c06.xhtml#s186"">Structure</a></li>
<li><a href=""c06.xhtml#s187"">Objectives</a></li>
<li><a href=""c06.xhtml#s188"">Supervised learning</a>
<ol epub:type=""list"">
<li><a href=""c06.xhtml#s189"">About the dataset</a></li>
<li><a href=""c06.xhtml#s190"">Loading the necessary libraries</a></li>
<li><a href=""c06.xhtml#s191"">Importing the dataset</a></li>
<li><a href=""c06.xhtml#s192"">Pre-processings</a></li>
<li><a href=""c06.xhtml#s193"">Performing TF-IDF</a></li></ol></li>
<li><a href=""c06.xhtml#s194"">Model fitting</a>
<ol epub:type=""list"">
<li><a href=""c06.xhtml#s195"">Logistic regression</a></li>
<li><a href=""c06.xhtml#s196"">Lasso regularization</a></li>
<li><a href=""c06.xhtml#s197"">Ridge regularization</a></li>
<li><a href=""c06.xhtml#s198"">Elastic-net classifier</a></li>
<li><a href=""c06.xhtml#s199"">Naïve Bayes algorithm</a></li>
<li><a href=""c06.xhtml#s200"">K – Nearest Neighbors</a></li>
<li><a href=""c06.xhtml#s201"">Decision tree</a></li>
<li><a href=""c06.xhtml#s202"">Random forest</a></li>
<li><a href=""c06.xhtml#s203"">Ada Boost</a></li>
<li><a href=""c06.xhtml#s204"">Gradient boosting machine</a></li>
<li><a href=""c06.xhtml#s205"">XG-Boost</a></li></ol></li>
<li><a href=""c06.xhtml#s206"">Grid Search</a></li>
<li><a href=""c06.xhtml#s207"">Conclusion</a></li>
<li><a href=""c06.xhtml#s208"">Questions</a>
<ol epub:type=""list"">
<li><a href=""c06.xhtml#s209"">Answers</a></li></ol></li>
<li><a href=""c06.xhtml#s210"">Project</a></li></ol></li>
<li><a href=""c07.xhtml"">7. Text Classification Using Deep Learning</a>
<ol epub:type=""list"">
<li><a href=""c07.xhtml#s211"">Introduction</a></li>
<li><a href=""c07.xhtml#s212"">Structure</a></li>
<li><a href=""c07.xhtml#s213"">Objectives</a>
<ol epub:type=""list"">
<li><a href=""c07.xhtml#s214"">Learning about the Neural Networks</a></li></ol></li>
<li><a href=""c07.xhtml#s215"">Neural networks for sentiment classification</a></li>
<li><a href=""c07.xhtml#s216"">Neural networks with TF-IDF</a>
<ol epub:type=""list"">
<li><a href=""c07.xhtml#s217"">Installing libraries</a></li>
<li><a href=""c07.xhtml#s218"">Importing libraries</a></li>
<li><a href=""c07.xhtml#s219"">Importing the dataset</a></li>
<li><a href=""c07.xhtml#s220"">Pre-processings</a></li>
<li><a href=""c07.xhtml#s221"">Train, test, and validation set</a></li>
<li><a href=""c07.xhtml#s222"">Performing TF-IDF</a></li>
<li><a href=""c07.xhtml#s223"">Model building</a>
<ol epub:type=""list"">
<li><a href=""c07.xhtml#s224"">Linear regression</a></li>
<li><a href=""c07.xhtml#s225"">Increasing the dimensionality</a></li></ol></li>
<li><a href=""c07.xhtml#s226"">Activation functions</a></li>
<li><a href=""c07.xhtml#s227"">Model fitting</a></li>
<li><a href=""c07.xhtml#s228"">Cross – validation</a></li></ol></li>
<li><a href=""c07.xhtml#s229"">Neural networks with word2vec:</a>
<ol epub:type=""list"">
<li><a href=""c07.xhtml#s230"">Data splitting</a></li>
<li><a href=""c07.xhtml#s231"">Creating a Word2Vec model</a></li>
<li><a href=""c07.xhtml#s232"">Word2Vec model fitting</a></li>
<li><a href=""c07.xhtml#s233"">Creating word vectors</a></li>
<li><a href=""c07.xhtml#s234"">Padding sequences</a></li>
<li><a href=""c07.xhtml#s235"">ANN model building</a></li>
<li><a href=""c07.xhtml#s236"">Model fitting</a></li>
<li><a href=""c07.xhtml#s237"">Cross-validation</a></li>
<li><a href=""c07.xhtml#s238"">Sentiment analysis using LSTM</a></li>
<li><a href=""c07.xhtml#s239"">Importing the dataset</a></li>
<li><a href=""c07.xhtml#s240"">Pre-processings</a></li>
<li><a href=""c07.xhtml#s241"">Data splitting and padding</a></li>
<li><a href=""c07.xhtml#s242"">LSTM model building</a></li>
<li><a href=""c07.xhtml#s243"">Cross-validation</a></li>
<li><a href=""c07.xhtml#s244"">Comparison of results</a></li></ol></li>
<li><a href=""c07.xhtml#s245"">Conclusion</a></li>
<li><a href=""c07.xhtml#s246"">Questions</a>
<ol epub:type=""list"">
<li><a href=""c07.xhtml#s247"">Answers</a></li></ol></li></ol></li>
<li><a href=""c08.xhtml"">8. Recommendation Engine</a>
<ol epub:type=""list"">
<li><a href=""c08.xhtml#s248"">Introduction</a></li>
<li><a href=""c08.xhtml#s249"">Structure</a></li>
<li><a href=""c08.xhtml#s250"">Objective</a></li>
<li><a href=""c08.xhtml#s251"">Applications</a></li>
<li><a href=""c08.xhtml#s252"">Classification of a recommendation system</a>
<ol epub:type=""list"">
<li><a href=""c08.xhtml#s253"">Simple rule-based recommenders</a>
<ol epub:type=""list"">
<li><a href=""c08.xhtml#s254"">About the dataset</a></li>
<li><a href=""c08.xhtml#s255"">Installing and loading necessary libraries</a></li>
<li><a href=""c08.xhtml#s256"">Importing the dataset</a></li>
<li><a href=""c08.xhtml#s257"">Building a simple rule-based recommendation system</a></li>
<li><a href=""c08.xhtml#s258"">Weighted ratings calculation</a></li>
<li><a href=""c08.xhtml#s259"">Applying the calculation on the filtered records</a></li></ol></li>
<li><a href=""c08.xhtml#s260"">Content based</a>
<ol epub:type=""list"">
<li><a href=""c08.xhtml#s261"">Using document similarity</a></li>
<li><a href=""c08.xhtml#s262"">About the dataset</a></li>
<li><a href=""c08.xhtml#s263"">Installing and loading necessary libraries</a></li>
<li><a href=""c08.xhtml#s264"">Importing the dataset</a></li>
<li><a href=""c08.xhtml#s265"">Some pre-processing</a></li></ol></li></ol></li>
<li><a href=""c08.xhtml#s266"">Extract TF-IDF features</a></li>
<li><a href=""c08.xhtml#s267"">Computing pairwise document similarity</a></li>
<li><a href=""c08.xhtml#s268"">Building a movie recommender</a>
<ol epub:type=""list"">
<li><a href=""c08.xhtml#s269"">Using word embedding</a>
<ol epub:type=""list"">
<li><a href=""c08.xhtml#s270"">FastText</a></li></ol></li>
<li><a href=""c08.xhtml#s271"">Generate document-level embeddings</a></li>
<li><a href=""c08.xhtml#s272"">Collaborative-based</a>
<ol epub:type=""list"">
<li><a href=""c08.xhtml#s273"">User-based</a></li>
<li><a href=""c08.xhtml#s274"">About the dataset</a></li>
<li><a href=""c08.xhtml#s275"">Installing and loading necessary libraries</a></li>
<li><a href=""c08.xhtml#s276"">Importing the dataset</a></li></ol></li></ol></li>
<li><a href=""c08.xhtml#s277"">Advantages of a recommendation system</a></li>
<li><a href=""c08.xhtml#s278"">Conclusion</a></li>
<li><a href=""c08.xhtml#s279"">Questions</a>
<ol epub:type=""list"">
<li><a href=""c08.xhtml#s280"">Answers</a></li></ol></li></ol></li>
<li><a href=""c09.xhtml"">9. Machine Translation</a>
<ol epub:type=""list"">
<li><a href=""c09.xhtml#s281"">Introduction</a></li>
<li><a href=""c09.xhtml#s282"">Structure</a></li>
<li><a href=""c09.xhtml#s283"">Objectives</a></li>
<li><a href=""c09.xhtml#s284"">Application</a></li>
<li><a href=""c09.xhtml#s285"">Types of MT</a></li>
<li><a href=""c09.xhtml#s286"">Readily available libraries</a>
<ol epub:type=""list"">
<li><a href=""c09.xhtml#s287"">TextBlob</a></li>
<li><a href=""c09.xhtml#s288"">LangDetect</a></li>
<li><a href=""c09.xhtml#s289"">Fasttext</a></li></ol></li>
<li><a href=""c09.xhtml#s290"">Sequence-to-sequence modeling</a>
<ol epub:type=""list"">
<li><a href=""c09.xhtml#s291"">About the dataset</a></li>
<li><a href=""c09.xhtml#s292"">Installing and loading necessary libraries:</a></li>
<li><a href=""c09.xhtml#s293"">Importing the dataset</a></li>
<li><a href=""c09.xhtml#s294"">Preprocessing</a></li></ol></li>
<li><a href=""c09.xhtml#s295"">Model building (using LSTM)</a></li>
<li><a href=""c09.xhtml#s296"">Conclusion</a></li>
<li><a href=""c09.xhtml#s297"">Exercise</a></li>
<li><a href=""c09.xhtml#s298"">Questions</a>
<ol epub:type=""list"">
<li><a href=""c09.xhtml#s299"">Answers</a></li></ol></li></ol></li>
<li><a href=""c10.xhtml"">10. Transfer Learning</a>
<ol epub:type=""list"">
<li><a href=""c10.xhtml#s300"">Introduction</a></li>
<li><a href=""c10.xhtml#s301"">Structure</a></li>
<li><a href=""c10.xhtml#s302"">Objectives</a></li>
<li><a href=""c10.xhtml#s303"">Universal Sentence Encoder</a>
<ol epub:type=""list"">
<li><a href=""c10.xhtml#s304"">Goal</a></li></ol></li>
<li><a href=""c10.xhtml#s305"">What is a transformer and do we need it?</a></li>
<li><a href=""c10.xhtml#s306"">Deep Averaging Network (DAN)</a>
<ol epub:type=""list"">
<li><a href=""c10.xhtml#s307"">About the data</a></li>
<li><a href=""c10.xhtml#s308"">Data pre-processing</a></li></ol></li>
<li><a href=""c10.xhtml#s309"">Bidirectional Encoder Representation from Transformer (BERT)</a>
<ol epub:type=""list"">
<li><a href=""c10.xhtml#s310"">What is the necessity of BERT?</a></li>
<li><a href=""c10.xhtml#s311"">The main idea behind BERT</a></li>
<li><a href=""c10.xhtml#s312"">Why is BERT so powerful?</a></li>
<li><a href=""c10.xhtml#s313"">BERT architecture</a>
<ol epub:type=""list"">
<li><a href=""c10.xhtml#s314"">Text processing</a></li>
<li><a href=""c10.xhtml#s315"">Pre-training tasks</a></li></ol></li></ol></li>
<li><a href=""c10.xhtml#s316"">Fine tuning</a></li>
<li><a href=""c10.xhtml#s317"">Drawbacks</a></li>
<li><a href=""c10.xhtml#s318"">Conclusion</a></li>
<li><a href=""c10.xhtml#s319"">Multiple choice questions</a>
<ol epub:type=""list"">
<li><a href=""c10.xhtml#s320"">Answers</a></li></ol></li>
<li><a href=""c10.xhtml#s321"">Project</a></li></ol></li>
<li><a href=""ind.xhtml"">Index</a></li>
</ol>
</nav>
<nav epub:type=""landmarks"">
<h3>Guide</h3>
<ol epub:type=""list"">
<li><a epub:type=""titlepage"" href=""tp.xhtml"">Title Page</a></li>
<li><a epub:type=""copyright-page"" href=""cop.xhtml"">Copyright Page</a></li>
<li><a epub:type=""toc"" href=""toc.xhtml"">Table of Contents</a></li>
<li><a epub:type=""bodymatter"" href=""c01.xhtml"">1. Basic Text Processing Techniques</a></li>
</ol>
</nav>
</body>
</html>";

        // Multiline only changes how ^ and $ behave; the pattern contains neither anchor,
        // so the option is kept for parity with the original sample but has no effect.
        RegexOptions options = RegexOptions.Multiline;

        foreach (Match m in Regex.Matches(input, pattern, options))
        {
            Console.WriteLine("'{0}' found at index {1}.", m.Value, m.Index);
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for C#, please visit: https://msdn.microsoft.com/en-us/library/system.text.regularexpressions.regex(v=vs.110).aspx