import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates a multi-line regular expression that picks out blog/news URLs
 * of {@code https://www.test.com} from a block of sample text and prints every
 * match together with its capture groups.
 */
public class Example {

    /**
     * Matches a whole line of the form
     * {@code https://www.test.com/<locale>/<section><rest>} where
     * {@code <locale>} is one of en, de, fr, es, pt-br and {@code <section>}
     * is one of blog, news, nouvelles, noticias.
     *
     * <p>Capture groups: 1 = locale, 2 and 3 = section (the nested group is
     * kept so group numbering stays stable), 4 = whatever follows the section
     * (letters, digits, {@code -} and {@code /}; may be empty).
     *
     * <p>The dots of the host name are escaped so they match a literal
     * {@code '.'} only; unescaped they would accept any character, e.g.
     * {@code wwwXtestXcom}. {@link Pattern#MULTILINE} lets {@code ^} and
     * {@code $} anchor at each line of the input rather than the whole text.
     */
    static final Pattern URL_PATTERN = Pattern.compile(
            "^https://www\\.test\\.com/(en|de|fr|es|pt-br)/"
                    + "((blog|news|nouvelles|noticias))+([a-zA-Z0-9/-]*)$",
            Pattern.MULTILINE);

    /** Sample input: one URL per line, interleaved with descriptive labels. */
    private static final String SAMPLE_TEXT = "Should be excluded - starting page:\n"
            + "https://www.test.com/en/\n"
            + "https://www.test.com/de/\n"
            + "https://www.test.com/fr/\n"
            + "https://www.test.com/es/\n"
            + "https://www.test.com/pt-br/\n\n"
            + "Should be excluded - pages and sub pages from blog and news:\n"
            + "https://www.test.com/en/blog\n"
            + "https://www.test.com/en/news\n"
            + "https://www.test.com/en/noticias\n"
            + "https://www.test.com/en/nouvelles\n"
            + "https://www.test.com/en/blog/hardware\n"
            + "https://www.test.com/en/news/hardware\n\n"
            + "Should be included - all other pages:\n"
            + "https://www.test.com/en/shop\n"
            + "https://www.test.com/en/shop/product-1/\n\n\n";

    /**
     * Scans the sample text and prints each full match followed by all of its
     * capture groups, one per line, to standard output.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        final Matcher matcher = URL_PATTERN.matcher(SAMPLE_TEXT);
        while (matcher.find()) {
            System.out.println("Full match: " + matcher.group(0));
            // Group 0 is the full match, so numbered groups start at 1.
            for (int i = 1; i <= matcher.groupCount(); i++) {
                System.out.println("Group " + i + ": " + matcher.group(i));
            }
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html