import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demo program that extracts Windows-style paths (drive paths, UNC / device paths and
 * {@code %ENV%} paths, bare or wrapped in single or double quotes) from free text and prints
 * every match together with its capture groups.
 *
 * <p>The expression was originally authored for PCRE and relied on {@code (?#...)} inline
 * comments and {@code (?&name)} subroutine calls. {@code java.util.regex} supports neither and
 * rejects them with a {@code PatternSyntaxException}, so the pattern is assembled here from
 * plain Java string constants: the former inline comments are Java comments, and the
 * subroutine calls are replaced by non-capturing copies of the referenced sub-pattern.
 * Capture groups are unchanged: group 1 is {@code opening}, group 2 is {@code montage}.
 */
public class Example {

    // Drive root such as "C:/" (the sub-pattern the PCRE original named "montage").
    private static final String DRIVE = "[a-zA-Z]:[\\/\\\\]";

    // Alternatives of the path opening that follow the drive-root alternative.
    private static final String OPENING_ALTERNATIVES =
        // Two separators that are not the tail of "http://" or "https://" ...
        "|[\\/\\\\][\\/\\\\](?<!http:\\/\\/)(?<!https:\\/\\/)(?>"
        // ... followed by a device prefix: "//?/" or "//./", optionally with a server segment ...
        + "[?.][\\/\\\\](?:[^\\/\\\\<>:\"|?\\n\\r ]+[\\/\\\\])?"
        // ... and optionally a drive root ("//?/c:/" or "//?/server/c:/") ...
        + "(?:" + DRIVE + ")?"
        // ... or a plain "//server" / "//address" start, which must not be followed by a drive root.
        + "|(?!" + DRIVE + "))"
        // Environment variable start: "%NAME%" with an optional trailing separator.
        + "|%\\w+%[\\/\\\\]?";

    // Path opening with the named capture groups; used once, by the unquoted alternative.
    private static final String OPENING_CAPTURED =
        "(?<opening>\\b(?<montage>" + DRIVE + ")" + OPENING_ALTERNATIVES + ")";

    // Same opening without capture groups; stands in for the PCRE call to the "opening" group
    // (Java does not allow the same group name to be declared twice).
    private static final String OPENING = "(?:\\b" + DRIVE + OPENING_ALTERNATIVES + ")";

    // Path that is not wrapped in quotes.
    private static final String UNQUOTED_PATH =
        OPENING_CAPTURED
        // As many "directoryName/" segments as possible. A segment may neither start nor end
        // with a space, a comma or a single quote.
        + "(?:[^\\/\\\\<>:\"|?\\n\\r ,'][^\\/\\\\<>:\"|?\\n\\r]*(?<![ ,'])[\\/\\\\])*"
        // End of the path: the next character must be able to start a file or directory name;
        // otherwise the engine backtracks and the trailing separator is left out of the match.
        + "(?:(?=[^\\/\\\\<>:\"'|?\\n\\r;, ])"
        // Optional name without extension. A space is consumed only when a word character or
        // '-' follows it, and the name stops before anything that looks like a drive root.
        // NOTE(review): "(?!= )" is a negative lookahead for the two characters "= "; the
        // original comment suggests "(?! )" was intended. Kept exactly as authored.
        + "(?:(?:[^\\/\\\\<>:\"|?\\n\\r;, .](?: (?=[\\w\\-]))?(?:\\*(?!= ))?(?!" + DRIVE + "))+)?"
        // Zero or more ".ext" suffixes; no space can be matched after the first dot.
        + "(?:\\.\\w+)*)";

    // Path wrapped in single quotes. A single quote is itself a legal path character, so the
    // closing quote must be followed by a non-word character or by the end of the input.
    private static final String SINGLE_QUOTED_PATH =
        "(?:'" + OPENING + "(?=.*'\\W|.*'$)"
        // Inside the quotes a single quote is consumed only when a word character follows it.
        + "(?:[^\\/\\\\<>:'\"|?\\n\\r]+(?:'(?=\\w))?[\\/\\\\]?)*')";

    // Path wrapped in double quotes; only tried when a closing quote exists further on the line.
    private static final String DOUBLE_QUOTED_PATH =
        "\"" + OPENING + "(?=.*\")(?:[^\\/\\\\<>:\"|?\\n\\r]+[\\/\\\\]?)*\"";

    // Compiled once; alternatives are tried in the same order as in the original expression.
    private static final Pattern PATH_PATTERN =
        Pattern.compile(UNQUOTED_PATH + "|" + SINGLE_QUOTED_PATH + "|" + DOUBLE_QUOTED_PATH);

    // Sample input shipped with the original expression.
    private static final String SAMPLE_TEXT = "THIS IS COMMENTED VERSION !\n"
        + "to simple copy and use it, go https://regex101.com/r/zWGLMP\n\n"
        + "C:/testOk\\dot.Dirname/.nameFileBeginByDot first space after a dot in file name stop the match\n"
        + "C:/testOk\\_.._AsDirName/../file name.ext1.ext2 first space after a dot stop the match\n"
        + "start text don't match C:/testOk\\lastDir Or FileName WithDouble..dot stop the match\n"
        + "C:/testOk\\lastDir Or FileName dot ended. stop the match like an end sentence. So, a last name with a space after a dot is not catch\n"
        + "C:/testOk\\LastNameIs/DirName C:/testOk\\2Paths_ _separated/f.ext space after extention stop match\n"
        + "C:/testOK\\Last_/_isNotmatched/fgfj.gjjb/uhloext/ and [ ,'] after '\\' stop match\n"
        + "\\\\127.0.0.1/this', 'isOkInMidDirName\\butSimple',' stop match in last DirName or FileName\n"
        + "\\\\.\\c:/this exotic path begining work\\and\\ space after '\\' stop the match\n"
        + "\\\\?\\c:/this exotic path begining work too\\and \\space before '\\' stop the match too\n"
        + "\\\\testOk/this' - 'is ok in dirName/and - in lastName.ext\n"
        + "i:/dir/fileName with a .space before dot stop the match\n"
        + "\\\\?\\server1\\e:\\utilities\\\\filecomparer\\ this double \\\\ is interpretated as new path\n\n"
        + "@\"c:\\testOk\\double quote character is more permissive/ '' , ; .txt, .ext2\",\n"
        + "@\"\\\\127.0.0.1\\c$\\temp\\t'est-file.txt, if end double quote is missing, we use unquote match\n"
        + "@\"\\c:\\LOCALHOST\\c$\\ thisIsNotMatched\" \"temp\\test-file.txt\", quoted path must have a right opening to be matched\n"
        + "@'\\\\.\\c:\\temp\\te'st-file.txt' simple quoted is ok \n"
        + "'c:\\simpleQuoteInsideStill'Match\\but' stopMatch if next is space character,\n"
        + "'c:\\simpleQuoteInsideStill'Match\\but\\'stopMatch if is fisrt character after \\\n"
        + "'c:\\simpleQuoteInsideStill'Match\\but''stopMatch if he is double\n"
        + "@\"\\\\?\\c:\\te ' mp\\est-file.txt\",\n"
        + "@\"\\\\.\\UNC\\LOCALHOST\\c$\\temp\\test-file.txt\",\n"
        + "@\"\\\\127.0.0.1\\c$\\temp\\test-file.txt\"\n\n"
        + "/\\serverName\\mix/and\\still match\" double quote character stop match\n"
        + "\\\\\\IfMoreThan2_\\_we take only the 2 lasts.ext first space after ext stop the match\n"
        + "/testNotMatch/html\n"
        + "/testNotMatch.html\n"
        + "testNotMatch.html\n"
        + "// -> this simple // or \\\\ is not matched, but this //isMatched !\n"
        + "/ -> this simple / is not matched, and this /notMatchedToo\n"
        + "b-renice\\sauvegardes\\B-HIER\\GEO\\Geo_NetAct_Atoll_Planet\\UR_Est\\Custom Data\"\n"
        + "\"b-renice\\sauvegardes\\B-HIER\\GEO\\Geo_NetAct_Atoll_Planet\\UR_Est\\Custom Data\"\n"
        + "\"\\\\b-renice\\sauvegardes\\B-HIER\\GEO\\Geo_NetAct_Atoll_Planet\\UR_Est\\Custom Data\"\n\n"
        + "error Message test:\n"
        + "---------------------------\n"
        + "Tentative d'accès à C:\\Users\\tpgz4017\\App - Data\\Local\\Temp\\tempShapeFile_CrossWave Calibration Zones - Atoll CrossWave Model.shx après sa fin.\n"
        + "---------------------------\n\n"
        + "local url path :\n"
        + "file://C:/Users/Downloads/20220516_32289275_1049383.pdf\n"
        + "urlPath :\n"
        + "file://p-eco2.rd.fr/vol_H0037_01$/599/livraison/20220516_32289275_1049383.pdf\n\n"
        + "c:\\temp\\test-file.txt\",\n"
        + "\\\\127.0.0.1\\c$\\temp\\test-file.txt\",\n"
        + "\\\\LOCALHOST\\c$\\ temp\\test-file.txt\",\n"
        + "\\\\LOCALHOST\\c$ \\temp\\test-file.txt\",\n"
        + "\\\\.\\c:\\temp\\t\\est-file.txt\",\n"
        + "\\\\?\\c:\\temp\\test-file.txt\",\n"
        + "\\\\.\\UNC\\LOCALHOST\\c$\\temp\\test-file.txt\",\n"
        + "\\\\?\\UNC\\ServerName\\ temp\\test-file.txt\",\n"
        + "\\\\127.0.0.1\\c$\\temp\\test -file.txt\"\n\n\n"
        + "error Message test:\n"
        + "Site0 / 3: - Warning . See log file 'C:\\ProgramData\\InfoVista\\Planet 7.4\\7.4\\RPE\\Log\\Plugins\\Universal_Model_masked\\log_Universal_Model.txt' for details\n\n"
        + "C:/test\\gvk.hv/fgfj.gjjb/uhloext : some random text\n\n"
        + "\\\\b-renice\\sauvegardes\\B-HIER\\GEO\\Geo_NetAct_Atoll_Planet\\UR_Est\\Polygon\\Haguenau\\Building\\Haguenau hgtfhyt \"C:/te-st.html\" \"C:/te-st.html\" gd\"dhbcsk \"C:/te/dsst.ikpo fdsf \"C:\\test\" \"C:// test.html\" gd\n"
        + "\"//te s t/e, llo.html \n"
        + "C:/test\\f/uhlo/. \n"
        + "C://te?st.html\n"
        + "b-renice\\sauvegardes\\B-HIER\\GEO\\Geo_NetAct_Atoll_Planet\\UR_Est\\Custom Data\"\n"
        + "; dfsdf \"\\\\b-renice\\sauvegardes\\B-HIER\\GEO\\Geo_NetAct_Atoll_Planet\\UR_Est\\Custom Data\"\n"
        + "; dfsdf \"\\\\\n"
        + "\"\\\\b-renice\\sauvegardes\\B-HIER\\GEO\\Geo_NetAct_Atoll_Planet\\UR_Est\\Custom Data\"Haguenau_Building.tab : Data format of \\\\b-renice\\sauvegardes\\B-HIER\\GEO\\Geo_NetAct_Atoll_Planet\\UR_Est\\Polygon\\Haguenau\\Building\\Haguenau Building.* C: is invalid\n"
        + "Haguenau_Building.tab : Data format of \\\\b-renice\\sauvegardes\\B-HIER\\GEO\\Geo NetAct Atoll_Planet\\UR_Est\\Polygon\\Haguenau\\Building\\Haguenau Building.TAB, is invalid\n"
        + "Haguenau_Building.tab : Data format of \\\\b-renice\\sauvegardes\\B-HIER\\GEO\\Geo_NetAct_Atoll_Planet\\UR_Est\\Polygon\\Haguenau\\Building\\Haguenau Buildi*.*ng.*, is invalid\n"
        + "C:/test/../hjgbkl C:/test/../hjgbkl.gfgdfgrdgfdgr C:/test/../hjgbkl\n"
        + "C:/test.html\n"
        + "C://test/ .h/hel,lo.html//test/./hello.html\n"
        + "C:/test//hello.html\n"
        + "//test\n"
        + "//hello.html\n"
        + "/test\n"
        + "\"%tmp%/fsdfs\"\n"
        + "%tmp%/fsdfs\n"
        + "ERROR 8/31/2021 - 6:45:39 PM HighResClutter .RasterFile : \\\\b-ren ice\\sauv egardes\\B-HIER\\GEO%dsq%\\NewJersey_NewYork\\DTM\\DTM\\CENTRAL_JERSE..Y_New_York_2 m_Z18N_0_DTM_02_06.bil : Le fichier spécifié est introuvable. \n"
        + "\\\\b-ren ice\\sauv egardes\\..\\B-HIER\\GEO\\NewJersey_NewYork\\DTM\\DTM\\CENTRAL_JERSE..Y_New_York_2 m_Z18N_0_DTM_02_06.bil C:\\b-ren ice\\sauv egardes\\B-HIER\\GEO\\NewJersey_NewYork\\DTM\\DTM\\CENTRAL_JERSE..Y_New_York_2 m_Z18N_0_DTM_02_06.bil \\\\b-ren ice\\sauv egardes\\B-HIER\\GEO\\NewJersey_NewYork\\DTM\\DTM\\CENTRAL_JER SE.Y_New_York_2 m_Z18N_0_DTM_02_06.bil. \n"
        + "//test.html\n"
        + "\\\\10.1.1.107\n"
        + "//10.1.1.107/test.html\n"
        + "//10.1.1.107/te st/hello.html\n"
        + "//10.1.1.107/test/hello\n"
        + "//test/hello.txt\n"
        + "//test/hello.txt.\n\n"
        + "\\\\.\\UNC\\Server\\Share\\Test\\Foo.txt \n"
        + "\\\\?\\UNC\\Server\\Share\\Test\\Foo.txt\n\n"
        + "Pour les chemins UNC de périphérique, la partie serveur/partage forme le volume. Par exemple, dans \\\\?\\server1\\e:\\utilities\\filecomparer\\ , la partie serveur/partage est server1\\utilities . Ceci est important quand\n"
        + "'\\\\127.0.0.1\\c$\\temp\\test-fi'le.txt'";

    /**
     * Scans the built-in sample text and prints, for every path found, the full match followed
     * by each capture group (a group that did not take part in the match prints as
     * {@code null}).
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        final Matcher matcher = PATH_PATTERN.matcher(SAMPLE_TEXT);
        while (matcher.find()) {
            System.out.println("Full match: " + matcher.group(0));
            for (int i = 1; i <= matcher.groupCount(); i++) {
                System.out.println("Group " + i + ": " + matcher.group(i));
            }
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html