# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern for one shipment row. Named groups, in order:
#   expedicion - leading token(s), separated by 1-3 whitespace characters
#   fecha      - date written as dd/dd/dd
#   referencia - one or more single-space-separated tokens
#   remitente  - one or more single-space-separated tokens
#   num        - a run of digits
#   nombre     - one or more single-space-separated tokens
#   kilos      - a run of digits, preceded by at least one non-digit character
#   importe    - digits with one or more "." / "," separated digit groups
# Python's `re` requires the (?P<name>...) spelling for named groups; the
# PCRE-style (?<name>...) form is rejected with re.error at compile time.
regex = r"^(?P<expedicion>\S+(?:\s{1,3}\S+)*)\s+(?P<fecha>\d{2}\/\d{2}\/\d{2})\s+(?P<referencia>\S+(?:\s\S+)*)\s+(?P<remitente>\S+(?:\s\S+)*)\s+(?P<num>\d+)\s+(?P<nombre>\S+(?:\s\S+)*)\s+(?:\D+)(?P<kilos>\d+)\s+(?P<importe>\d+(?:[.,]\d+)+)\s+"
# Sample input searched with the pattern: a single string assembled from
# adjacent literals, one "\n"-terminated text line per literal. It holds
# header/total lines followed by rows that start with an expedition code and
# a dd/dd/dd date. (Presumably text extracted from a freight invoice -
# confirm against the data source.)
test_str = ("Expedición Fecha S/Referencia Remitente / Destinatario Kilos importe\n"
"Subtotal 6.088,93\n"
"Recargo Combustible 0,00\n"
"(*) Incluye conceptos anexos a portes (E Expedic. Exenta de IVA\n"
"BASE IMPONIBLE % IVA IMPORTE IVA ACUMULADO\n"
"FORMA DE PAGO: Cheque 60 díaS\n"
"6.088,93 21,00 1.278,68 7.367,61\n"
"0,00 0,00 0,00 0,00\n"
"VENCIMIENTO: 0,00 0,00 0,00 0,00\n"
"1) 25/05/16 7.367,61 EUR Suplidos 0,00\n"
"TOTAL FACTURA: 7.367,61 EUR\n"
"OBSERVAClONES:\n"
"NUEVA RUTAANDALUCIA24 HORASDESDEBARCELONA\n"
"|~|Expedición Fecha S/Referencia Remitente / Destinatario Kilos importe\n"
"S/1/001831063 07/03/16 17395 FRANCISCO ARAGON SL 30500 MOLINADE SEGURA 860 90,05\n"
"S/1/001831065 07/03/16 17398 GABRIEL POVEDA 03610 PETREL 96 14,87\n"
"S/1/001831068 07/03/16 17379 DIVECANT DISTRIBUCIONES 23100 MANCHAREAL 1000 102,24\n"
"E/2/000745188 07/03/16 TE1600276/17 ClKAUTXO 48270 MARQUINA-JEMEIN Vol. 3 4,07\n"
"S/2/000745519 07/03/16 DV.N/R. 1-1829 KWD GMBH 08430 ROCADEL VALLES, LA 400 116,00\n"
"S/2/000745520 07/03/16 DV.N/R. 1-1826 KWD GMBH 08430 ROCADEL VALLES, LA 1000 237,65\n"
"E/46/000677639 07/03/16 TE16000295/12 PLAY BY PLAY 46190 RIBA-ROJADE TURIA Vol. 1094 76,84\n"
"E/46/000677643 07/03/16 TE1600292/172 RUBBEREX 46190 RIBA-ROJADE TURIA 598 48,24\n"
"E/46/000677772 07/03/16 81233647 KEMMERICHE IBEIRCASL 46440 ALMUSSAFES 1304 90,58\n"
"S/1/001831646 08/03/16 17500 TRANPORTES VICUNA 31800 ALTSASU/ALSASUA 152 25,86\n"
"S/1/001831648 08/03/16 17508 HUTSMAN P & P SPAIN SL 21810 PALOS DE LA FRONTERA 81 19,08\n"
"S/1/001831649 08/03/16 17501 HIJOS DE JUAN DE GARAY 20560 OÑATE 1362 116,55\n"
"E/30/000013670 08/03/16 MERCHE POS, MARMOLES SAN MARINO SA 30430 CEHEGIN 200 30,96\n"
"S/1/001832242 09/03/16 17416 PlÑAVAL. 956 464 166 11600 UBRIQUE Vol. 224 40,30\n"
"S/1/001832244 09/03/16 17634 FROST TROL 964 342 740 12004 CASTELLON 100 12,16\n"
"S/1/001832246 09/03/16 17569 SOLTEC ENERGIAS RENOVAB 30500 MOLINADE SEGURA 390 47,02\n"
"S/1/001832247 09/03/16 17584 DHL EXPRESS SEVILLASPA 41018 SEVILLA Vol. 574 70,43\n"
"S/1/001832249 09/03/16 17585 GAPARSORO LOGISTICA 20211 ATAUN (SAN GREGORIO) 900 90,05\n"
"S/1/001832251 09/03/16 17537 LM WIND POWER SERVICE 12185 COVES DE VINROMA, LES 364 32,35\n"
"S/1/001832254 09/03/16 17558 METAL WORK IBERICADELE 48220 ABADIANO Vol. 172 28,44\n"
"S/1/001832256 09/03/16 17536 RHENUS LOGISTICS. 46190 RIBA-ROJADE TURIA 2000 118,01\n"
"S/1/001832258 09/03/16 17560 HIJOS DE J. CARDOSO 13005 CIUDAD REAL Vol. 528 70,43\n"
"E/2/000746025 09/03/16 TE1600308/17f CIAKUTXO 48270 MARKINA-XEMEIN 369 47,02\n"
"S/1/001832849 10/03/16 17653 CERLER LOGISTICA 50196 MUELA, LA 255 29,47\n"
"S/1/001832851 10/03/16 17672 SCHINDLER 50720 CARTUJABAJA Vol. 112 15,51\n"
"S/1/001832853 10/03/16 17631 DIC COATING. 48500 GALLARTA 2280 163,48\n"
"S/1/001832854 10/03/16 17630 PINTURAS ISAVAL 96 164 46190 RIBA-ROJADE TURIA 875 66,34\n"
"S/1/001832856 10/03/16 17680 DERMO PRODUCTS DEVELOPl\\ 01474 ARCENIEGA 1056 100,71\n"
"S/1/001832857 10/03/16 17629 SUN CHEMICALTF. 918 2 28800 ALCALADE HENARES 900 73,09\n"
"E/2/000746391 10/03/16 TE1600315/17 NORIKER KABEL SL 48170 SAN MARTIN DEARTEAGA(ZA 390 47,02\n"
"E/2/000746548 10/03/16 TE1600315/17 ETRON APLICACIONES INDU 20120 HERNANI 300 38,96\n"
"E/50/000013662 10/03/16 17639 ALLIANCE HEALTHCARE ESP 50830 VILLANUEVA DE GALLEGO 981 72,18\n"
"S/1/001833376 11/03/16 17745 PILKINGTON SAGUNTO 46500 SAGUNT/SAGUNTO Vol. 830 66,34\n"
"S/1/001833378 11/03/16 17679 ORTO IBERICA 33421 LLANERA Vol. 316 49,22\n"
"S/1/001833379 11/03/16 GAMESAENERGY TRANSMISS 20159 ASTEASU 82 16,45\n"
"S/1/001833380 11/03/16 17678 EPW BRUNO PITA 3105 GUIA Vol. 590 104,84\n"
"E/2/000746714 11/03/16 TE1600315/17) ARAVI C/O BILON EUROPA 48450 SAN ESTEBAN DE ETXEBARRI, 427 51,01\n\n\n\n"
"GALLASTEGUI")
# Scan the whole sample text. re.MULTILINE makes "^" anchor at the start of
# every line rather than only at the start of the string.
matches = re.finditer(regex, test_str, re.MULTILINE | re.IGNORECASE)

# Report each match, then each of its capture groups, with their offsets.
for match_num, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum=match_num, start=match.start(), end=match.end(), match=match.group()))
    # Group 0 is the full match printed above, so capture groups start at 1.
    for group_num in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(groupNum=group_num, start=match.start(group_num), end=match.end(group_num), group=match.group(group_num)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html