# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Numeral run shared by several alternatives below: one or more ASCII digits
# or Chinese numeral characters, optionally followed by a "k"/"w" suffix.
# The PCRE original reused it through the subroutine call (?&NumberMatch),
# which Python's `re` module does not support, so it is inlined instead.
_NUMBER = r"[0-9一二三四五六七八九十两俩百万千亿半]+[kw]?"

# Whole-line pattern (used with re.MULTILINE) made of six alternatives.
#
# Porting notes relative to the PCRE original:
#   * named groups use Python's (?P<name>...) spelling instead of (?<name>...);
#   * (?&NumberMatch) is replaced by a non-capturing copy of _NUMBER, so the
#     group numbering (13 groups) is unchanged;
#   * the possessive quantifiers ({0,8}+ etc.) are written as plain greedy
#     quantifiers. Each of them is immediately followed by the end-of-line
#     anchor and "." never matches a newline, so giving characters back can
#     never produce a match; the set of matched lines is the same and the
#     pattern also compiles on Python versions older than 3.11.
regex = (
    r"^(?P<MasterGroup>"
    # Keyword anywhere within a short line (at most 8 chars on either side).
    r"(?P<MatchMisc>.{0,8}?(刚刚|烫手|热乎|出息|来[了啦辣喽]).{0,8})"
    # A line that is exactly one digit.
    r"|(?P<StrictMatchNumber>\d)"
    # Number followed by a unit/counter word or a before/after comparison.
    r"|(?P<MatchMost>.{0,8}?(?P<PreQuantifier>[后前]?)第?"
    r"(?P<NumberMatch>" + _NUMBER + r")"
    r"(?P<PostQuantifierMatch>[后前个条发第]?"
    r"(?P<PostQuantifierMost>点?赞|播放?|弹幕|看|转发?|开|分钟?|小?时|秒|min|h)"
    r"|(?P<CompareMatch>[以之]?[后前]))"
    r".{0,8})"
    # "前"/"第" directly followed by a number.
    r"|(?P<MatchOrder>.{0,8}?[前第](?:" + _NUMBER + r").{0,8})"
    # "前"/"第" + number + "个"/"名" with at least one trailing character.
    r"|(?P<MatchInverse>.{0,8}?[前第](?:" + _NUMBER + r")[个名].{1,8})"
    # "空降" followed by 3 to 8 further characters.
    r"|(?P<MatchAnchor>空降.{3,8})"
    r")$"
)

# Sample lines, one per line of the string; the pattern is applied to each
# line separately via re.MULTILINE.
test_str = ("(注。以下测试样例部分取自某些视频的真实弹幕,少部分是自己编的句子)\n"
    "看到我就是两百以后了\n"
    "看到我你就不是第一了\n"
    "1播放,256赞,B站特色\n"
    "一分钟!!!312个赞\n"
    "两分钟热热乎乎\n"
    "600播放500赞\n"
    "第一名是我\n"
    "第838个赞我收下了\n"
    "天哪那,第四个转发!!!\n"
    "4分钟\n"
    "热乎\n"
    "183条弹幕\n"
    "十分钟的,还热乎啊,真香\n"
    "关上前500发弹幕的大门\n"
    "怎么过一会儿就18个转发了\n"
    "关上前10000播放量的大门\n"
    "热乎25min\n"
    "8888点赞\n"
    "12个\n"
    "第4\n"
    "来了来了,1分钟!\n"
    "热乎 前2000\n"
    "我迟到了10min!!!\n"
    "有10只\n"
    "2分钟,烫\n"
    "2021第一天就这么刺激\n"
    "借刀杀人*2\n"
    "2333333\n"
    "66666666\n"
    "2\n"
    "1\n"
    "2:42\n"
    "出息了\n"
    "7分钟 很快啊!\n"
    "一番定夺\n"
    "第一哈哈哈\n"
    "半小时,冻死了\n"
    "空降00:00\n"
    "牛顿第二定律\n"
    "你这到底是保护村庄还是破坏村庄来了\n"
    "燃起来了!\n"
    "来了来了\n"
    "很残酷的事实:在未命名的情况下不管你杀不杀末影螨它都会在两分钟后消失\n\n"
    "(以下是误杀的例子。注意,这一些误杀并不准备修复,毕竟正则是读不懂人话的。)\n"
    "三分钟后会消失\n"
    "(。。。我找不到误杀的例子了。。。)\n"
    "刚刚,放进岩浆里还能吃\n"
    "(哇这个竟然没有屏蔽!想办法修复中哇。。。)")

matches = re.finditer(regex, test_str, re.MULTILINE)

# Report every matching line, then each capture group of that match.
for match_num, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=match_num,
        start=match.start(),
        end=match.end(),
        match=match.group(),
    ))
    # Groups are numbered from 1. A group that did not participate in the
    # match reports a span of -1 and a value of None.
    for group_num in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=group_num,
            start=match.start(group_num),
            end=match.end(group_num),
            group=match.group(group_num),
        ))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work.
# If you find any syntax errors, feel free to submit a bug report.
# For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html