# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern that extracts Telegram usernames from free text.
#
# It is written for re.VERBOSE (the whitespace and "#" comments inside the
# pattern are ignored), re.IGNORECASE (character classes only list lowercase
# letters) and re.MULTILINE (the "$" in the subdomain branch means "end of
# line"). The username itself is captured in the named group "username",
# which is the only capturing group.
regex = r"""
(?:
(?<!\S)@ # Mention form: "@" at the start of the text or right after whitespace
| # OR
(?:
# Telegram link types reference: https://core.telegram.org/api/links
#
(?:https?://)? # Optional HTTP or HTTPS protocol
(?:www\.)? # Optional www subdomain
(?:t\.me|telegram\.(?:me|dog)) # Match "t.me", "telegram.me", or "telegram.dog" domains
/ # Require a forward slash after the domain
| # OR
tg://resolve\?domain= # Match Telegram deep link schema (tg://resolve?domain=)
)
| # OR
(?=\b # Subdomain form: zero-width check that "<username>.t.me" starts at a word boundary here
(?!\w*__) # Disallow double underscores anywhere in the username
(?:[a-z][a-z0-9_]{3,31}) # Username: a letter followed by 3-31 letters, digits or underscores
(?<!_) # Disallow usernames ending with an underscore
\.t\.me$ # Must be followed by ".t.me" at the end of a line (NOTE: the subdomain form is not recognized mid-line)
)
)
(?P<username> # Start capturing the username
(?!\w*_{2,}) # Disallow double underscores in the username
(?!\w*[^0-9a-z_.,\s]) # Reject if the run of word characters contains, or is directly followed by, anything other than an ASCII letter, digit, underscore, dot, comma or whitespace
(?:[a-z][a-z0-9_]{3,31}) # Username: a letter followed by 3-31 letters, digits or underscores
(?<!_) # Disallow username ending with an underscore
\b # The username must end at a word boundary (rejects over-long names)
)
(?:\.t\.me)? # Optional ".t.me" suffix, consumed as part of the overall match
"""
# Sample inputs for the pattern, kept in their original order. Each entry is
# one line of the final text.
_SAMPLE_LINES = (
    # "@" mention forms
    "@username",
    "@user_name",
    "@username123",
    "@alanbadoev @orkester okinea.t.me",
    "@username123_, @username123",
    "@username123, @username123_",
    "@username123, username123.t.me",
    "@name_with_underscore",
    "@user$name",
    "@user__name",
    "@username_",
    "@user name",
    "@toolongusernameeeeeeeeeeeeeeeeeee",
    "@123username",
    # link and deep-link forms
    "https://t.me/username",
    "https://www.t.me/username",
    "http://t.me/username",
    "t.me/username",
    "tg://resolve?domain=username",
    # ".t.me" subdomain forms
    "username.t.me",
    " username.t.me",
    # further link / subdomain variations
    "https://t.me/@username",
    "http://t.me/user__name",
    "username_.t.me",
    "user__name.t.me",
)

# Every sample, including the last one, is terminated by a newline.
test_str = "".join(sample + "\n" for sample in _SAMPLE_LINES)
# Run the pattern over the sample text. The pattern is written in verbose
# form with lowercase character classes and a line-anchored "$", hence the
# VERBOSE, IGNORECASE and MULTILINE flags.
matches = re.finditer(regex, test_str, re.MULTILINE | re.IGNORECASE | re.VERBOSE)

# Report every match (numbered from 1) with its span and text, followed by
# the span and text of each of its capture groups.
for matchNum, match in enumerate(matches, start=1):
    print("Match {matchNum} was found at {start}-{end}: {match}".format(
        matchNum=matchNum, start=match.start(), end=match.end(), match=match.group()))
    # Capture groups are numbered from 1; group 0 is the whole match printed above.
    for groupNum in range(1, len(match.groups()) + 1):
        print("Group {groupNum} found at {start}-{end}: {group}".format(
            groupNum=groupNum, start=match.start(groupNum), end=match.end(groupNum), group=match.group(groupNum)))
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
# Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Python, please visit: https://docs.python.org/3/library/re.html