|
1 | 1 | from .exceptions_types import EmailSyntaxError |
2 | 2 | from .rfc_constants import EMAIL_MAX_LENGTH, LOCAL_PART_MAX_LENGTH, DOMAIN_MAX_LENGTH, \ |
3 | | - DOT_ATOM_TEXT, DOT_ATOM_TEXT_INTL, ATEXT, ATEXT_INTL, ATEXT_HOSTNAME_INTL, DNS_LABEL_LENGTH_LIMIT, DOT_ATOM_TEXT_HOSTNAME, DOMAIN_NAME_REGEX |
| 3 | + DOT_ATOM_TEXT, DOT_ATOM_TEXT_INTL, ATEXT_RE, ATEXT_INTL_RE, ATEXT_HOSTNAME_INTL, DNS_LABEL_LENGTH_LIMIT, DOT_ATOM_TEXT_HOSTNAME, DOMAIN_NAME_REGEX |
4 | 4 |
|
5 | 5 | import re |
6 | 6 | import unicodedata |
@@ -57,44 +57,25 @@ def validate_email_local_part(local, allow_smtputf8=True, allow_empty_local=Fals |
57 | 57 | reason = get_length_reason(local, limit=LOCAL_PART_MAX_LENGTH) |
58 | 58 | raise EmailSyntaxError("The email address is too long before the @-sign {}.".format(reason)) |
59 | 59 |
|
60 | | - # Check for invalid characters. |
61 | | - # (RFC 2822 Section 3.2.4 / RFC 5322 Section 3.2.3, plus RFC 6531 section 3.3 |
62 | | - # if internationalized local parts are allowed) |
63 | | - atext_re = re.compile('[.' + (ATEXT if not allow_smtputf8 else ATEXT_INTL) + ']') |
64 | | - bad_chars = set( |
65 | | - safe_character_display(c) |
66 | | - for c in local |
67 | | - if not atext_re.match(c) |
68 | | - ) |
69 | | - if bad_chars: |
70 | | - raise EmailSyntaxError("The email address contains invalid characters before the @-sign: " + ", ".join(sorted(bad_chars)) + ".") |
71 | | - |
72 | | - # Check for dot errors imposted by the dot-atom rule. |
73 | | - # (RFC 2822 3.2.4) |
74 | | - check_dot_atom(local, 'An email address cannot start with a {}.', 'An email address cannot have a {} immediately before the @-sign.', is_hostname=False) |
75 | | - |
76 | 60 | # Check the local part against the non-internationalized regular expression. |
| 61 | + # Most email addresses match this regex so it's probably fastest to check this first. |
77 | 62 | # (RFC 2822 3.2.4) |
78 | 63 | m = DOT_ATOM_TEXT.match(local) |
79 | 64 | if m: |
| 65 | + # It's valid. |
| 66 | + |
80 | 67 | # Return the local part unchanged and flag that SMTPUTF8 is not needed. |
81 | 68 | return { |
82 | 69 | "local_part": local, |
83 | 70 | "ascii_local_part": local, |
84 | 71 | "smtputf8": False, |
85 | 72 | } |
86 | 73 |
|
87 | | - else: |
88 | | - # The local part failed the ASCII check. Now try the extended internationalized requirements. |
89 | | - # This should already be handled by the bad_chars and check_dot_atom tests above. |
90 | | - # It's the same pattern but with additional characters permitted. |
91 | | - m = DOT_ATOM_TEXT_INTL.match(local) |
92 | | - if not m: |
93 | | - raise EmailSyntaxError("The email address contains invalid characters before the @-sign.") |
94 | | - # It would be valid if internationalized characters were allowed by the caller. |
95 | | - if not allow_smtputf8: |
96 | | - raise EmailSyntaxError("Internationalized characters before the @-sign are not supported.") |
97 | | - |
| 74 | + # The local part failed the ASCII check. Try the extended character set |
| 75 | + # for internationalized addresses. It's the same pattern but with additional |
| 76 | + # characters permitted. |
| 77 | + m = DOT_ATOM_TEXT_INTL.match(local) |
| 78 | + if m and allow_smtputf8: |
98 | 79 | # It's valid. |
99 | 80 |
|
100 | 81 | # RFC 6532 section 3.1 also says that Unicode NFC normalization should be applied, |
@@ -122,6 +103,27 @@ def validate_email_local_part(local, allow_smtputf8=True, allow_empty_local=Fals |
122 | 103 | "smtputf8": True, |
123 | 104 | } |
124 | 105 |
|
| 106 | + # It's not a valid local part, whether non-internationalized or internationalized. |
| 107 | + # Let's find out why. |
| 108 | + |
| 109 | + # Check for invalid characters. |
| 110 | + # (RFC 2822 Section 3.2.4 / RFC 5322 Section 3.2.3, plus RFC 6531 section 3.3) |
| 111 | + bad_chars = set( |
| 112 | + safe_character_display(c) |
| 113 | + for c in local |
| 114 | + if not (ATEXT_INTL_RE if allow_smtputf8 else ATEXT_RE).match(c) |
| 115 | + ) |
| 116 | + if bad_chars: |
| 117 | + raise EmailSyntaxError("The email address contains invalid characters before the @-sign: " + ", ".join(sorted(bad_chars)) + ".") |
| 118 | + |
| 119 | + # Check for dot errors imposed by the dot-atom rule. |
| 120 | + # (RFC 2822 3.2.4) |
| 121 | + check_dot_atom(local, 'An email address cannot start with a {}.', 'An email address cannot have a {} immediately before the @-sign.', is_hostname=False) |
| 122 | + |
| 123 | + # All of the reasons should already have been checked, but just in case |
| 124 | + # we have a fallback message. |
| 125 | + raise EmailSyntaxError("The email address contains invalid characters before the @-sign.") |
| 126 | + |
125 | 127 |
|
126 | 128 | def check_unsafe_chars(s): |
127 | 129 | # Check for unsafe characters or characters that would make the string |
|
0 commit comments