Skip to content

Commit e7565b9

Browse files
committed
WIP
1 parent 6842637 commit e7565b9

File tree

1 file changed

+122
-9
lines changed

1 file changed

+122
-9
lines changed

lpeg_patterns/email.lua

Lines changed: 122 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,14 @@ local Cs = lpeg.Cs
1212
local Ct = lpeg.Ct
1313

1414
local core = require "lpeg_patterns.core"
15+
16+
local printable_character = lpeg.R("\33\126")
17+
18+
local ALPHA = core.ALPHA
1519
local CHAR = core.CHAR
1620
local CRLF = core.CRLF
1721
local CTL = core.CTL
22+
local DIGIT = core.DIGIT
1823
local DQUOTE = core.DQUOTE
1924
local WSP = core.WSP
2025
local VCHAR = core.VCHAR
@@ -51,14 +56,15 @@ local quoted_string = CFWS^-1 * quoted_string_text * CFWS^-1
5156

5257
-- Miscellaneous Tokens
5358
local word = atom + quoted_string
54-
local obs_phrase = C(word * (word + P"." + CFWS)^0 / function() end)
59+
-- Substitution capture over a phrase: each `word / 1` is replaced by the first
-- value captured by word, every CFWS run is replaced by a single space, and a
-- literal "." is copied through unchanged.
-- NOTE(review): relies on atom and quoted_string (defined outside this hunk)
-- each producing at least one capture -- confirm.
local obs_phrase = Cs(word / 1 * (word / 1 + P"." + CFWS / " ")^0)
5560
local phrase = obs_phrase -- obs_phrase is broader than `word^1`: it is the same, except that it also allows "."
5661

5762
-- Addr-spec
5863
local obs_dtext = obs_NO_WS_CTL + quoted_pair
5964
local dtext = R("\33\90", "\94\126") + obs_dtext
6065
local domain_literal_text = P"[" * Cs((FWS^-1 * dtext)^0 * FWS^-1) * P"]"
6166

67+
-- A non-spec-compliant variant that does not allow comments or folding whitespace
6268
local domain_text = dot_atom_text + domain_literal_text
6369
local local_part_text = dot_atom_text + quoted_string_text
6470
local addr_spec_text = local_part_text * P"@" * domain_text
@@ -68,24 +74,131 @@ local obs_domain = Ct(atom * (C"." * atom)^0) / table.concat
6874
local domain = obs_domain + dot_atom + domain_literal
6975
local obs_local_part = Ct(word * (C"." * word)^0) / table.concat
7076
local local_part = obs_local_part + dot_atom + quoted_string
71-
local addr_spec = local_part * P"@" * domain
77+
local addr_spec = Cg(local_part, "local-part") * P"@" * Cg(domain, "domain")
7278

7379
local display_name = phrase
7480
local obs_domain_list = (CFWS + P",")^0 * P"@" * domain
7581
* (P"," * CFWS^-1 * (P"@" * domain)^-1)^0
76-
local obs_route = Cg(Ct(obs_domain_list) * P":", "route")
77-
local obs_angle_addr = CFWS^-1 * P"<" * obs_route * addr_spec * P">" * CFWS^-1
78-
local angle_addr = CFWS^-1 * P"<" * addr_spec * P">" * CFWS^-1
82+
local obs_route = Ct(obs_domain_list) * P":"
83+
local obs_angle_addr = CFWS^-1 * P"<" * Cg(obs_route, "route") * C(addr_spec) * P">" * CFWS^-1
84+
local angle_addr = CFWS^-1 * P"<" * C(addr_spec) * P">" * CFWS^-1
7985
+ obs_angle_addr
80-
local name_addr = Cg(display_name, "display")^-1 * angle_addr
81-
local mailbox = name_addr + addr_spec
86+
local name_addr = Cg(display_name^-1, "display") * angle_addr
87+
local mailbox = name_addr + C(addr_spec)
88+
89+
-- https://www.rfc-editor.org/rfc/rfc5322#section-2.2
90+
-- Header fields are lines beginning with a field name, followed by a
91+
-- colon (":"), followed by a field body, and terminated by CRLF. A
92+
-- field name MUST be composed of printable US-ASCII characters (i.e.,
93+
-- characters that have values between 33 and 126, inclusive), except
94+
-- colon. A field body may be composed of printable US-ASCII characters
95+
-- as well as the space (SP, ASCII value 32) and horizontal tab (HTAB,
96+
-- ASCII value 9) characters (together known as the white space
97+
-- characters, WSP). A field body MUST NOT include CR and LF except
98+
-- when used in "folding" and "unfolding", as described in section
99+
-- 2.2.3. All field bodies MUST conform to the syntax described in
100+
-- sections 3 and 4 of this specification.
101+
-- local ftext = R("\33\57", "\59\126") -- printable US-ASCII except ":" (58)
102+
-- local field_name = ftext^1
103+
-- local header_field_body = R("\33\126") + FWS
104+
105+
-- RFC 5322 Section 3.4
106+
local obs_mbox_list = (CFWS^-1 * P",")^0 * Ct(mailbox) * (P"," * (Ct(mailbox) + CFWS)^-1)^0
107+
-- obs_mbox_list is a super-set of the strict mailbox-list (it additionally allows empty fields), so it is used for mailbox_list directly
108+
local mailbox_list = obs_mbox_list
109+
local obs_group_list = (CFWS^-1 * P",")^1 * CFWS^-1
110+
local group_list = mailbox_list + CFWS + obs_group_list
111+
local group = Cg(display_name, "display") * P":" * Cg(Ct(group_list^-1), "members") * P";" * CFWS^-1
112+
local address = mailbox + group
113+
local obs_addr_list = (CFWS^-1 * P",")^0 * Ct(address) * (P"," * (Ct(address) + CFWS)^-1)^0
114+
-- obs_addr_list is a super-set of the strict address-list (it additionally allows empty fields), so it is used for address_list directly
115+
local address_list = obs_addr_list
116+
117+
-- RFC 5322 Section 4.5.4
118+
local obs_id_left = local_part
119+
local obs_id_right = domain
120+
121+
-- RFC 5322 Section 3.6.4
122+
local no_fold_literal = P"[" * dtext^0 * P"]"
123+
local id_left = dot_atom_text + obs_id_left
124+
local id_right = dot_atom_text + no_fold_literal + obs_id_right
125+
-- Semantically, the angle bracket characters are not part of the
126+
-- msg-id; the msg-id is what is contained between the two angle bracket
127+
-- characters.
128+
local msg_id = CFWS^-1 * P"<" * C(id_left * P"@" * id_right) * ">" * CFWS^-1
129+
130+
-- RFC 5987
131+
local mime_charsetc = ALPHA + DIGIT + S"!#$%&+-^_`{}~"
132+
local mime_charset = C(mime_charsetc^1)
133+
134+
-- RFC 2047
135+
local charset = mime_charset / string.lower
136+
local encoding = mime_charset / string.lower
137+
local encoded_text = (printable_character - S"?")^1
138+
-- encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
-- Produces three captures in order: the lower-cased charset, the lower-cased
-- encoding, and the encoded text exactly as matched (it is not decoded here).
local encoded_word = P"=?" * charset * P"?" * encoding * P"?" * C(encoded_text) * P"?="
82139

83140
return {
84-
local_part = local_part;
141+
obs_NO_WS_CTL = obs_NO_WS_CTL;
142+
obs_qp = obs_qp;
143+
quoted_pair = quoted_pair;
144+
FWS = FWS;
145+
ctext = ctext;
146+
comment = comment;
147+
CFWS = CFWS;
148+
specials = specials;
149+
atext = atext;
150+
atom = atom;
151+
dot_atom_text = dot_atom_text;
152+
dot_atom = dot_atom;
153+
qtext = qtext;
154+
qcontent = qcontent;
155+
quoted_string_text = quoted_string_text;
156+
quoted_string = quoted_string;
157+
word = word;
158+
obs_phrase = obs_phrase;
159+
phrase = phrase;
160+
obs_dtext = obs_dtext;
161+
dtext = dtext;
162+
domain_literal_text = domain_literal_text;
163+
domain_literal = domain_literal;
164+
obs_domain = obs_domain;
85165
domain = domain;
86-
email = addr_spec;
166+
obs_local_part = obs_local_part;
167+
local_part = local_part;
168+
addr_spec = addr_spec;
169+
display_name = display_name;
170+
obs_domain_list = obs_domain_list;
171+
obs_route = obs_route;
172+
obs_angle_addr = obs_angle_addr;
173+
angle_addr = angle_addr;
87174
name_addr = name_addr;
88175
mailbox = mailbox;
176+
-- ftext = ftext;
177+
-- field_name = field_name;
178+
-- header_field_body = header_field_body;
179+
obs_mbox_list = obs_mbox_list;
180+
mailbox_list = mailbox_list;
181+
obs_group_list = obs_group_list;
182+
group_list = group_list;
183+
group = group;
184+
address = address;
185+
obs_addr_list = obs_addr_list;
186+
address_list = address_list;
187+
obs_id_left = obs_id_left;
188+
obs_id_right = obs_id_right;
189+
no_fold_literal = no_fold_literal;
190+
id_left = id_left;
191+
id_right = id_right;
192+
msg_id = msg_id;
193+
mime_charsetc = mime_charsetc;
194+
mime_charset = mime_charset;
195+
charset = charset;
196+
encoding = encoding;
197+
encoded_text = encoded_text;
198+
encoded_word = encoded_word;
199+
200+
-- Handy alias
201+
email = addr_spec;
89202

90203
-- A variant that does not allow comments or folding whitespace
91204
local_part_nocfws = local_part_text;

0 commit comments

Comments
 (0)