Skip to content

Commit 9418d4d

Browse files
committed
WIP
1 parent 6842637 commit 9418d4d

File tree

1 file changed

+121
-9
lines changed

1 file changed

+121
-9
lines changed

lpeg_patterns/email.lua

Lines changed: 121 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,14 @@ local Cs = lpeg.Cs
1212
local Ct = lpeg.Ct
1313

1414
local core = require "lpeg_patterns.core"
15+
16+
local printable_character = lpeg.R("\33\126")
17+
18+
local ALPHA = core.ALPHA
1519
local CHAR = core.CHAR
1620
local CRLF = core.CRLF
1721
local CTL = core.CTL
22+
local DIGIT = core.DIGIT
1823
local DQUOTE = core.DQUOTE
1924
local WSP = core.WSP
2025
local VCHAR = core.VCHAR
@@ -51,7 +56,7 @@ local quoted_string = CFWS^-1 * quoted_string_text * CFWS^-1
5156

5257
-- Miscellaneous Tokens
5358
local word = atom + quoted_string
54-
local obs_phrase = C(word * (word + P"." + CFWS)^0 / function() end)
59+
-- Substitution capture: each word is replaced by its first captured value,
-- each CFWS match is replaced by a single space, and "." is kept verbatim.
local obs_phrase = Cs(word / 1 * (word / 1 + P"." + CFWS / " ")^0)
5560
local phrase = obs_phrase -- obs_phrase is broader than the strict `word^1`: it is the same except that it also allows "."
5661

5762
-- Addr-spec
@@ -68,24 +73,131 @@ local obs_domain = Ct(atom * (C"." * atom)^0) / table.concat
6873
local domain = obs_domain + dot_atom + domain_literal
6974
local obs_local_part = Ct(word * (C"." * word)^0) / table.concat
7075
local local_part = obs_local_part + dot_atom + quoted_string
71-
local addr_spec = local_part * P"@" * domain
76+
local addr_spec = Cg(local_part, "local-part") * P"@" * Cg(domain, "domain")
7277

7378
local display_name = phrase
7479
local obs_domain_list = (CFWS + P",")^0 * P"@" * domain
7580
* (P"," * CFWS^-1 * (P"@" * domain)^-1)^0
76-
local obs_route = Cg(Ct(obs_domain_list) * P":", "route")
77-
local obs_angle_addr = CFWS^-1 * P"<" * obs_route * addr_spec * P">" * CFWS^-1
78-
local angle_addr = CFWS^-1 * P"<" * addr_spec * P">" * CFWS^-1
81+
local obs_route = Ct(obs_domain_list) * P":"
82+
local obs_angle_addr = CFWS^-1 * P"<" * Cg(obs_route, "route") * C(addr_spec) * P">" * CFWS^-1
83+
local angle_addr = CFWS^-1 * P"<" * C(addr_spec) * P">" * CFWS^-1
7984
+ obs_angle_addr
80-
local name_addr = Cg(display_name, "display")^-1 * angle_addr
81-
local mailbox = name_addr + addr_spec
85+
local name_addr = Cg(display_name^-1, "display") * angle_addr
86+
local mailbox = name_addr + C(addr_spec)
87+
88+
-- https://www.rfc-editor.org/rfc/rfc5322#section-2.2
89+
-- Header fields are lines beginning with a field name, followed by a
90+
-- colon (":"), followed by a field body, and terminated by CRLF. A
91+
-- field name MUST be composed of printable US-ASCII characters (i.e.,
92+
-- characters that have values between 33 and 126, inclusive), except
93+
-- colon. A field body may be composed of printable US-ASCII characters
94+
-- as well as the space (SP, ASCII value 32) and horizontal tab (HTAB,
95+
-- ASCII value 9) characters (together known as the white space
96+
-- characters, WSP). A field body MUST NOT include CR and LF except
97+
-- when used in "folding" and "unfolding", as described in section
98+
-- 2.2.3. All field bodies MUST conform to the syntax described in
99+
-- sections 3 and 4 of this specification.
100+
-- local ftext = lpeg.R("\33\57", "\59\126") -- printable US-ASCII except ":" (58)
101+
-- local field_name = ftext^1
102+
-- local header_field_body = R("\33\126") + FWS
103+
104+
-- RFC 5322 Section 3.4
105+
local obs_mbox_list = (CFWS^-1 * P",")^0 * Ct(mailbox) * (P"," * (Ct(mailbox) + CFWS)^-1)^0
106+
-- obs_mbox_list is a superset of the strict mailbox-list (it additionally allows empty list elements), so it is used for mailbox_list too
107+
local mailbox_list = obs_mbox_list
108+
local obs_group_list = (CFWS^-1 * P",")^1 * CFWS^-1
109+
local group_list = mailbox_list + CFWS + obs_group_list
110+
local group = Cg(display_name, "display") * P":" * Cg(Ct(group_list^-1), "members") * P";" * CFWS^-1
111+
local address = mailbox + group
112+
local obs_addr_list = (CFWS^-1 * P",")^0 * Ct(address) * (P"," * (Ct(address) + CFWS)^-1)^0
113+
-- obs_addr_list is a superset of the strict address-list (it additionally allows empty list elements), so it is used for address_list too
114+
local address_list = obs_addr_list
115+
116+
-- RFC 5322 Section 4.5.4
117+
local obs_id_left = local_part
118+
local obs_id_right = domain
119+
120+
-- RFC 5322 Section 3.6.4
121+
local no_fold_literal = P"[" * dtext^0 * P"]"
122+
local id_left = dot_atom_text + obs_id_left
123+
local id_right = dot_atom_text + no_fold_literal + obs_id_right
124+
-- Semantically, the angle bracket characters are not part of the
125+
-- msg-id; the msg-id is what is contained between the two angle bracket
126+
-- characters.
127+
local msg_id = CFWS^-1 * P"<" * C(id_left * P"@" * id_right) * ">" * CFWS^-1
128+
129+
-- RFC 5987
130+
local mime_charsetc = ALPHA + DIGIT + S"!#$%&+-^_`{}~"
131+
local mime_charset = C(mime_charsetc^1)
132+
133+
-- RFC 2047
134+
local charset = mime_charset / string.lower
135+
local encoding = mime_charset / string.lower
136+
local encoded_text = (printable_character - S"?")^1
137+
local encoded_word = P"=?" * charset * P"?" * encoding * P"?" * C(encoded_text) * P"?="
82138

83139
return {
84-
local_part = local_part;
140+
obs_NO_WS_CTL = obs_NO_WS_CTL;
141+
obs_qp = obs_qp;
142+
quoted_pair = quoted_pair;
143+
FWS = FWS;
144+
ctext = ctext;
145+
comment = comment;
146+
CFWS = CFWS;
147+
specials = specials;
148+
atext = atext;
149+
atom = atom;
150+
dot_atom_text = dot_atom_text;
151+
dot_atom = dot_atom;
152+
qtext = qtext;
153+
qcontent = qcontent;
154+
quoted_string_text = quoted_string_text;
155+
quoted_string = quoted_string;
156+
word = word;
157+
obs_phrase = obs_phrase;
158+
phrase = phrase;
159+
obs_dtext = obs_dtext;
160+
dtext = dtext;
161+
domain_literal_text = domain_literal_text;
162+
domain_text = domain_text;
163+
local_part_text = local_part_text;
164+
addr_spec_text = addr_spec_text;
165+
domain_literal = domain_literal;
166+
obs_domain = obs_domain;
85167
domain = domain;
86-
email = addr_spec;
168+
obs_local_part = obs_local_part;
169+
local_part = local_part;
170+
addr_spec = addr_spec;
171+
display_name = display_name;
172+
obs_domain_list = obs_domain_list;
173+
obs_route = obs_route;
174+
obs_angle_addr = obs_angle_addr;
175+
angle_addr = angle_addr;
87176
name_addr = name_addr;
88177
mailbox = mailbox;
178+
-- ftext = ftext;
179+
-- field_name = field_name;
180+
-- header_field_body = header_field_body;
181+
obs_mbox_list = obs_mbox_list;
182+
mailbox_list = mailbox_list;
183+
obs_group_list = obs_group_list;
184+
group_list = group_list;
185+
group = group;
186+
address = address;
187+
obs_addr_list = obs_addr_list;
188+
address_list = address_list;
189+
obs_id_left = obs_id_left;
190+
obs_id_right = obs_id_right;
191+
no_fold_literal = no_fold_literal;
192+
id_left = id_left;
193+
id_right = id_right;
194+
msg_id = msg_id;
195+
mime_charsetc = mime_charsetc;
196+
mime_charset = mime_charset;
197+
charset = charset;
198+
encoding = encoding;
199+
encoded_text = encoded_text;
200+
encoded_word = encoded_word;
89201

90202
-- A variant that does not allow comments or folding whitespace
91203
local_part_nocfws = local_part_text;

0 commit comments

Comments
 (0)