@@ -12,9 +12,14 @@ local Cs = lpeg.Cs
1212local Ct = lpeg .Ct
1313
1414local core = require " lpeg_patterns.core"
15+
16+ local printable_character = lpeg .R (" \33\126 " )
17+
18+ local ALPHA = core .ALPHA
1519local CHAR = core .CHAR
1620local CRLF = core .CRLF
1721local CTL = core .CTL
22+ local DIGIT = core .DIGIT
1823local DQUOTE = core .DQUOTE
1924local WSP = core .WSP
2025local VCHAR = core .VCHAR
@@ -51,14 +56,15 @@ local quoted_string = CFWS^-1 * quoted_string_text * CFWS^-1
5156
5257-- Miscellaneous Tokens
5358local word = atom + quoted_string
54- local obs_phrase = C (word * (word + P " ." + CFWS ) ^ 0 / function () end )
59+ local obs_phrase = Cs (word / 1 * (word / 1 + P " ." + CFWS / " " ) ^ 0 )
5560local phrase = obs_phrase -- obs_phrase is more broad than `word^1`, it's really the same but allows "."
5661
5762-- Addr-spec
5863local obs_dtext = obs_NO_WS_CTL + quoted_pair
5964local dtext = R (" \33\90 " , " \94\126 " ) + obs_dtext
6065local domain_literal_text = P " [" * Cs ((FWS ^- 1 * dtext )^ 0 * FWS ^- 1 ) * P " ]"
6166
67+ -- A non-spec-compliant variant that does not allow comments or folding whitespace
6268local domain_text = dot_atom_text + domain_literal_text
6369local local_part_text = dot_atom_text + quoted_string_text
6470local addr_spec_text = local_part_text * P " @" * domain_text
@@ -68,24 +74,131 @@ local obs_domain = Ct(atom * (C"." * atom)^0) / table.concat
6874local domain = obs_domain + dot_atom + domain_literal
6975local obs_local_part = Ct (word * (C " ." * word )^ 0 ) / table.concat
7076local local_part = obs_local_part + dot_atom + quoted_string
71- local addr_spec = local_part * P " @" * domain
77+ local addr_spec = Cg ( local_part , " local-part " ) * P " @" * Cg ( domain , " domain " )
7278
7379local display_name = phrase
7480local obs_domain_list = (CFWS + P " ," )^ 0 * P " @" * domain
7581 * (P " ," * CFWS ^- 1 * (P " @" * domain )^- 1 )^ 0
76- local obs_route = Cg ( Ct (obs_domain_list ) * P " :" , " route " )
77- local obs_angle_addr = CFWS ^- 1 * P " <" * obs_route * addr_spec * P " >" * CFWS ^- 1
78- local angle_addr = CFWS ^- 1 * P " <" * addr_spec * P " >" * CFWS ^- 1
82+ local obs_route = Ct (obs_domain_list ) * P " :"
83+ local obs_angle_addr = CFWS ^- 1 * P " <" * Cg ( obs_route , " route " ) * C ( addr_spec ) * P " >" * CFWS ^- 1
84+ local angle_addr = CFWS ^- 1 * P " <" * C ( addr_spec ) * P " >" * CFWS ^- 1
7985 + obs_angle_addr
80- local name_addr = Cg (display_name , " display" )^- 1 * angle_addr
81- local mailbox = name_addr + addr_spec
86+ local name_addr = Cg (display_name ^- 1 , " display" ) * angle_addr
87+ local mailbox = name_addr + C (addr_spec )
88+
89+ -- https://www.rfc-editor.org/rfc/rfc5322#section-2.2
90+ -- Header fields are lines beginning with a field name, followed by a
91+ -- colon (":"), followed by a field body, and terminated by CRLF. A
92+ -- field name MUST be composed of printable US-ASCII characters (i.e.,
93+ -- characters that have values between 33 and 126, inclusive), except
94+ -- colon. A field body may be composed of printable US-ASCII characters
95+ -- as well as the space (SP, ASCII value 32) and horizontal tab (HTAB,
96+ -- ASCII value 9) characters (together known as the white space
97+ -- characters, WSP). A field body MUST NOT include CR and LF except
98+ -- when used in "folding" and "unfolding", as described in section
99+ -- 2.2.3. All field bodies MUST conform to the syntax described in
100+ -- sections 3 and 4 of this specification.
101+ -- local ftext = R("\33\57","\59\126")
102+ -- local field_name = ftext^1
103+ -- local header_field_body = R("\33\126") + FWS
104+
105+ -- RFC 5322 Section 3.4
106+ local obs_mbox_list = (CFWS ^- 1 * P " ," )^ 0 * Ct (mailbox ) * (P " ," * (Ct (mailbox ) + CFWS )^- 1 )^ 0
107+ -- obs_mbox_list is a super-set of the strict mailbox-list that also allows empty fields, so it serves as mailbox_list too
108+ local mailbox_list = obs_mbox_list
109+ local obs_group_list = (CFWS ^- 1 * P " ," )^ 1 * CFWS ^- 1
110+ local group_list = mailbox_list + CFWS + obs_group_list
111+ local group = Cg (display_name , " display" ) * P " :" * Cg (Ct (group_list ^- 1 ), " members" ) * P " ;" * CFWS ^- 1
112+ local address = mailbox + group
113+ local obs_addr_list = (CFWS ^- 1 * P " ," )^ 0 * Ct (address ) * (P " ," * (Ct (address ) + CFWS )^- 1 )^ 0
114+ -- obs_addr_list is a super-set of the strict address-list that also allows empty fields, so it serves as address_list too
115+ local address_list = obs_addr_list
116+
117+ -- RFC 5322 Section 4.5.4
118+ local obs_id_left = local_part
119+ local obs_id_right = domain
120+
121+ -- RFC 5322 Section 3.6.4
122+ local no_fold_literal = P " [" * dtext ^ 0 * P " ]"
123+ local id_left = dot_atom_text + obs_id_left
124+ local id_right = dot_atom_text + no_fold_literal + obs_id_right
125+ -- Semantically, the angle bracket characters are not part of the
126+ -- msg-id; the msg-id is what is contained between the two angle bracket
127+ -- characters.
128+ local msg_id = CFWS ^- 1 * P " <" * C (id_left * P " @" * id_right ) * " >" * CFWS ^- 1
129+
130+ -- RFC 5987
131+ local mime_charsetc = ALPHA + DIGIT + S " !#$%&+-^_`{}~"
132+ local mime_charset = C (mime_charsetc ^ 1 )
133+
134+ -- RFC 2047
135+ local charset = mime_charset / string.lower
136+ local encoding = mime_charset / string.lower
137+ local encoded_text = (printable_character - S " ?" )^ 1
138+ local encoded_word = P " =?" * charset * P " ?" * encoding * P " ?" * C (encoded_text ) * P " ?="
82139
83140return {
84- local_part = local_part ;
141+ obs_NO_WS_CTL = obs_NO_WS_CTL ;
142+ obs_qp = obs_qp ;
143+ quoted_pair = quoted_pair ;
144+ FWS = FWS ;
145+ ctext = ctext ;
146+ comment = comment ;
147+ CFWS = CFWS ;
148+ specials = specials ;
149+ atext = atext ;
150+ atom = atom ;
151+ dot_atom_text = dot_atom_text ;
152+ dot_atom = dot_atom ;
153+ qtext = qtext ;
154+ qcontent = qcontent ;
155+ quoted_string_text = quoted_string_text ;
156+ quoted_string = quoted_string ;
157+ word = word ;
158+ obs_phrase = obs_phrase ;
159+ phrase = phrase ;
160+ obs_dtext = obs_dtext ;
161+ dtext = dtext ;
162+ domain_literal_text = domain_literal_text ;
163+ domain_literal = domain_literal ;
164+ obs_domain = obs_domain ;
85165 domain = domain ;
86- email = addr_spec ;
166+ obs_local_part = obs_local_part ;
167+ local_part = local_part ;
168+ addr_spec = addr_spec ;
169+ display_name = display_name ;
170+ obs_domain_list = obs_domain_list ;
171+ obs_route = obs_route ;
172+ obs_angle_addr = obs_angle_addr ;
173+ angle_addr = angle_addr ;
87174 name_addr = name_addr ;
88175 mailbox = mailbox ;
176+ -- ftext = ftext;
177+ -- field_name = field_name;
178+ -- header_field_body = header_field_body;
179+ obs_mbox_list = obs_mbox_list ;
180+ mailbox_list = mailbox_list ;
181+ obs_group_list = obs_group_list ;
182+ group_list = group_list ;
183+ group = group ;
184+ address = address ;
185+ obs_addr_list = obs_addr_list ;
186+ address_list = address_list ;
187+ obs_id_left = obs_id_left ;
188+ obs_id_right = obs_id_right ;
189+ no_fold_literal = no_fold_literal ;
190+ id_left = id_left ;
191+ id_right = id_right ;
192+ msg_id = msg_id ;
193+ mime_charsetc = mime_charsetc ;
194+ mime_charset = mime_charset ;
195+ charset = charset ;
196+ encoding = encoding ;
197+ encoded_text = encoded_text ;
198+ encoded_word = encoded_word ;
199+
200+ -- Handy alias
201+ email = addr_spec ;
89202
90203 -- A variant that does not allow comments or folding whitespace
91204 local_part_nocfws = local_part_text ;
0 commit comments