@@ -36,6 +36,10 @@ static bool S_last_line_blank(const cmark_node *node) {
3636 return (node -> flags & CMARK_NODE__LAST_LINE_BLANK ) != 0 ;
3737}
3838
39+ static bool S_last_line_checked (const cmark_node * node ) { // Reports whether the CMARK_NODE__LAST_LINE_CHECKED bit is set in node->flags.
40+ return (node -> flags & CMARK_NODE__LAST_LINE_CHECKED ) != 0 ; // mask the single bit and normalize to a bool
41+ }
42+
3943static CMARK_INLINE cmark_node_type S_type (const cmark_node * node ) { // Returns node->type converted to cmark_node_type.
4044 return (cmark_node_type )node -> type ; // explicit cast from the stored field to the enum type
4145}
@@ -47,6 +51,10 @@ static void S_set_last_line_blank(cmark_node *node, bool is_blank) {
4751 node -> flags &= ~CMARK_NODE__LAST_LINE_BLANK ;
4852}
4953
54+ static void S_set_last_line_checked (cmark_node * node ) { // Sets the CMARK_NODE__LAST_LINE_CHECKED bit in node->flags; this function never clears it.
55+ node -> flags |= CMARK_NODE__LAST_LINE_CHECKED ; // OR-in the bit, leaving all other flags untouched
56+ }
57+
5058static CMARK_INLINE bool S_is_line_end_char (char c ) { // Reports whether c is a line feed or a carriage return.
5159 return (c == '\n' || c == '\r' );
5260}
@@ -121,8 +129,6 @@ static void cmark_parser_reset(cmark_parser *parser) {
121129 parser -> root = document ;
122130 parser -> current = document ;
123131
124- parser -> last_buffer_ended_with_cr = false;
125-
126132 parser -> syntax_extensions = saved_exts ;
127133 parser -> inline_syntax_extensions = saved_inline_exts ;
128134 parser -> options = saved_options ;
@@ -234,26 +240,43 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) {
234240
235241// Check to see if a node ends with a blank line, descending
236242// if needed into lists and sublists.
237- static bool ends_with_blank_line (cmark_node * node ) {
238- cmark_node * cur = node ;
239- while (cur != NULL ) {
240- if (S_last_line_blank (cur )) {
241- return true;
242- }
243- if (S_type (cur ) == CMARK_NODE_LIST || S_type (cur ) == CMARK_NODE_ITEM ) {
244- cur = cur -> last_child ;
245- } else {
246- cur = NULL ;
247- }
243+ static bool S_ends_with_blank_line (cmark_node * node ) { // Reports whether node ends with a blank line, following last_child through lists and items and marking each visited node as checked.
244+ if (S_last_line_checked (node )) { // node was visited before: answer from its own blank flag without descending again
245+ return (S_last_line_blank (node ));
246+ } else if ((S_type (node ) == CMARK_NODE_LIST ||
247+ S_type (node ) == CMARK_NODE_ITEM ) && node -> last_child ) { // list or item that has children: the answer comes from its last child
248+ S_set_last_line_checked (node );
249+ return (S_ends_with_blank_line (node -> last_child )); // NOTE(review): the child's result is not stored on node, so a repeat call on node takes the first branch and reads node's own blank flag instead - confirm this is intended
250+ } else { // any other node type, or a list/item with no children: use the node's own blank flag
251+ S_set_last_line_checked (node );
252+ return (S_last_line_blank (node ));
248253 }
249- return false;
254+ }
255+
256+ // Consumes link reference definitions found at the start of b's content; returns true if content remains after link defs are resolved.
257+ static bool resolve_reference_link_definitions (
258+ cmark_parser * parser ,
259+ cmark_node * b ) {
260+ bufsize_t pos ;
261+ cmark_strbuf * node_content = & b -> content ;
262+ cmark_chunk chunk = {node_content -> ptr , node_content -> size , 0 }; // chunk starts out covering the whole content buffer (same pointer and size)
263+ while (chunk .len && chunk .data [0 ] == '[' && // only attempt a parse while the remaining text is non-empty and begins with '['
264+ (pos = cmark_parse_reference_inline (parser -> mem , & chunk , // presumably returns the length of one parsed definition, or 0 if none - TODO confirm against its declaration
265+ parser -> refmap ))) {
266+
267+ chunk .data += pos ; // step the view past the bytes just parsed
268+ chunk .len -= pos ;
269+ }
270+ cmark_strbuf_drop (node_content , (node_content -> size - chunk .len )); // argument is the total number of bytes the loop advanced over; presumably removed from the front of the buffer - TODO confirm
271+ return !is_blank (& b -> content , 0 ); // true when what is left in the buffer is not blank
250272}
251273
252274static cmark_node * finalize (cmark_parser * parser , cmark_node * b ) {
253275 bufsize_t pos ;
254276 cmark_node * item ;
255277 cmark_node * subitem ;
256278 cmark_node * parent ;
279+ bool has_content ;
257280
258281 parent = b -> parent ;
259282 assert (b -> flags &
@@ -283,15 +306,8 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
283306 switch (S_type (b )) {
284307 case CMARK_NODE_PARAGRAPH :
285308 {
286- cmark_chunk chunk = {node_content -> ptr , node_content -> size , 0 };
287- while (chunk .len && chunk .data [0 ] == '[' &&
288- (pos = cmark_parse_reference_inline (parser -> mem , & chunk , parser -> refmap ))) {
289-
290- chunk .data += pos ;
291- chunk .len -= pos ;
292- }
293- cmark_strbuf_drop (node_content , (node_content -> size - chunk .len ));
294- if (is_blank (node_content , 0 )) {
309+ has_content = resolve_reference_link_definitions (parser , b );
310+ if (!has_content ) {
295311 // remove blank node (former reference def)
296312 cmark_node_free (b );
297313 }
@@ -343,7 +359,8 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
343359 // spaces between them:
344360 subitem = item -> first_child ;
345361 while (subitem ) {
346- if (ends_with_blank_line (subitem ) && (item -> next || subitem -> next )) {
362+ if ((item -> next || subitem -> next ) &&
363+ S_ends_with_blank_line (subitem )) {
347364 b -> as .list .tight = false;
348365 break ;
349366 }
@@ -748,6 +765,40 @@ static void chop_trailing_hashtags(cmark_chunk *ch) {
748765 }
749766}
750767
768+ // Check for thematic break. On failure, return 0 and update
769+ // thematic_break_kill_pos with the index at which the
770+ // parse fails. On success, return length of match.
771+ // "...three or more hyphens, asterisks,
772+ // or underscores on a line by themselves. If you wish, you may use
773+ // spaces between the hyphens or asterisks."
774+ static int S_scan_thematic_break (cmark_parser * parser , cmark_chunk * input , // Scans input from offset for a thematic break line; returns the match length, or 0 after recording the failure index in parser->thematic_break_kill_pos.
775+ bufsize_t offset ) {
776+ bufsize_t i ;
777+ char c ;
778+ char nextc = '\0' ;
779+ int count ;
780+ i = offset ;
781+ c = peek_at (input , i ); // c is the candidate marker character at the starting offset
782+ if (!(c == '*' || c == '_' || c == '-' )) { // the first character must be one of the three marker characters
783+ parser -> thematic_break_kill_pos = i ; // failure is recorded at the starting offset itself
784+ return 0 ;
785+ }
786+ count = 1 ; // the marker at offset has already been seen
787+ while ((nextc = peek_at (input , ++ i ))) { // advance one position per iteration; the loop also ends when peek_at yields 0
788+ if (nextc == c ) {
789+ count ++ ; // another occurrence of the same marker character
790+ } else if (nextc != ' ' && nextc != '\t' ) { // spaces and tabs are skipped; anything else stops the scan
791+ break ;
792+ }
793+ }
794+ if (count >= 3 && (nextc == '\r' || nextc == '\n' )) { // success needs at least three markers and the scan must have stopped on a line-ending character
795+ return (i - offset ) + 1 ; // length from offset through the line-ending character at index i, inclusive
796+ } else {
797+ parser -> thematic_break_kill_pos = i ; // remember the index where the scan stopped
798+ return 0 ;
799+ }
800+ }
801+
751802// Find first nonspace character from current offset, setting
752803// parser->first_nonspace, parser->first_nonspace_column,
753804// parser->indent, and parser->blank. Does not advance parser->offset.
@@ -1040,6 +1091,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
10401091 bufsize_t matched = 0 ;
10411092 int lev = 0 ;
10421093 bool save_partially_consumed_tab ;
1094+ bool has_content ;
10431095 int save_offset ;
10441096 int save_column ;
10451097
@@ -1112,13 +1164,20 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
11121164 } else if (!indented && cont_type == CMARK_NODE_PARAGRAPH &&
11131165 (lev =
11141166 scan_setext_heading_line (input , parser -> first_nonspace ))) {
1115- (* container )-> type = (uint16_t )CMARK_NODE_HEADING ;
1116- (* container )-> as .heading .level = lev ;
1117- (* container )-> as .heading .setext = true;
1118- S_advance_offset (parser , input , input -> len - 1 - parser -> offset , false);
1167+ // finalize paragraph, resolving reference links
1168+ has_content = resolve_reference_link_definitions (parser , * container );
1169+
1170+ if (has_content ) {
1171+
1172+ (* container )-> type = (uint16_t )CMARK_NODE_HEADING ;
1173+ (* container )-> as .heading .level = lev ;
1174+ (* container )-> as .heading .setext = true;
1175+ S_advance_offset (parser , input , input -> len - 1 - parser -> offset , false);
1176+ }
11191177 } else if (!indented &&
11201178 !(cont_type == CMARK_NODE_PARAGRAPH && !all_matched ) &&
1121- (matched = scan_thematic_break (input , parser -> first_nonspace ))) {
1179+ (parser -> thematic_break_kill_pos <= parser -> first_nonspace ) &&
1180+ (matched = S_scan_thematic_break (parser , input , parser -> first_nonspace ))) {
11221181 // it's only now that we know the line is not part of a setext heading:
11231182 * container = add_child (parser , * container , CMARK_NODE_THEMATIC_BREAK ,
11241183 parser -> first_nonspace + 1 );
@@ -1377,6 +1436,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
13771436 parser -> column = 0 ;
13781437 parser -> first_nonspace = 0 ;
13791438 parser -> first_nonspace_column = 0 ;
1439+ parser -> thematic_break_kill_pos = 0 ;
13801440 parser -> indent = 0 ;
13811441 parser -> blank = false;
13821442 parser -> partially_consumed_tab = false;
0 commit comments