1212
1313
1414#define CHAR_EOF -1
15- #define CHAR_NULL 0
1615#define CHAR_BANG 33
1716#define CHAR_DOUBLE 34
1817#define CHAR_PERCENT 37
2423#define CHAR_GT 62
2524#define CHAR_QUESTION 63
2625#define CHAR_RIGHTB 93
27- #define CHAR_TICK 96
2826
2927/* prototypes */
3028
@@ -43,7 +41,6 @@ static int h5_state_before_attribute_name(h5_state_t* hs);
4341static int h5_state_before_attribute_value (h5_state_t * hs );
4442static int h5_state_attribute_value_double_quote (h5_state_t * hs );
4543static int h5_state_attribute_value_single_quote (h5_state_t * hs );
46- static int h5_state_attribute_value_back_quote (h5_state_t * hs );
4744static int h5_state_attribute_value_no_quote (h5_state_t * hs );
4845static int h5_state_after_attribute_value_quoted_state (h5_state_t * hs );
4946static int h5_state_comment (h5_state_t * hs );
@@ -63,28 +60,16 @@ static int h5_state_doctype(h5_state_t* hs);
6360/**
6461 * public function
6562 */
66- void libinjection_h5_init (h5_state_t * hs , const char * s , size_t len , enum html5_flags flags )
63+ void libinjection_h5_init (h5_state_t * hs , const char * s , size_t len , int flags )
6764{
6865 memset (hs , 0 , sizeof (h5_state_t ));
6966 hs -> s = s ;
7067 hs -> len = len ;
71-
72- switch (flags ) {
73- case DATA_STATE :
68+ hs -> state = h5_state_data ;
69+ if (flags == 0 ) {
7470 hs -> state = h5_state_data ;
75- break ;
76- case VALUE_NO_QUOTE :
77- hs -> state = h5_state_before_attribute_name ;
78- break ;
79- case VALUE_SINGLE_QUOTE :
80- hs -> state = h5_state_attribute_value_single_quote ;
81- break ;
82- case VALUE_DOUBLE_QUOTE :
83- hs -> state = h5_state_attribute_value_double_quote ;
84- break ;
85- case VALUE_BACK_QUOTE :
86- hs -> state = h5_state_attribute_value_back_quote ;
87- break ;
71+ } else {
72+ assert (0 );
8873 }
8974}
9075
@@ -100,18 +85,10 @@ int libinjection_h5_next(h5_state_t* hs)
10085/**
10186 * Everything below here is private
10287 *
103- */
104-
88+ */
10589
10690static int h5_is_white (char ch )
10791{
108- /*
109- * \t = horizontal tab = 0x09
110- * \n = newline = 0x0A
111- * \v = vertical tab = 0x0B
112- * \f = form feed = 0x0C
113- * \r = cr = 0x0D
114- */
11592 return strchr (" \t\n\v\f\r" , ch ) != NULL ;
11693}
11794
@@ -120,17 +97,9 @@ static int h5_skip_white(h5_state_t* hs)
12097 char ch ;
12198 while (hs -> pos < hs -> len ) {
12299 ch = hs -> s [hs -> pos ];
123- switch (ch ) {
124- case 0x00 : /* IE only */
125- case 0x20 :
126- case 0x09 :
127- case 0x0A :
128- case 0x0B : /* IE only */
129- case 0x0C :
130- case 0x0D : /* IE only */
100+ if (ch == ' ' ) {
131101 hs -> pos += 1 ;
132- break ;
133- default :
102+ } else {
134103 return ch ;
135104 }
136105 }
@@ -198,9 +167,6 @@ static int h5_state_tag_open(h5_state_t* hs)
198167 return h5_state_bogus_comment2 (hs );
199168 } else if ((ch >= 'a' && ch <= 'z' ) || (ch >= 'A' && ch <= 'Z' )) {
200169 return h5_state_tag_name (hs );
201- } else if (ch == CHAR_NULL ) {
202- /* IE-ism NULL characters are ignored */
203- return h5_state_tag_name (hs );
204170 } else {
205171 /* user input mistake in configuring state */
206172 if (hs -> pos == 0 ) {
@@ -231,9 +197,7 @@ static int h5_state_end_tag_open(h5_state_t* hs)
231197 } else if ((ch >= 'a' && ch <= 'z' ) || (ch >= 'A' && ch <= 'Z' )) {
232198 return h5_state_tag_name (hs );
233199 }
234-
235- hs -> is_close = 0 ;
236- return h5_state_bogus_comment (hs );
200+ return h5_state_data (hs );
237201}
238202/*
239203 *
@@ -267,12 +231,7 @@ static int h5_state_tag_name(h5_state_t* hs)
267231 pos = hs -> pos ;
268232 while (pos < hs -> len ) {
269233 ch = hs -> s [pos ];
270- if (ch == 0 ) {
271- /* special non-standard case */
272- /* allow nulls in tag name */
273- /* some old browsers apparently allow and ignore them */
274- pos += 1 ;
275- } else if (h5_is_white (ch )) {
234+ if (h5_is_white (ch )) {
276235 hs -> token_start = hs -> s + hs -> pos ;
277236 hs -> token_len = pos - hs -> pos ;
278237 hs -> token_type = TAG_NAME_OPEN ;
@@ -340,7 +299,7 @@ static int h5_state_before_attribute_name(h5_state_t* hs)
340299 default : {
341300 return h5_state_attribute_name (hs );
342301 }
343- }
302+ }
344303}
345304
346305static int h5_state_attribute_name (h5_state_t * hs )
@@ -349,7 +308,7 @@ static int h5_state_attribute_name(h5_state_t* hs)
349308 size_t pos ;
350309
351310 TRACE ();
352- pos = hs -> pos + 1 ;
311+ pos = hs -> pos ;
353312 while (pos < hs -> len ) {
354313 ch = hs -> s [pos ];
355314 if (h5_is_white (ch )) {
@@ -399,19 +358,21 @@ static int h5_state_attribute_name(h5_state_t* hs)
399358static int h5_state_after_attribute_name (h5_state_t * hs )
400359{
401360 int c ;
361+ size_t pos ;
402362
403363 TRACE ();
364+ pos = hs -> pos ;
404365 c = h5_skip_white (hs );
405366 switch (c ) {
406367 case CHAR_EOF : {
407368 return 0 ;
408369 }
409370 case CHAR_SLASH : {
410- hs -> pos += 1 ;
371+ hs -> pos = pos + 1 ;
411372 return h5_state_self_closing_start_tag (hs );
412373 }
413374 case CHAR_EQUALS : {
414- hs -> pos += 1 ;
375+ hs -> pos = pos + 1 ;
415376 return h5_state_before_attribute_value (hs );
416377 }
417378 case CHAR_GT : {
@@ -442,9 +403,6 @@ static int h5_state_before_attribute_value(h5_state_t* hs)
442403 return h5_state_attribute_value_double_quote (hs );
443404 } else if (c == CHAR_SINGLE ) {
444405 return h5_state_attribute_value_single_quote (hs );
445- } else if (c == CHAR_TICK ) {
446- /* NON STANDARD IE */
447- return h5_state_attribute_value_back_quote (hs );
448406 } else {
449407 return h5_state_attribute_value_no_quote (hs );
450408 }
@@ -457,16 +415,8 @@ static int h5_state_attribute_value_quote(h5_state_t* hs, char qchar)
457415
458416 TRACE ();
459417
460- /* skip initial quote in normal case.
461- * don't do this "if (pos == 0)" since it means we have started
462- * in a non-data state. given an input of '><foo
463- * we want to make 0-length attribute name
464- */
465- if (hs -> pos > 0 ) {
466- hs -> pos += 1 ;
467- }
468-
469-
418+ /* skip quote */
419+ hs -> pos += 1 ;
470420 idx = (const char * ) memchr (hs -> s + hs -> pos , qchar , hs -> len - hs -> pos );
471421 if (idx == NULL ) {
472422 hs -> token_start = hs -> s + hs -> pos ;
@@ -497,13 +447,6 @@ int h5_state_attribute_value_single_quote(h5_state_t* hs)
497447 return h5_state_attribute_value_quote (hs , CHAR_SINGLE );
498448}
499449
500- static
501- int h5_state_attribute_value_back_quote (h5_state_t * hs )
502- {
503- TRACE ();
504- return h5_state_attribute_value_quote (hs , CHAR_TICK );
505- }
506-
507450static int h5_state_attribute_value_no_quote (h5_state_t * hs )
508451{
509452 char ch ;
@@ -713,13 +656,10 @@ static int h5_state_comment(h5_state_t* hs)
713656 char ch ;
714657 const char * idx ;
715658 size_t pos ;
716- size_t offset ;
717- const char * end = hs -> s + hs -> len ;
718659
719660 TRACE ();
720661 pos = hs -> pos ;
721662 while (1 ) {
722-
723663 idx = (const char * ) memchr (hs -> s + pos , CHAR_DASH , hs -> len - pos );
724664
725665 /* did not find anything or has less than 3 chars left */
@@ -730,62 +670,21 @@ static int h5_state_comment(h5_state_t* hs)
730670 hs -> token_type = TAG_COMMENT ;
731671 return 1 ;
732672 }
733- offset = 1 ;
734-
735- /* skip all nulls */
736- while (idx + offset < end && * (idx + offset ) == 0 ) {
737- offset += 1 ;
738- }
739- if (idx + offset == end ) {
740- hs -> state = h5_state_eof ;
741- hs -> token_start = hs -> s + hs -> pos ;
742- hs -> token_len = hs -> len - hs -> pos ;
743- hs -> token_type = TAG_COMMENT ;
744- return 1 ;
745- }
746-
747- ch = * (idx + offset );
673+ ch = * (idx + 1 );
748674 if (ch != CHAR_DASH && ch != CHAR_BANG ) {
749675 pos = (size_t )(idx - hs -> s ) + 1 ;
750676 continue ;
751677 }
752-
753- /* need to test */
754- #if 0
755- /* skip all nulls */
756- while (idx + offset < end && * (idx + offset ) == 0 ) {
757- offset += 1 ;
758- }
759- if (idx + offset == end ) {
760- hs -> state = h5_state_eof ;
761- hs -> token_start = hs -> s + hs -> pos ;
762- hs -> token_len = hs -> len - hs -> pos ;
763- hs -> token_type = TAG_COMMENT ;
764- return 1 ;
765- }
766- #endif
767-
768- offset += 1 ;
769- if (idx + offset == end ) {
770- hs -> state = h5_state_eof ;
771- hs -> token_start = hs -> s + hs -> pos ;
772- hs -> token_len = hs -> len - hs -> pos ;
773- hs -> token_type = TAG_COMMENT ;
774- return 1 ;
775- }
776-
777-
778- ch = * (idx + offset );
678+ ch = * (idx + 2 );
779679 if (ch != CHAR_GT ) {
780680 pos = (size_t )(idx - hs -> s ) + 1 ;
781681 continue ;
782682 }
783- offset += 1 ;
784683
785684 /* ends in --> or -!> */
786685 hs -> token_start = hs -> s + hs -> pos ;
787686 hs -> token_len = (size_t )(idx - hs -> s ) - hs -> pos ;
788- hs -> pos = (size_t )(idx + offset - hs -> s );
687+ hs -> pos = (size_t )(idx - hs -> s ) + 3 ;
789688 hs -> state = h5_state_data ;
790689 hs -> token_type = TAG_COMMENT ;
791690 return 1 ;
0 commit comments