Skip to content

Commit 25fef09

Browse files
committed
Fixed a chunk-parsing problem that occurred when the input contained a script tag: end-tag length checks now account for `tree->global_offset` so offsets remain correct across chunk boundaries.
1 parent b8b6ca6 commit 25fef09

File tree

1 file changed

+27
-31
lines changed

1 file changed

+27
-31
lines changed

source/myhtml/tokenizer_script.c

Lines changed: 27 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ size_t myhtml_tokenizer_state_script_data_end_tag_name(myhtml_tree_t* tree, myht
104104
{
105105
if(myhtml_whithspace(html[html_offset], ==, ||))
106106
{
107-
if((html_offset - token_node->str.length) != 6) {
107+
if(((html_offset + tree->global_offset) - token_node->str.length) != 6) {
108108
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_SCRIPT_DATA;
109109
html_offset++;
110110
break;
@@ -121,10 +121,10 @@ size_t myhtml_tokenizer_state_script_data_end_tag_name(myhtml_tree_t* tree, myht
121121
return 0;
122122
}
123123

124-
token_node->raw_begin = tmp_size;
125-
token_node->raw_length = 6;
126-
token_node->tag_id = MyHTML_TAG_SCRIPT;
127-
token_node->type = MyHTML_TOKEN_TYPE_CLOSE;
124+
token_node->raw_begin = tmp_size;
125+
token_node->raw_length = 6;
126+
token_node->tag_id = MyHTML_TAG_SCRIPT;
127+
token_node->type = MyHTML_TOKEN_TYPE_CLOSE;
128128

129129
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_NAME;
130130
}
@@ -137,7 +137,7 @@ size_t myhtml_tokenizer_state_script_data_end_tag_name(myhtml_tree_t* tree, myht
137137
}
138138
else if(html[html_offset] == '/')
139139
{
140-
if((html_offset - token_node->str.length) != 6) {
140+
if(((html_offset + tree->global_offset) - token_node->str.length) != 6) {
141141
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_SCRIPT_DATA;
142142
html_offset++;
143143
break;
@@ -154,10 +154,10 @@ size_t myhtml_tokenizer_state_script_data_end_tag_name(myhtml_tree_t* tree, myht
154154
return 0;
155155
}
156156

157-
token_node->raw_begin = tmp_size;
158-
token_node->raw_length = 6;
157+
token_node->raw_begin = tmp_size;
158+
token_node->raw_length = 6;
159159
token_node->tag_id = MyHTML_TAG_SCRIPT;
160-
token_node->type = MyHTML_TOKEN_TYPE_CLOSE|MyHTML_TOKEN_TYPE_CLOSE_SELF;
160+
token_node->type = MyHTML_TOKEN_TYPE_CLOSE|MyHTML_TOKEN_TYPE_CLOSE_SELF;
161161

162162
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_NAME;
163163
}
@@ -170,7 +170,7 @@ size_t myhtml_tokenizer_state_script_data_end_tag_name(myhtml_tree_t* tree, myht
170170
}
171171
else if(html[html_offset] == '>')
172172
{
173-
if((html_offset - token_node->str.length) != 6) {
173+
if(((html_offset + tree->global_offset) - token_node->str.length) != 6) {
174174
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_SCRIPT_DATA;
175175
html_offset++;
176176
break;
@@ -187,10 +187,10 @@ size_t myhtml_tokenizer_state_script_data_end_tag_name(myhtml_tree_t* tree, myht
187187
return 0;
188188
}
189189

190-
token_node->raw_begin = tmp_size;
191-
token_node->raw_length = 6;
190+
token_node->raw_begin = tmp_size;
191+
token_node->raw_length = 6;
192192
token_node->tag_id = MyHTML_TAG_SCRIPT;
193-
token_node->type = MyHTML_TOKEN_TYPE_CLOSE;
193+
token_node->type = MyHTML_TOKEN_TYPE_CLOSE;
194194

195195
html_offset++;
196196

@@ -280,7 +280,7 @@ size_t myhtml_tokenizer_state_script_data_escaped_end_tag_name(myhtml_tree_t* tr
280280
{
281281
if(myhtml_whithspace(html[html_offset], ==, ||))
282282
{
283-
if((html_offset - token_node->str.length) != 6) {
283+
if(((html_offset + tree->global_offset) - token_node->str.length) != 6) {
284284
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPED;
285285
html_offset++;
286286
break;
@@ -297,10 +297,10 @@ size_t myhtml_tokenizer_state_script_data_escaped_end_tag_name(myhtml_tree_t* tr
297297
return 0;
298298
}
299299

300-
token_node->raw_begin = tmp_size;
301-
token_node->raw_length = 6;
300+
token_node->raw_begin = tmp_size;
301+
token_node->raw_length = 6;
302302
token_node->tag_id = MyHTML_TAG_SCRIPT;
303-
token_node->type = MyHTML_TOKEN_TYPE_CLOSE;
303+
token_node->type = MyHTML_TOKEN_TYPE_CLOSE;
304304

305305
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_NAME;
306306
}
@@ -313,7 +313,7 @@ size_t myhtml_tokenizer_state_script_data_escaped_end_tag_name(myhtml_tree_t* tr
313313
}
314314
else if(html[html_offset] == '/')
315315
{
316-
if((html_offset - token_node->str.length) != 6) {
316+
if(((html_offset + tree->global_offset) - token_node->str.length) != 6) {
317317
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPED;
318318
html_offset++;
319319
break;
@@ -330,10 +330,10 @@ size_t myhtml_tokenizer_state_script_data_escaped_end_tag_name(myhtml_tree_t* tr
330330
return 0;
331331
}
332332

333-
token_node->raw_begin = tmp_size;
334-
token_node->raw_length = 6;
333+
token_node->raw_begin = tmp_size;
334+
token_node->raw_length = 6;
335335
token_node->tag_id = MyHTML_TAG_SCRIPT;
336-
token_node->type = MyHTML_TOKEN_TYPE_CLOSE|MyHTML_TOKEN_TYPE_CLOSE_SELF;
336+
token_node->type = MyHTML_TOKEN_TYPE_CLOSE|MyHTML_TOKEN_TYPE_CLOSE_SELF;
337337

338338
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_BEFORE_ATTRIBUTE_NAME;
339339
}
@@ -346,7 +346,7 @@ size_t myhtml_tokenizer_state_script_data_escaped_end_tag_name(myhtml_tree_t* tr
346346
}
347347
else if(html[html_offset] == '>')
348348
{
349-
if((html_offset - token_node->str.length) != 6) {
349+
if(((html_offset + tree->global_offset) - token_node->str.length) != 6) {
350350
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPED;
351351
html_offset++;
352352
break;
@@ -363,10 +363,10 @@ size_t myhtml_tokenizer_state_script_data_escaped_end_tag_name(myhtml_tree_t* tr
363363
return 0;
364364
}
365365

366-
token_node->raw_begin = tmp_size;
367-
token_node->raw_length = 6;
366+
token_node->raw_begin = tmp_size;
367+
token_node->raw_length = 6;
368368
token_node->tag_id = MyHTML_TAG_SCRIPT;
369-
token_node->type = MyHTML_TOKEN_TYPE_CLOSE;
369+
token_node->type = MyHTML_TOKEN_TYPE_CLOSE;
370370

371371
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
372372

@@ -446,7 +446,7 @@ size_t myhtml_tokenizer_state_script_data_double_escape_start(myhtml_tree_t* tre
446446
{
447447
if(myhtml_whithspace(html[html_offset], ==, ||) || html[html_offset] == '/' || html[html_offset] == '>')
448448
{
449-
if((html_offset - token_node->str.length) != 6) {
449+
if(((html_offset + tree->global_offset) - token_node->str.length) != 6) {
450450
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_SCRIPT_DATA_ESCAPED;
451451
html_offset++;
452452
break;
@@ -564,7 +564,7 @@ size_t myhtml_tokenizer_state_script_data_double_escape_end(myhtml_tree_t* tree,
564564
{
565565
if(myhtml_whithspace(html[html_offset], ==, ||) || html[html_offset] == '/' || html[html_offset] == '>')
566566
{
567-
if((html_offset - token_node->str.length) != 6) {
567+
if(((html_offset + tree->global_offset) - token_node->str.length) != 6) {
568568
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_SCRIPT_DATA_DOUBLE_ESCAPED;
569569
html_offset++;
570570
break;
@@ -594,7 +594,3 @@ size_t myhtml_tokenizer_state_script_data_double_escape_end(myhtml_tree_t* tree,
594594

595595
return html_offset;
596596
}
597-
598-
599-
600-

0 commit comments

Comments (0)