Skip to content

Commit 5e6d270

Browse files
authored
Merge pull request #1070 from lightpanda-io/dump_strip_mode
Replace --noscript with more advanced --strip_mode
2 parents e6b9be5 + 2ddcc6d commit 5e6d270

File tree

3 files changed

+102
-13
lines changed

3 files changed

+102
-13
lines changed

src/browser/dump.zig

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,13 @@ pub const Opts = struct {
2626
// set to include element shadowroots in the dump
2727
page: ?*const Page = null,
2828

29-
exclude_scripts: bool = false,
29+
strip_mode: StripMode = .{},
30+
31+
/// Selects which groups of elements are left out of a dump.
/// Every group defaults to `false`, so a default-initialized value
/// (`.{}`) strips nothing.
pub const StripMode = struct {
    /// Omit JavaScript-related elements.
    js: bool = false,
    /// Omit UI-related elements.
    ui: bool = false,
    /// Omit CSS-related elements.
    css: bool = false,
};
3036
};
3137

3238
// writer must be a std.io.Writer
@@ -67,7 +73,7 @@ pub fn writeNode(node: *parser.Node, opts: Opts, writer: *std.Io.Writer) anyerro
6773
.element => {
6874
// open the tag
6975
const tag_type = try parser.nodeHTMLGetTagType(node) orelse .undef;
70-
if (opts.exclude_scripts and try isScriptOrRelated(tag_type, node)) {
76+
if (try isStripped(tag_type, node, opts.strip_mode)) {
7177
return;
7278
}
7379

@@ -159,9 +165,22 @@ pub fn writeChildren(root: *parser.Node, opts: Opts, writer: *std.Io.Writer) !vo
159165
}
160166
}
161167

162-
// When `exclude_scripts` is passed to dump, we don't include <script> tags.
163-
// We also want to omit <link rel=preload as=script>
164-
fn isScriptOrRelated(tag_type: parser.Tag, node: *parser.Node) !bool {
168+
/// Reports whether `node` must be omitted from the dump under `strip_mode`.
///
/// Groups are tested in the order js, css, ui. A group's predicate is only
/// evaluated when its flag is set, and evaluation stops at the first match.
/// Any error returned by a predicate is propagated to the caller.
fn isStripped(tag_type: parser.Tag, node: *parser.Node, strip_mode: Opts.StripMode) !bool {
    return (strip_mode.js and try isJsRelated(tag_type, node)) or
        (strip_mode.css and try isCssRelated(tag_type, node)) or
        (strip_mode.ui and try isUIRelated(tag_type, node));
}
182+
183+
fn isJsRelated(tag_type: parser.Tag, node: *parser.Node) !bool {
165184
if (tag_type == .script) {
166185
return true;
167186
}
@@ -178,6 +197,34 @@ fn isScriptOrRelated(tag_type: parser.Tag, node: *parser.Node) !bool {
178197
return false;
179198
}
180199

200+
/// Reports whether the element is part of the "css" strip group:
/// a `<style>` element, or a `<link>` whose `rel` attribute contains the
/// `stylesheet` keyword.
///
/// `rel` holds a whitespace-separated list of link types, so values such as
/// `rel="alternate stylesheet"` or `rel="preload stylesheet"` must match too.
/// Each token is compared ASCII case-insensitively. A `<link>` without a
/// `rel` attribute is not CSS-related.
///
/// Errors from reading the attribute are propagated to the caller.
fn isCssRelated(tag_type: parser.Tag, node: *parser.Node) !bool {
    switch (tag_type) {
        .style => return true,
        .link => {
            const el = parser.nodeToElement(node);
            const rel = try parser.elementGetAttribute(el, "rel") orelse return false;

            // Match any single link type rather than the whole attribute value.
            var link_types = std.mem.tokenizeAny(u8, rel, &std.ascii.whitespace);
            while (link_types.next()) |link_type| {
                if (std.ascii.eqlIgnoreCase(link_type, "stylesheet")) {
                    return true;
                }
            }
            return false;
        },
        else => return false,
    }
}
211+
212+
/// Reports whether the element is part of the "ui" strip group.
///
/// The group covers everything `isCssRelated` accepts, plus `img`,
/// `picture` and `video` elements, plus elements whose tag type is `.undef`
/// and whose local name is exactly "svg".
///
/// Errors from the underlying parser calls are propagated to the caller.
fn isUIRelated(tag_type: parser.Tag, node: *parser.Node) !bool {
    if (try isCssRelated(tag_type, node)) return true;

    return switch (tag_type) {
        .img, .picture, .video => true,
        // No dedicated tag type is available here, so fall back to the name.
        .undef => std.mem.eql(u8, try parser.nodeLocalName(node), "svg"),
        else => false,
    };
}
227+
181228
// area, base, br, col, embed, hr, img, input, link, meta, source, track, wbr
182229
// https://html.spec.whatwg.org/#void-elements
183230
fn isVoid(elem: *parser.Element) !bool {

src/browser/page.zig

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ pub const Page = struct {
181181
// set to include element shadowroots in the dump
182182
page: ?*const Page = null,
183183
with_base: bool = false,
184-
exclude_scripts: bool = false,
184+
strip_mode: Dump.Opts.StripMode = .{},
185185
};
186186

187187
// dump writes the page content into the given file.
@@ -228,7 +228,7 @@ pub const Page = struct {
228228

229229
try Dump.writeHTML(doc, .{
230230
.page = opts.page,
231-
.exclude_scripts = opts.exclude_scripts,
231+
.strip_mode = opts.strip_mode,
232232
}, out);
233233
}
234234

src/main.zig

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ const log = @import("log.zig");
2424
const App = @import("app.zig").App;
2525
const Server = @import("server.zig").Server;
2626
const Browser = @import("browser/browser.zig").Browser;
27+
const DumpStripMode = @import("browser/dump.zig").Opts.StripMode;
2728

2829
const build_config = @import("build_config");
2930

@@ -184,7 +185,7 @@ fn run(alloc: Allocator) !void {
184185
try page.dump(.{
185186
.page = page,
186187
.with_base = opts.withbase,
187-
.exclude_scripts = opts.noscript,
188+
.strip_mode = opts.strip_mode,
188189
}, &writer.interface);
189190
try writer.interface.flush();
190191
}
@@ -292,8 +293,8 @@ const Command = struct {
292293
url: []const u8,
293294
dump: bool = false,
294295
common: Common,
295-
noscript: bool = false,
296296
withbase: bool = false,
297+
strip_mode: DumpStripMode = .{},
297298
};
298299

299300
const Common = struct {
@@ -372,7 +373,14 @@ const Command = struct {
372373
\\Options:
373374
\\--dump Dumps document to stdout.
374375
\\ Defaults to false.
375-
\\--noscript Exclude <script> tags in dump. Defaults to false.
376+
\\
377+
\\--strip_mode Comma separated list of tag groups to remove from dump
378+
\\ the dump. e.g. --strip_mode js,css
379+
\\ - "js" script and link[as=script, rel=preload]
380+
\\ - "ui" includes img, picture, video, css and svg
381+
\\ - "css" includes style and link[rel=stylesheet]
382+
\\ - "full" includes js, ui and css
383+
\\
376384
\\--with_base Add a <base> tag in dump. Defaults to false.
377385
\\
378386
++ common_options ++
@@ -460,6 +468,10 @@ fn inferMode(opt: []const u8) ?App.RunMode {
460468
return .fetch;
461469
}
462470

471+
if (std.mem.eql(u8, opt, "--strip_mode")) {
472+
return .fetch;
473+
}
474+
463475
if (std.mem.eql(u8, opt, "--with_base")) {
464476
return .fetch;
465477
}
@@ -545,10 +557,10 @@ fn parseFetchArgs(
545557
args: *std.process.ArgIterator,
546558
) !Command.Fetch {
547559
var dump: bool = false;
548-
var noscript: bool = false;
549560
var withbase: bool = false;
550561
var url: ?[]const u8 = null;
551562
var common: Command.Common = .{};
563+
var strip_mode: DumpStripMode = .{};
552564

553565
while (args.next()) |opt| {
554566
if (std.mem.eql(u8, "--dump", opt)) {
@@ -557,7 +569,11 @@ fn parseFetchArgs(
557569
}
558570

559571
if (std.mem.eql(u8, "--noscript", opt)) {
560-
noscript = true;
572+
log.warn(.app, "deprecation warning", .{
573+
.feature = "--noscript argument",
574+
.hint = "use '--strip_mode js' instead",
575+
});
576+
strip_mode.js = true;
561577
continue;
562578
}
563579

@@ -566,6 +582,32 @@ fn parseFetchArgs(
566582
continue;
567583
}
568584

585+
if (std.mem.eql(u8, "--strip_mode", opt)) {
586+
const str = args.next() orelse {
587+
log.fatal(.app, "missing argument value", .{ .arg = "--strip_mode" });
588+
return error.InvalidArgument;
589+
};
590+
591+
var it = std.mem.splitScalar(u8, str, ',');
592+
while (it.next()) |part| {
593+
const trimmed = std.mem.trim(u8, part, &std.ascii.whitespace);
594+
if (std.mem.eql(u8, trimmed, "js")) {
595+
strip_mode.js = true;
596+
} else if (std.mem.eql(u8, trimmed, "ui")) {
597+
strip_mode.ui = true;
598+
} else if (std.mem.eql(u8, trimmed, "css")) {
599+
strip_mode.css = true;
600+
} else if (std.mem.eql(u8, trimmed, "full")) {
601+
strip_mode.js = true;
602+
strip_mode.ui = true;
603+
strip_mode.css = true;
604+
} else {
605+
log.fatal(.app, "invalid option choice", .{ .arg = "--strip_mode", .value = trimmed });
606+
}
607+
}
608+
continue;
609+
}
610+
569611
if (try parseCommonArg(allocator, opt, args, &common)) {
570612
continue;
571613
}
@@ -591,8 +633,8 @@ fn parseFetchArgs(
591633
.url = url.?,
592634
.dump = dump,
593635
.common = common,
594-
.noscript = noscript,
595636
.withbase = withbase,
637+
.strip_mode = strip_mode,
596638
};
597639
}
598640

0 commit comments

Comments
 (0)