Skip to content

Commit 6b55c30

Browse files
committed
feat: progress grid
1 parent d4a5495 commit 6b55c30

File tree

12 files changed

+574
-25
lines changed

12 files changed

+574
-25
lines changed

plugins/plugin-client-default/notebooks/dashboard.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ layout:
1111
3:
1212
position: default
1313
maximized: true
14+
4:
15+
position: default
16+
maximized: true
17+
inverseColors: true
1418
---
1519

1620
--8<-- "./dashboard-summary.md"
@@ -62,3 +66,14 @@ layout:
6266
---
6367
chart all "${LOGDIR}"
6468
```
69+
70+
---
71+
72+
=== "Events"
73+
```shell
74+
---
75+
execute: now
76+
outputOnly: true
77+
---
78+
chart progress "${LOGDIR}"
79+
```
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/*
2+
* Copyright 2022 The Kubernetes Authors
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import React from "react"
18+
import { Tooltip } from "@kui-shell/plugin-client-common"
19+
20+
import Event from "../controller/events/Event"
21+
22+
import "@kui-shell/plugin-client-common/web/scss/components/Table/_index.scss"
23+
import "@kui-shell/plugin-client-common/web/scss/components/Table/Grid/_index.scss"
24+
import "../../web/scss/components/Dashboard/Grid.scss"
25+
26+
/** Properties accepted by the Grid component */
interface Props {
  /** The events to display; one grid cell is rendered per array element, in array order */
  events: Event<string, unknown>[]
}
29+
30+
/**
 * Renders `props.events` as a grid: one badge-style cell per event,
 * each wrapped in a Tooltip whose content is markdown describing the
 * event.
 */
export default class Grid extends React.PureComponent<Props> {
  /**
   * @return markdown source for the tooltip of the given `event`: the
   * event name as a heading, the subtitle (falling back to the event
   * type) as a sub-heading, a status line, and the message as inline
   * code (a single space is used when the message is empty)
   */
  private tooltipContent(event: Event<string, unknown>) {
    const { name, subtitle, type, state, message } = event
    const subheading = subtitle || type
    const statusLine = state ? "Status: " + state : ""
    const detail = message || " "

    return `### ${name}
#### ${subheading}

${statusLine}

\`${detail}\``
  }

  /** @return the UI for one event; the position `idx` in the events array serves as the React key */
  private readonly cell = (event: Event<string, unknown>, idx: number) => (
    <Tooltip key={idx} markdown={this.tooltipContent(event)}>
      <span className="kui--grid-cell" data-tag="badge" data-type={event.type} data-state={event.state}>
        <span data-tag="badge-circle"></span>
      </span>
    </Tooltip>
  )

  public render() {
    const cells = this.props.events.map(this.cell)

    return (
      <div className="kui--data-table-wrapper kui--data-table-as-grid">
        <div className="kui--table-like-wrapper kui--data-table-as-grid">{cells}</div>
      </div>
    )
  }
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/*
2+
* Copyright 2022 The Kubernetes Authors
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
/** The lifecycle states an event can be in */
export type State = "InProgress" | "Done" | "Error"
/**
 * A single progress event. `T` is the set of allowed `type` strings
 * for a given event source, and `Detail` contributes any
 * source-specific fields, which are intersected with the common
 * fields below.
 */
type Event<T extends string, Detail> = Detail & {
  /** Title of the event */
  name: string
  /** Optional secondary title */
  subtitle?: string
  /** Free-form detail text for the event */
  message: string
  /** The kind of event, drawn from the source-specific set `T` */
  type: T
  /** Current lifecycle state of the event */
  state: State
  /** Numeric timestamp of the event. NOTE(review): units are not fixed by this type -- confirm with the producers */
  timestamp: number
  /** Optional flag marking an event as hidden. NOTE(review): presumably consumers skip hidden events when displaying -- confirm */
  hidden?: boolean
}

export default Event
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*
2+
* Copyright 2022 The Kubernetes Authors
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import React from "react"
18+
import { join } from "path"
19+
import stripAnsi from "strip-ansi"
20+
import { Arguments } from "@kui-shell/core"
21+
22+
import kubeEvents from "./kube"
23+
import torchEvents from "./torch"
24+
25+
import { expand } from "../../lib/util"
26+
import Grid from "../../components/Grid"
27+
28+
/**
 * Fetch `events/kubernetes.txt` and `logs/job.txt` from underneath the
 * given `filepath` (via the `vfs fslice` command), strip ANSI escape
 * codes from both, parse them into events, discard the events marked
 * `hidden`, and order the remainder by ascending `timestamp`.
 *
 * @return a Grid element displaying the resulting events
 */
async function eventsUI(filepath: string, REPL: Arguments["REPL"]) {
  // NOTE(review): the trailing `0` is presumably the offset to slice from -- confirm against `vfs fslice`
  const fetchPlainText = (relativePath: string) =>
    REPL.qexec<string>(`vfs fslice ${join(expand(filepath), relativePath)} 0`).then(stripAnsi)

  // the two fetches are independent, so issue them concurrently
  const [kube, logs] = await Promise.all([fetchPlainText("events/kubernetes.txt"), fetchPlainText("logs/job.txt")])

  const visibleEvents = [...kubeEvents(kube), ...torchEvents(logs)].filter((evt) => !evt.hidden)
  visibleEvents.sort((a, b) => a.timestamp - b.timestamp)

  return <Grid events={visibleEvents} />
}
39+
40+
/**
 * Command handler for `chart progress <filepath>`.
 *
 * @param args the command arguments; the filepath is read from the
 * third positional argument (`argvNoOptions[2]`)
 * @return an object whose `react` field is the events UI for that filepath
 * @throws Error with a usage message if no filepath was given
 */
export default async function eventsCmd(args: Arguments) {
  const filepath = args.argvNoOptions[2]
  if (!filepath) {
    // the filepath is missing in this branch, so interpolating it would
    // only print "undefined"; show a placeholder instead
    throw new Error("Usage: chart progress <filepath>")
  }

  return {
    react: await eventsUI(expand(filepath), args.REPL),
  }
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/*
2+
* Copyright 2022 The Kubernetes Authors
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import { Registrar } from "@kui-shell/core"
18+
19+
/**
 * Register the Kui command that renders the dashboard event UI. The
 * implementation module (`./Events`) is imported only when the
 * `/chart/progress` command is actually invoked.
 */
export default function registerEventCommands(registrar: Registrar) {
  registrar.listen(
    "/chart/progress",
    async (args) => {
      const { default: eventsCmd } = await import("./Events")
      return eventsCmd(args)
    },
    { needsUI: true }
  )
}
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/*
2+
* Copyright 2022 The Kubernetes Authors
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import Event from "./Event"
18+
19+
/** The kinds of Kubernetes event lines this module recognizes */
type EventType = "Pulling" | "Pulled"
/** An Event that additionally records the `node` string it pertains to */
type KubeEvent = Event<EventType, { node: string }>
21+
22+
/**
 * Search `M` from its last element backwards.
 *
 * @return the most recently added event with the given `node` and
 * `type`, or `undefined` if there is no such event
 */
function findPrevious(M: KubeEvent[], node: KubeEvent["node"], type: EventType) {
  let cursor = M.length
  while (cursor > 0) {
    cursor -= 1
    const candidate = M[cursor]
    if (candidate.node === node && candidate.type === type) {
      return candidate
    }
  }

  return undefined
}
30+
31+
/**
 * Fold one line of Kubernetes event output into the event list `M`.
 *
 * A line is recognized if it contains `Pulling` or `Pulled`, followed
 * by a whitespace-delimited token (taken as the node) and then the
 * remainder of the line (taken as the message).
 *
 * - `Pulling`: a new "InProgress" event is appended to `M`.
 * - `Pulled`: no event is appended; instead, the most recent `Pulling`
 *   event for the same node, if there is one, is marked "Done".
 *
 * Lines that match neither are ignored.
 *
 * @return `M` itself (which is modified in place)
 */
function collateEvent(M: KubeEvent[], line: string) {
  const parsed = /(Pulling|Pulled)\s+(\S+)\s+(.+)$/.exec(line)
  if (!parsed) {
    return M
  }

  const [, kind, node, message] = parsed

  if (kind === "Pulled") {
    const pending = findPrevious(M, node, "Pulling")
    if (pending) {
      pending.state = "Done"
    }
    return M
  }

  // the pattern admits only Pulling or Pulled, so this is a Pulling line
  M.push({
    name: "Pulling base image",
    subtitle: node,
    node,
    type: "Pulling",
    message,
    state: "InProgress",
    timestamp: -1,
  })

  return M
}
59+
60+
/**
 * Parse Kubernetes event output, one line at a time.
 *
 * @param kubeEvents the raw, newline-separated event text
 * @return the lifecycle events collated from those lines
 */
export default function kubeEvents(kubeEvents: string): KubeEvent[] {
  let events: KubeEvent[] = []
  for (const line of kubeEvents.split(/\n/)) {
    events = collateEvent(events, line)
  }

  return events
}
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
/*
2+
* Copyright 2022 The Kubernetes Authors
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import Event from "./Event"
18+
19+
/** The kinds of Torch log events this module produces */
type EventType = "Epoch" | "Iteration" | "Marker"
/** Torch-specific fields carried by each event: the epoch number, the step out of `nSteps`, and the `ip` string the log line was tagged with */
type Detail = { epoch: number; step: number; nSteps: number; ip: string }
/** An Event with the Torch-specific `Detail` fields */
type TorchEvent = Event<EventType, Detail>
22+
23+
/**
 * Search `M` from its last element backwards.
 *
 * @return the most recently added event with the given `ip` and
 * `type`, or `undefined` if there is no such event
 */
function findPrevious(M: TorchEvent[], ip: TorchEvent["ip"], type: EventType) {
  let remaining = M.length
  while (remaining-- > 0) {
    const candidate = M[remaining]
    if (candidate.ip === ip && candidate.type === type) {
      return candidate
    }
  }

  return undefined
}
31+
32+
/**
 * @return the `step` of the most recent Epoch event recorded in `M`
 * for the given `ip`, or -1 if no Epoch event has been recorded for it
 */
function findEpoch(M: TorchEvent[], ip: TorchEvent["ip"]) {
  const latestEpoch = findPrevious(M, ip, "Epoch")
  if (!latestEpoch) {
    return -1
  }

  return latestEpoch.step
}
36+
37+
/**
 * Fold one line of job log output into the event list `M`. Two kinds
 * of line are recognized; anything else is ignored.
 *
 * 1. A "***** Running training" line: appended as a hidden Marker
 *    event carrying the timestamp parsed from that line.
 * 2. An `Epoch:` or `Iteration:` progress line of the form
 *    `<percent>%|...| <step>/<nSteps>`: appended as an "InProgress"
 *    event, after marking the previous event of the same type and ip
 *    (if any) as "Done". Its timestamp is that of the most recent
 *    Marker for the same ip, or the current time if there is none.
 *
 * @return `M` itself (which is modified in place)
 */
function collateEvent(M: TorchEvent[], line: string) {
  const banner = /ip=([\d.]+)\)\s+(\d+\/\d+\/\d+\s+\d+:\d+:\d+)\s+.+\*\*\*\*\* Running training/.exec(line)
  if (banner) {
    const [, bannerIp, when] = banner
    M.push({
      ip: bannerIp,
      name: "Marker",
      message: "Marker",
      state: "InProgress",
      type: "Marker",
      hidden: true,
      timestamp: new Date(when).getTime(),
      epoch: -1,
      step: -1,
      nSteps: -1,
    })
    return M
  }

  const progress = /ip=([\d.]+)\)\s+(Epoch|Iteration):\s+(\d+)%\|[^|]+\|\s(\d+)\/(\d+)/.exec(line)
  if (!progress) {
    return M
  }

  // progress[3] is the percentage, which is currently unused
  const ip = progress[1]
  const type = progress[2] as EventType
  const step = parseInt(progress[4], 10)
  const nSteps = parseInt(progress[5], 10)

  const isEpoch = type === "Epoch"
  const epoch = isEpoch ? step : findEpoch(M, ip)
  const marker = findPrevious(M, ip, "Marker")
  const stepSuffix = isEpoch ? "" : ` - ${type} ${step}`

  const event = {
    name: `Torch Training on ${ip}`,
    message: `Epoch ${epoch}${stepSuffix} of ${nSteps}`,
    ip,
    type,
    step,
    nSteps,
    epoch,
    timestamp: marker ? marker.timestamp : Date.now(),
    state: "InProgress" as const,
  }

  // the arrival of a new event of this type means the previous one (for this ip) has finished
  const prev = findPrevious(M, ip, type)
  if (prev) {
    prev.state = "Done"
  }

  // strange, torch seems to repeat the e.g. Epoch 6/6 event; do not record the repeat
  const isRepeatedEpoch = prev !== undefined && isEpoch && prev.step === step
  if (!isRepeatedEpoch) {
    M.push(event)
  }

  return M
}
93+
94+
/**
 * Comparator ordering events by `ip`, then `epoch`, then `step`, then
 * `type`; each later key is consulted only when all earlier keys tie.
 */
function sortFn(a: TorchEvent, b: TorchEvent) {
  const byIp = a.ip.localeCompare(b.ip)
  if (byIp) {
    return byIp
  }

  const byEpoch = a.epoch - b.epoch
  if (byEpoch) {
    return byEpoch
  }

  const byStep = a.step - b.step
  if (byStep) {
    return byStep
  }

  return a.type.localeCompare(b.type)
}
97+
98+
/**
 * Parse Torch training job logs, one line at a time.
 *
 * @param jobLogs the raw, newline-separated log text
 * @return the lifecycle events (Epoch, Iteration, and Marker) collated
 * from those lines, ordered by `sortFn`
 */
export default function torchEvents(jobLogs: string): TorchEvent[] {
  let events: TorchEvent[] = []
  for (const line of jobLogs.split(/\n/)) {
    events = collateEvent(events, line)
  }

  return events.sort(sortFn)
}

0 commit comments

Comments
 (0)