Skip to content

Commit bb70c04

Browse files
committed
new PI planning prep for problem table
1 parent 41877af commit bb70c04

File tree

2 files changed

+355
-0
lines changed

2 files changed

+355
-0
lines changed
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Training Data Quality Analyzer for ServiceNow Predictive Intelligence (Problem)
2+
3+
## Overview
4+
This script analyzes the quality of problem data in ServiceNow to determine readiness for Predictive Intelligence (PI) model training. It provides detailed statistics and quality metrics to help ServiceNow developers and admins identify and address data issues before starting ML training jobs.
5+
6+
## Purpose
7+
- Assess completeness and quality of key fields in problem records
8+
- Identify common data issues that could impact PI model performance
9+
- Provide actionable insights for improving training data
10+
11+
## Features
12+
- Checks completeness of important fields (e.g., short_description, description, category, assignment_group, close_notes, state)
13+
- Samples recent records to measure description and close-notes length, reports the category distribution, and computes an overall quality score out of 100
14+
- Outputs results to the ServiceNow system logs
15+
16+
## Setup Requirements
17+
1. **ServiceNow Instance** with Predictive Intelligence plugin enabled
18+
2. **Script Execution Permissions**: Run as a background script or Script Include with access to the `problem` table
19+
3. **No external dependencies**: Uses only standard ServiceNow APIs (GlideRecord, GlideAggregate)
20+
4. **Sufficient Data Volume**: At least 50 closed problems recommended for meaningful analysis
21+
22+
## How It Works
23+
1. **Field Existence Check**: Dynamically verifies that each key field exists on the problem table or its parent tables
24+
2. **Statistics Gathering**: Collects counts for total and filled key fields
25+
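3. **Text, Category, and Score Analysis**: Samples up to `sampleSize` records to measure description and close-notes length, counts records per category, and combines the results into an overall quality score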
26+
4. **Log Output**: Prints all results and warnings to the ServiceNow logs for review
27+
28+
## Customization
29+
- Adjust the `keyFields` array in the script to match your organization's data requirements
30+
- Add or remove fields and statistics as needed
31+
32+
## Security & Best Practices
33+
- Do not run in production without review
34+
- Ensure no sensitive data is exposed in logs
35+
- Validate script results in a sub-production environment before using for model training
Lines changed: 320 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,320 @@
1+
(function analyzeProblemDataQuality() {
2+
// Analysis settings. The helper functions declared further down receive this
// object and read `states.closedStates`, `thresholds`, `keyFields`,
// `sampleSize` and `useAllRecords` from it.
var config = {
    table: 'problem',
    keyFields: [
        'short_description',
        'description',
        'category',
        'assignment_group',
        'close_notes',
        'state'
    ],
    thresholds: {
        minDescriptionLength: 20,
        minCloseNotesLength: 50,
        // NOTE(review): not referenced by any analysis step in this script.
        minResolutionTime: 5,
        maxAge: 365,
        targetCompleteness: 80,
        minTrainingRecords: 50
    },
    states: {
        // Candidate closed/resolved state values; replaced below by the subset
        // that identifyClosedStates() reports for this instance.
        closedStates: ['3', '4', '9', '10']
    },
    sampleSize: 500,
    // Set to true below when no closed state could be identified.
    useAllRecords: false
};

gs.info('========================================');
gs.info('PI Training Data Quality Analysis (Problem)');
gs.info('========================================');

// Identify which closed states are actually present.
var actualClosedStates = identifyClosedStates(config.states.closedStates, config.table);
if (actualClosedStates.length === 0) {
    gs.warn('⚠️ No closed states identified. Using all records for analysis.');
    config.useAllRecords = true;
} else {
    gs.info('Using closed states: ' + actualClosedStates.join(', '));
    config.states.closedStates = actualClosedStates;
}

// STEP 1: overall record counts.
var stats = getOverallStats(config);
gs.info('');
gs.info('=== STEP 1: Overall Statistics ===');
gs.info('Total Problems: ' + stats.total);
gs.info('Closed Problems: ' + stats.closed);
gs.info('Recent 90 Days: ' + stats.recent90);
gs.info('Recent 365 Days: ' + stats.recent365);
gs.info('');

var minTrainingRecords = config.thresholds.minTrainingRecords;
if (stats.closed < minTrainingRecords) {
    gs.warn('⚠️ Low number of closed problems - need at least ' + minTrainingRecords + ' for training');
    gs.info('Current: ' + stats.closed);
} else {
    gs.info('✅ Sufficient closed problems for training');
}

// STEP 2: per-field completeness.
gs.info('');
gs.info('=== STEP 2: Field Completeness Analysis ===');
var completeness = analyzeFieldCompleteness(config);
gs.info('Field Completeness Scores:');
// The "good" icon follows the configured target instead of a duplicated literal.
var targetCompleteness = config.thresholds.targetCompleteness;
for (var field in completeness) {
    if (!Object.prototype.hasOwnProperty.call(completeness, field)) {
        continue;
    }
    var fieldResult = completeness[field];
    var pct = fieldResult.percentage;
    var icon = pct >= targetCompleteness ? '✅' : pct >= 50 ? '⚠️' : '❌';
    gs.info(icon + ' ' + field + ': ' + pct.toFixed(1) + '% (' +
        fieldResult.filled + '/' + fieldResult.total + ')');
}

// STEP 3: text length quality of description and close notes.
gs.info('');
gs.info('=== STEP 3: Text Quality Analysis ===');
var textQuality = analyzeTextQuality(config);
gs.info('Description Quality: Avg ' + textQuality.description.avgLength.toFixed(0) +
    ' chars, ' + textQuality.description.goodQualityPct.toFixed(1) + '% good quality');
gs.info('Close Notes Quality: Avg ' + textQuality.closeNotes.avgLength.toFixed(0) +
    ' chars, ' + textQuality.closeNotes.goodQualityPct.toFixed(1) + '% good quality');

// STEP 4: category distribution (top five only).
gs.info('');
gs.info('=== STEP 4: Category Distribution ===');
var categoryDist = analyzeCategoryDistribution(config);
gs.info('Categories found: ' + categoryDist.length);
for (var i = 0; i < Math.min(5, categoryDist.length); i++) {
    var cat = categoryDist[i];
    gs.info('  ' + (cat.category || '(empty)') + ': ' + cat.count);
}

// Overall score.
// NOTE(review): no step in this script measures resolution time, so the
// time-quality input is a fixed placeholder rather than a measured value.
var placeholderTimeAnalysis = {tooQuickPct: 15};
var overallScore = calculateOverallScore(completeness, textQuality, placeholderTimeAnalysis);
gs.info('');
gs.info('=== OVERALL QUALITY SCORE ===');
gs.info('Score: ' + overallScore.toFixed(0) + '/100');

gs.info('');
gs.info('========================================');
gs.info('Analysis Complete');
gs.info('========================================');
98+
99+
// Helper functions
100+
/**
 * Finds which candidate closed-state values actually occur on the table.
 *
 * Groups the table by `state` and keeps only the values that appear in the
 * candidate list. Record volume is deliberately not a criterion: a state
 * with many records is not thereby a closed state, and treating it as one
 * would make every later "closed" filter include open records.
 *
 * @param {string[]} [candidateStates] State values to treat as closed;
 *     defaults to ['3', '4', '9', '10'].
 * @param {string} [tableName] Table to inspect; defaults to 'problem'.
 * @returns {string[]} Candidate state values present on the table, in the
 *     order the aggregate query returned them.
 */
function identifyClosedStates(candidateStates, tableName) {
    var candidates = (candidateStates && candidateStates.length > 0)
        ? candidateStates
        : ['3', '4', '9', '10'];
    var table = tableName || 'problem';

    var stateGr = new GlideAggregate(table);
    stateGr.groupBy('state');
    stateGr.addAggregate('COUNT');
    stateGr.query();

    var states = [];
    while (stateGr.next()) {
        var state = stateGr.getValue('state');
        // Explicit radix; fall back to 0 if the aggregate is not numeric.
        var count = parseInt(stateGr.getAggregate('COUNT'), 10) || 0;

        if (candidates.indexOf(state) >= 0) {
            states.push(state);
            gs.info('Including state ' + state + ' (' + count + ' records)');
        }
    }

    return states;
}
127+
128+
/**
 * Collects headline record counts for the analysed table.
 *
 * @param {Object} config Analysis settings. Reads `table` (defaults to
 *     'problem'), `useAllRecords` and `states.closedStates`.
 * @returns {{total: number, closed: number, recent90: number, recent365: number}}
 *     `closed` equals `total` when `useAllRecords` is set or no closed
 *     states are configured.
 */
function getOverallStats(config) {
    var table = config.table || 'problem';

    // Runs one COUNT aggregate; `applyFilter`, when given, adds conditions first.
    function countRecords(applyFilter) {
        var agg = new GlideAggregate(table);
        if (applyFilter) {
            applyFilter(agg);
        }
        agg.addAggregate('COUNT');
        agg.query();
        if (!agg.next()) {
            return 0;
        }
        return parseInt(agg.getAggregate('COUNT'), 10) || 0;
    }

    // Counts records created on or after the start of the day `days` days ago.
    function countCreatedWithin(days) {
        return countRecords(function (agg) {
            agg.addQuery('sys_created_on', '>=', 'javascript:gs.daysAgoStart(' + days + ')');
        });
    }

    var result = {total: 0, closed: 0, recent90: 0, recent365: 0};

    result.total = countRecords();

    if (!config.useAllRecords && config.states.closedStates.length > 0) {
        result.closed = countRecords(function (agg) {
            agg.addQuery('state', 'IN', config.states.closedStates.join(','));
        });
    } else {
        // No usable closed-state filter: every record counts as "closed".
        result.closed = result.total;
    }

    result.recent365 = countCreatedWithin(365);
    result.recent90 = countCreatedWithin(90);

    return result;
}
171+
172+
/**
 * Measures how often each key field is populated within the analysis scope.
 *
 * The scope is: records created within `thresholds.maxAge` days, restricted
 * to `states.closedStates` unless `useAllRecords` is set or the list is empty.
 * Fields that are not valid on the table are skipped with a warning.
 *
 * @param {Object} config Analysis settings. Reads `table` (defaults to
 *     'problem'), `keyFields`, `thresholds.maxAge`, `useAllRecords` and
 *     `states.closedStates`.
 * @returns {Object} Map of field name to `{total, filled, percentage}`;
 *     `percentage` is 0 when the scope contains no records.
 */
function analyzeFieldCompleteness(config) {
    var table = config.table || 'problem';
    var restrictToClosed = !config.useAllRecords && config.states.closedStates.length > 0;
    var createdSince = 'javascript:gs.daysAgoStart(' + config.thresholds.maxAge + ')';

    // Counts in-scope records; with `fieldName`, only those where it is populated.
    function countInScope(fieldName) {
        var agg = new GlideAggregate(table);
        if (restrictToClosed) {
            agg.addQuery('state', 'IN', config.states.closedStates.join(','));
        }
        agg.addQuery('sys_created_on', '>=', createdSince);
        if (fieldName) {
            // Both a non-empty comparison and a not-null filter are applied.
            agg.addQuery(fieldName, '!=', '');
            agg.addNotNullQuery(fieldName);
        }
        agg.addAggregate('COUNT');
        agg.query();
        if (!agg.next()) {
            return 0;
        }
        return parseInt(agg.getAggregate('COUNT'), 10) || 0;
    }

    var total = countInScope();
    gs.info('Analyzing ' + total + ' records for field completeness...');

    // One unqueried record is enough to validate every field name.
    var schemaProbe = new GlideRecord(table);
    var results = {};

    for (var f = 0; f < config.keyFields.length; f++) {
        var fieldName = config.keyFields[f];
        if (!schemaProbe.isValidField(fieldName)) {
            gs.warn('Field ' + fieldName + ' not found, skipping');
            continue;
        }

        var filled = countInScope(fieldName);
        results[fieldName] = {
            total: total,
            filled: filled,
            percentage: total > 0 ? (filled / total * 100) : 0
        };
    }

    return results;
}
223+
224+
/**
 * Samples records and measures the length quality of `description` and
 * `close_notes`.
 *
 * Only non-empty values are measured; a value counts as good quality when
 * its length is at least the matching minimum in `config.thresholds`.
 *
 * @param {Object} config Analysis settings. Reads `table` (defaults to
 *     'problem'), `sampleSize`, `thresholds.maxAge`,
 *     `thresholds.minDescriptionLength`, `thresholds.minCloseNotesLength`,
 *     `useAllRecords` and `states.closedStates`.
 * @returns {{description: {avgLength: number, goodQualityPct: number},
 *            closeNotes: {avgLength: number, goodQualityPct: number}}}
 *     Both numbers are 0 for a field with no non-empty values in the sample.
 */
function analyzeTextQuality(config) {
    var table = config.table || 'problem';

    // Folds one text value into a running tally; empty values are ignored.
    function tally(stats, text, minLength) {
        if (!text) {
            return;
        }
        stats.count++;
        stats.totalLength += text.length;
        if (text.length >= minLength) {
            stats.goodQuality++;
        }
    }

    // Converts a tally into the reported average length and good-quality share.
    function summarize(stats) {
        if (stats.count === 0) {
            return {avgLength: 0, goodQualityPct: 0};
        }
        return {
            avgLength: stats.totalLength / stats.count,
            goodQualityPct: stats.goodQuality / stats.count * 100
        };
    }

    var gr = new GlideRecord(table);
    if (!config.useAllRecords && config.states.closedStates.length > 0) {
        gr.addQuery('state', 'IN', config.states.closedStates.join(','));
    }
    gr.addQuery('sys_created_on', '>=', 'javascript:gs.daysAgoStart(' + config.thresholds.maxAge + ')');
    if (config.sampleSize) {
        gr.setLimit(config.sampleSize);
    }
    gr.query();

    var descStats = {totalLength: 0, count: 0, goodQuality: 0};
    var closeNotesStats = {totalLength: 0, count: 0, goodQuality: 0};

    while (gr.next()) {
        tally(descStats, gr.getValue('description') || '', config.thresholds.minDescriptionLength);
        tally(closeNotesStats, gr.getValue('close_notes') || '', config.thresholds.minCloseNotesLength);
    }

    return {
        description: summarize(descStats),
        closeNotes: summarize(closeNotesStats)
    };
}
273+
274+
/**
 * Counts in-scope records per `category`, largest group first.
 *
 * The scope is: records created within `thresholds.maxAge` days, restricted
 * to `states.closedStates` unless `useAllRecords` is set or the list is empty.
 *
 * @param {Object} config Analysis settings. Reads `table` (defaults to
 *     'problem'), `thresholds.maxAge`, `useAllRecords` and
 *     `states.closedStates`.
 * @returns {Array<{category: ?string, count: number}>} One entry per
 *     category value, sorted by descending count.
 */
function analyzeCategoryDistribution(config) {
    var catGr = new GlideAggregate(config.table || 'problem');
    if (!config.useAllRecords && config.states.closedStates.length > 0) {
        catGr.addQuery('state', 'IN', config.states.closedStates.join(','));
    }
    catGr.addQuery('sys_created_on', '>=', 'javascript:gs.daysAgoStart(' + config.thresholds.maxAge + ')');
    catGr.groupBy('category');
    catGr.addAggregate('COUNT');
    catGr.query();

    var categories = [];
    while (catGr.next()) {
        categories.push({
            category: catGr.getValue('category'),
            // Explicit radix; a non-numeric aggregate counts as 0 so sorting stays well-defined.
            count: parseInt(catGr.getAggregate('COUNT'), 10) || 0
        });
    }

    categories.sort(function (a, b) {
        return b.count - a.count;
    });
    return categories;
}
295+
296+
/**
 * Combines the individual analyses into a single 0-100 quality score.
 *
 * Weights: completeness 40, text quality 40, time quality 20. When no usable
 * `timeAnalysis.tooQuickPct` is supplied, the time component is left out and
 * the remaining weights are rescaled to 100 instead of throwing or
 * producing NaN.
 *
 * @param {Object} completeness Map of field name to an object with a numeric
 *     `percentage` (0-100).
 * @param {Object} textQuality Object with `description.goodQualityPct` and
 *     `closeNotes.goodQualityPct` (0-100).
 * @param {Object} [timeAnalysis] Object with numeric `tooQuickPct` (0-100).
 * @returns {number} Weighted score out of 100.
 */
function calculateOverallScore(completeness, textQuality, timeAnalysis) {
    var weights = {completeness: 40, textQuality: 40, timeQuality: 20};

    // Completeness: mean of the per-field percentages.
    var compTotal = 0;
    var compCount = 0;
    for (var field in completeness) {
        if (Object.prototype.hasOwnProperty.call(completeness, field)) {
            compTotal += completeness[field].percentage;
            compCount++;
        }
    }
    var compScore = compCount > 0 ? (compTotal / compCount) : 0;

    // Text quality: mean of the two good-quality percentages.
    var textScore = (textQuality.description.goodQualityPct + textQuality.closeNotes.goodQualityPct) / 2;

    var score = 0;
    score += (compScore / 100) * weights.completeness;
    score += (textScore / 100) * weights.textQuality;

    var hasTimeData = !!timeAnalysis &&
        typeof timeAnalysis.tooQuickPct === 'number' &&
        isFinite(timeAnalysis.tooQuickPct);

    if (!hasTimeData) {
        // Rescale the two available components so the result is still out of 100.
        return score / (weights.completeness + weights.textQuality) * 100;
    }

    // Time quality: share of records that were not resolved too quickly.
    var timeScore = 100 - timeAnalysis.tooQuickPct;
    score += (timeScore / 100) * weights.timeQuality;

    return score;
}
319+
320+
})();

0 commit comments

Comments
 (0)