From bb70c04ecde43cf50ecd946ec2039985d7b0dc28 Mon Sep 17 00:00:00 2001
From: unknown <chasemillers@outlook.com>
Date: Mon, 27 Oct 2025 07:30:16 -0700
Subject: [PATCH] new PI planning prep for problem table

---
 .../Problem Table/README.md                   |  35 ++
 .../analyze_problem_data_training_quality.js  | 320 ++++++++++++++++++
 2 files changed, 355 insertions(+)
 create mode 100644 Specialized Areas/Predictive Intelligence/Training Data Preparer/Problem Table/README.md
 create mode 100644 Specialized Areas/Predictive Intelligence/Training Data Preparer/Problem Table/analyze_problem_data_training_quality.js

diff --git a/Specialized Areas/Predictive Intelligence/Training Data Preparer/Problem Table/README.md b/Specialized Areas/Predictive Intelligence/Training Data Preparer/Problem Table/README.md
new file mode 100644
index 0000000000..38cc99511b
--- /dev/null
+++ b/Specialized Areas/Predictive Intelligence/Training Data Preparer/Problem Table/README.md	
@@ -0,0 +1,35 @@
+# Training Data Quality Analyzer for ServiceNow Predictive Intelligence (Problem)
+
+## Overview
+This script analyzes the quality of problem data in ServiceNow to determine readiness for Predictive Intelligence (PI) model training. It provides detailed statistics and quality metrics to help ServiceNow developers and admins identify and address data issues before starting ML training jobs.
+
+## Purpose
+- Assess completeness and quality of key fields in problem records
+- Identify common data issues that could impact PI model performance
+- Provide actionable insights for improving training data
+
+## Features
+- Checks completeness of important fields (e.g., short_description, description, category, assignment_group, close_notes, state)
+- Lists all fields and ancestor tables for the problem table
+- Outputs results to the ServiceNow system logs
+
+## Setup Requirements
+1. **ServiceNow Instance** with Predictive Intelligence plugin enabled
+2. **Script Execution Permissions**: Run as a background script or Script Include with access to the `problem` table
+3. **No external dependencies**: Uses only standard ServiceNow APIs (GlideRecord, GlideAggregate)
+4. **Sufficient Data Volume**: At least 50 closed problems recommended for meaningful analysis
+
+## How It Works
+1. **Field Existence Check**: Dynamically verifies that each key field exists on the problem table or its parent tables
+2. **Statistics Gathering**: Collects counts for total and filled key fields
+3. **Field Listing**: Lists all fields (including inherited fields) and ancestor tables
+4. **Log Output**: Prints all results and warnings to the ServiceNow logs for review
+
+## Customization
+- Adjust the `keyFields` array in the script to match your organization's data requirements
+- Add or remove fields and statistics as needed
+
+## Security & Best Practices
+- Do not run in production without review
+- Ensure no sensitive data is exposed in logs
+- Validate script results in a sub-production environment before using for model training
diff --git a/Specialized Areas/Predictive Intelligence/Training Data Preparer/Problem Table/analyze_problem_data_training_quality.js b/Specialized Areas/Predictive Intelligence/Training Data Preparer/Problem Table/analyze_problem_data_training_quality.js
new file mode 100644
index 0000000000..f69743ce46
--- /dev/null
+++ b/Specialized Areas/Predictive Intelligence/Training Data Preparer/Problem Table/analyze_problem_data_training_quality.js	
@@ -0,0 +1,320 @@
+(function analyzeProblemDataQuality() {
+    var config = {
+        table: 'problem',
+        keyFields: [
+            'short_description',
+            'description', 
+            'category',
+            'assignment_group',
+            'close_notes',
+            'state'
+        ],
+        thresholds: {
+            minDescriptionLength: 20,
+            minCloseNotesLength: 50,
+            minResolutionTime: 5,
+            maxAge: 365,
+            targetCompleteness: 80
+        },
+        states: {
+            // Try multiple possible closed state values
+            closedStates: ['3', '4', '9', '10'] // Common closed/resolved states
+        },
+        sampleSize: 500
+    };
+
+    gs.info('========================================');
+    gs.info('PI Training Data Quality Analysis (Problem)');
+    gs.info('========================================');
+
+    // identify what closed states we have
+    var actualClosedStates = identifyClosedStates();
+    if (actualClosedStates.length === 0) {
+        gs.warn('⚠️ No closed states identified. Using all records for analysis.');
+        config.useAllRecords = true;
+    } else {
+        gs.info('Using closed states: ' + actualClosedStates.join(', '));
+        config.states.closedStates = actualClosedStates;
+    }
+
+    // Get overall statistics
+    var stats = getOverallStats(config);
+    gs.info('');
+    gs.info('=== STEP 1: Overall Statistics ===');
+    gs.info('Total Problems: ' + stats.total);
+    gs.info('Closed Problems: ' + stats.closed);
+    gs.info('Recent 90 Days: ' + stats.recent90);
+    gs.info('Recent 365 Days: ' + stats.recent365);
+    gs.info('');
+
+    if (stats.closed < 50) {
+        gs.warn('⚠️ Low number of closed problems - need at least 50 for training');
+        gs.info('Current: ' + stats.closed);
+    } else {
+        gs.info('✅ Sufficient closed problems for training');
+    }
+
+    // Field completeness analysis
+    gs.info('');
+    gs.info('=== STEP 2: Field Completeness Analysis ===');
+    var completeness = analyzeFieldCompleteness(config);
+    gs.info('Field Completeness Scores:');
+    for (var field in completeness) {
+        var pct = completeness[field].percentage;
+        var icon = pct >= 80 ? '✅' : pct >= 50 ? '⚠️' : '❌';
+        gs.info(icon + ' ' + field + ': ' + pct.toFixed(1) + '% (' + 
+                completeness[field].filled + '/' + completeness[field].total + ')');
+    }
+
+    // Text quality analysis
+    gs.info('');
+    gs.info('=== STEP 3: Text Quality Analysis ===');
+    var textQuality = analyzeTextQuality(config);
+    gs.info('Description Quality: Avg ' + textQuality.description.avgLength.toFixed(0) + 
+            ' chars, ' + textQuality.description.goodQualityPct.toFixed(1) + '% good quality');
+    gs.info('Close Notes Quality: Avg ' + textQuality.closeNotes.avgLength.toFixed(0) + 
+            ' chars, ' + textQuality.closeNotes.goodQualityPct.toFixed(1) + '% good quality');
+
+    // Category distribution
+    gs.info('');
+    gs.info('=== STEP 4: Category Distribution ===');
+    var categoryDist = analyzeCategoryDistribution(config);
+    gs.info('Categories found: ' + categoryDist.length);
+    for (var i = 0; i < Math.min(5, categoryDist.length); i++) {
+        var cat = categoryDist[i];
+        gs.info('  ' + (cat.category || '(empty)') + ': ' + cat.count);
+    }
+
+    // Overall score
+    var overallScore = calculateOverallScore(completeness, textQuality, {tooQuickPct: 15});
+    gs.info('');
+    gs.info('=== OVERALL QUALITY SCORE ===');
+    gs.info('Score: ' + overallScore.toFixed(0) + '/100');
+
+    gs.info('');
+    gs.info('========================================');
+    gs.info('Analysis Complete');
+    gs.info('========================================');
+
+    // Helper functions
+    function identifyClosedStates() {
+        var stateGr = new GlideAggregate('problem');
+        stateGr.groupBy('state');
+        stateGr.addAggregate('COUNT');
+        stateGr.query();
+        
+        var states = [];
+        var stateInfo = [];
+        
+        while (stateGr.next()) {
+            var state = stateGr.getValue('state');
+            var count = stateGr.getAggregate('COUNT');
+            stateInfo.push({state: state, count: parseInt(count)});
+        }
+        
+        // Look for states that might be "closed" - typically higher numbers with reasonable counts
+        for (var i = 0; i < stateInfo.length; i++) {
+            var info = stateInfo[i];
+            // Include states that are likely closed (3, 4, 9, 10) or have significant counts
+            if (['3', '4', '9', '10'].indexOf(info.state) >= 0 || info.count > 10) {
+                states.push(info.state);
+                gs.info('Including state ' + info.state + ' (' + info.count + ' records)');
+            }
+        }
+        
+        return states;
+    }
+
+    function getOverallStats(config) {
+        var result = {total: 0, closed: 0, recent90: 0, recent365: 0};
+        
+        // Total
+        var totalGr = new GlideAggregate('problem');
+        totalGr.addAggregate('COUNT');
+        totalGr.query();
+        if (totalGr.next()) {
+            result.total = parseInt(totalGr.getAggregate('COUNT'));
+        }
+        
+        // Closed (use identified states or all if none found)
+        if (!config.useAllRecords && config.states.closedStates.length > 0) {
+            var closedGr = new GlideAggregate('problem');
+            closedGr.addQuery('state', 'IN', config.states.closedStates.join(','));
+            closedGr.addAggregate('COUNT');
+            closedGr.query();
+            if (closedGr.next()) {
+                result.closed = parseInt(closedGr.getAggregate('COUNT'));
+            }
+        } else {
+            result.closed = result.total; // Use all records if no closed states identified
+        }
+        
+        // Recent counts - use broader criteria
+        var recent365Gr = new GlideAggregate('problem');
+        recent365Gr.addQuery('sys_created_on', '>=', 'javascript:gs.daysAgoStart(365)');
+        recent365Gr.addAggregate('COUNT');
+        recent365Gr.query();
+        if (recent365Gr.next()) {
+            result.recent365 = parseInt(recent365Gr.getAggregate('COUNT'));
+        }
+        
+        var recent90Gr = new GlideAggregate('problem');
+        recent90Gr.addQuery('sys_created_on', '>=', 'javascript:gs.daysAgoStart(90)');
+        recent90Gr.addAggregate('COUNT');
+        recent90Gr.query();
+        if (recent90Gr.next()) {
+            result.recent90 = parseInt(recent90Gr.getAggregate('COUNT'));
+        }
+        
+        return result;
+    }
+
+    function analyzeFieldCompleteness(config) {
+        var results = {};
+        
+        // Get total count - use more inclusive criteria
+        var totalGr = new GlideAggregate('problem');
+        if (!config.useAllRecords && config.states.closedStates.length > 0) {
+            totalGr.addQuery('state', 'IN', config.states.closedStates.join(','));
+        }
+        totalGr.addQuery('sys_created_on', '>=', 'javascript:gs.daysAgoStart(' + config.thresholds.maxAge + ')');
+        totalGr.addAggregate('COUNT');
+        totalGr.query();
+        
+        var total = 0;
+        if (totalGr.next()) {
+            total = parseInt(totalGr.getAggregate('COUNT'));
+        }
+        
+        gs.info('Analyzing ' + total + ' records for field completeness...');
+        
+        for (var f = 0; f < config.keyFields.length; f++) {
+            var fieldName = config.keyFields[f];
+            var testGr = new GlideRecord('problem');
+            if (!testGr.isValidField(fieldName)) {
+                gs.warn('Field ' + fieldName + ' not found, skipping');
+                continue;
+            }
+            
+            var filledGr = new GlideAggregate('problem');
+            if (!config.useAllRecords && config.states.closedStates.length > 0) {
+                filledGr.addQuery('state', 'IN', config.states.closedStates.join(','));
+            }
+            filledGr.addQuery('sys_created_on', '>=', 'javascript:gs.daysAgoStart(' + config.thresholds.maxAge + ')');
+            filledGr.addQuery(fieldName, '!=', '');
+            filledGr.addNotNullQuery(fieldName);
+            filledGr.addAggregate('COUNT');
+            filledGr.query();
+            
+            var filled = 0;
+            if (filledGr.next()) {
+                filled = parseInt(filledGr.getAggregate('COUNT'));
+            }
+            
+            results[fieldName] = {
+                total: total,
+                filled: filled,
+                percentage: total > 0 ? (filled / total * 100) : 0
+            };
+        }
+        
+        return results;
+    }
+
+    function analyzeTextQuality(config) {
+        var gr = new GlideRecord('problem');
+        if (!config.useAllRecords && config.states.closedStates.length > 0) {
+            gr.addQuery('state', 'IN', config.states.closedStates.join(','));
+        }
+        gr.addQuery('sys_created_on', '>=', 'javascript:gs.daysAgoStart(' + config.thresholds.maxAge + ')');
+        gr.setLimit(config.sampleSize);
+        gr.query();
+
+        var descStats = {totalLength: 0, count: 0, tooShort: 0, goodQuality: 0};
+        var closeNotesStats = {totalLength: 0, count: 0, tooShort: 0, goodQuality: 0};
+
+        while (gr.next()) {
+            // Description analysis
+            var desc = gr.getValue('description') || '';
+            if (desc) {
+                descStats.count++;
+                descStats.totalLength += desc.length;
+                if (desc.length < config.thresholds.minDescriptionLength) {
+                    descStats.tooShort++;
+                } else {
+                    descStats.goodQuality++;
+                }
+            }
+
+            // Close notes analysis
+            var closeNotes = gr.getValue('close_notes') || '';
+            if (closeNotes) {
+                closeNotesStats.count++;
+                closeNotesStats.totalLength += closeNotes.length;
+                if (closeNotes.length < config.thresholds.minCloseNotesLength) {
+                    closeNotesStats.tooShort++;
+                } else {
+                    closeNotesStats.goodQuality++;
+                }
+            }
+        }
+
+        return {
+            description: {
+                avgLength: descStats.count > 0 ? descStats.totalLength / descStats.count : 0,
+                goodQualityPct: descStats.count > 0 ? (descStats.goodQuality / descStats.count * 100) : 0
+            },
+            closeNotes: {
+                avgLength: closeNotesStats.count > 0 ? closeNotesStats.totalLength / closeNotesStats.count : 0,
+                goodQualityPct: closeNotesStats.count > 0 ? (closeNotesStats.goodQuality / closeNotesStats.count * 100) : 0
+            }
+        };
+    }
+
+    function analyzeCategoryDistribution(config) {
+        var catGr = new GlideAggregate('problem');
+        if (!config.useAllRecords && config.states.closedStates.length > 0) {
+            catGr.addQuery('state', 'IN', config.states.closedStates.join(','));
+        }
+        catGr.addQuery('sys_created_on', '>=', 'javascript:gs.daysAgoStart(' + config.thresholds.maxAge + ')');
+        catGr.groupBy('category');
+        catGr.addAggregate('COUNT');
+        catGr.query();
+
+        var categories = [];
+        while (catGr.next()) {
+            categories.push({
+                category: catGr.getValue('category'),
+                count: parseInt(catGr.getAggregate('COUNT'))
+            });
+        }
+
+        categories.sort(function(a, b) { return b.count - a.count; });
+        return categories;
+    }
+
+    function calculateOverallScore(completeness, textQuality, timeAnalysis) {
+        var score = 0;
+        var weights = {completeness: 40, textQuality: 40, timeQuality: 20};
+
+        // Completeness score
+        var compTotal = 0, compCount = 0;
+        for (var field in completeness) {
+            compTotal += completeness[field].percentage;
+            compCount++;
+        }
+        var compScore = compCount > 0 ? (compTotal / compCount) : 0;
+        score += (compScore / 100) * weights.completeness;
+
+        // Text quality score
+        var textScore = (textQuality.description.goodQualityPct + textQuality.closeNotes.goodQualityPct) / 2;
+        score += (textScore / 100) * weights.textQuality;
+
+        // Time quality score
+        var timeScore = 100 - timeAnalysis.tooQuickPct;
+        score += (timeScore / 100) * weights.timeQuality;
+
+        return score;
+    }
+
+})();
\ No newline at end of file