@@ -148,7 +148,7 @@ describe('processSchema', function () {
148148 expect ( result . arrayLengthMap ) . to . deep . equal ( { } ) ;
149149 } ) ;
150150
151- it ( 'limits sample values to 10 ' , function ( ) {
151+ it ( 'limits sample values to 5 ' , function ( ) {
152152 const manyValues = Array . from ( { length : 20 } , ( _ , i ) => `value${ i } ` ) ;
153153
154154 const schema : Schema = {
@@ -177,9 +177,9 @@ describe('processSchema', function () {
177177
178178 const result = processSchema ( schema ) ;
179179
180- expect ( result . fieldInfo . field . sampleValues ) . to . have . length ( 10 ) ;
180+ expect ( result . fieldInfo . field . sampleValues ) . to . have . length ( 5 ) ;
181181 expect ( result . fieldInfo . field . sampleValues ) . to . deep . equal (
182- manyValues . slice ( 0 , 10 )
182+ manyValues . slice ( 0 , 5 )
183183 ) ;
184184 } ) ;
185185
@@ -486,7 +486,7 @@ describe('processSchema', function () {
486486 } ,
487487 binary : {
488488 type : 'Binary' ,
489- sampleValues : [ 'dGVzdA==' ] ,
489+ // sampleValues property should be absent for Binary fields
490490 probability : 1.0 ,
491491 } ,
492492 regex : {
@@ -533,6 +533,132 @@ describe('processSchema', function () {
533533 expect ( result . arrayLengthMap ) . to . deep . equal ( { } ) ;
534534 } ) ;
535535
536+ it ( 'excludes sample values for Binary fields to avoid massive payloads' , function ( ) { // One Binary field and one String field are processed together so the outputs can be contrasted
537+ const embedding = new Binary ( Buffer . from ( [ 1 , 2 , 3 , 4 ] ) ) ; // 4-byte Binary payload standing in for an embedding value
538+ const schema : Schema = {
539+ fields : [
540+ {
541+ name : 'plot_embedding' ,
542+ path : [ 'plot_embedding' ] ,
543+ count : 1 ,
544+ type : [ 'Binary' ] ,
545+ probability : 1.0 ,
546+ hasDuplicates : false ,
547+ types : [
548+ {
549+ name : 'Binary' ,
550+ bsonType : 'Binary' ,
551+ path : [ 'plot_embedding' ] ,
552+ count : 1 ,
553+ probability : 1.0 ,
554+ values : [ embedding ] , // this value is expected NOT to show up in the processed output
555+ } ,
556+ ] ,
557+ } ,
558+ {
559+ name : 'regular_field' ,
560+ path : [ 'regular_field' ] ,
561+ count : 1 ,
562+ type : [ 'String' ] ,
563+ probability : 1.0 ,
564+ hasDuplicates : false ,
565+ types : [
566+ {
567+ name : 'String' ,
568+ bsonType : 'String' ,
569+ path : [ 'regular_field' ] ,
570+ count : 1 ,
571+ probability : 1.0 ,
572+ values : [ 'test' ] , // control value: expected to be reported as a sample
573+ } ,
574+ ] ,
575+ } ,
576+ ] ,
577+ count : 1 ,
578+ } ;
579+
580+ const result = processSchema ( schema ) ;
581+
582+ expect ( result . fieldInfo ) . to . deep . equal ( { // deep equality, so an unexpected sampleValues key on the Binary entry fails the test
583+ plot_embedding : {
584+ type : 'Binary' ,
585+ // sampleValues property should be absent for Binary fields
586+ probability : 1.0 ,
587+ } ,
588+ regular_field : {
589+ type : 'String' ,
590+ sampleValues : [ 'test' ] , // Should still have sample values for non-Binary fields
591+ probability : 1.0 ,
592+ } ,
593+ } ) ;
594+ expect ( result . arrayLengthMap ) . to . deep . equal ( { } ) ; // neither field in this schema is an array
595+ } ) ;
596+
597+ it ( 'truncates very long sample values to prevent massive payloads' , function ( ) { // Feeds one over-long and one short string and checks how each is reported
598+ const longText = 'A' . repeat ( 1000 ) ; // 1000 characters; the assertion below expects only the first 300 to be kept
599+ const schema : Schema = {
600+ fields : [
601+ {
602+ name : 'longField' ,
603+ path : [ 'longField' ] ,
604+ count : 1 ,
605+ type : [ 'String' ] ,
606+ probability : 1.0 ,
607+ hasDuplicates : false ,
608+ types : [
609+ {
610+ name : 'String' ,
611+ bsonType : 'String' ,
612+ path : [ 'longField' ] ,
613+ count : 1 ,
614+ probability : 1.0 ,
615+ values : [ longText , 'short' ] , // over-long value first, short value second
616+ } ,
617+ ] ,
618+ } ,
619+ ] ,
620+ count : 1 ,
621+ } ;
622+
623+ const result = processSchema ( schema ) ;
624+
625+ expect ( result . fieldInfo . longField . sampleValues ) . to . have . length ( 2 ) ; // both values are kept; only their content may be shortened
626+ expect ( result . fieldInfo . longField . sampleValues ! [ 0 ] ) . to . equal (
627+ 'A' . repeat ( 300 ) + '...' // first 300 characters followed by a literal '...' marker
628+ ) ;
629+ expect ( result . fieldInfo . longField . sampleValues ! [ 1 ] ) . to . equal ( 'short' ) ; // short value is expected to pass through unchanged
630+ } ) ;
631+
632+ it ( 'rounds probability to 2 decimal places' , function ( ) { // Checks the probability reported in fieldInfo for a single String field
633+ const schema : Schema = {
634+ fields : [
635+ {
636+ name : 'field' ,
637+ path : [ 'field' ] ,
638+ count : 1 ,
639+ type : [ 'String' ] ,
640+ probability : 0.23076923076923078 , // Field-level probability with many decimal places
641+ hasDuplicates : false ,
642+ types : [
643+ {
644+ name : 'String' ,
645+ bsonType : 'String' ,
646+ path : [ 'field' ] ,
647+ count : 1 ,
648+ probability : 1.0 , // type-level probability; the asserted 0.23 matches the field-level value above, not this one
649+ values : [ 'test' ] ,
650+ } ,
651+ ] ,
652+ } ,
653+ ] ,
654+ count : 1 ,
655+ } ;
656+
657+ const result = processSchema ( schema ) ;
658+
659+ expect ( result . fieldInfo . field . probability ) . to . equal ( 0.23 ) ; // 0.23076923076923078 rounded to 2 decimal places
660+ } ) ;
661+
536662 it ( 'transforms nested document field' , function ( ) {
537663 const schema : Schema = {
538664 fields : [
@@ -908,7 +1034,7 @@ describe('processSchema', function () {
9081034 expect ( result . fieldInfo ) . to . deep . equal ( {
9091035 'cube[][][]' : {
9101036 type : 'Number' ,
911- sampleValues : [ 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 ] ,
1037+ sampleValues : [ 1 , 2 , 3 , 4 , 5 ] ,
9121038 probability : 1.0 ,
9131039 } ,
9141040 } ) ;
0 commit comments