@@ -15,28 +15,51 @@ class FunctionPaths
1515 private string $ function_name ;
1616
1717 /** @var array<Path> */
18- private array $ paths ;
18+ private array $ paths = [];
19+
20+ private ?int $ max_length = null ;
21+ private ?float $ sample_rate = null ;
1922
/**
 * @param string   $function_name Name of the function whose paths are collected
 * @param int|null $max_length    Target maximum number of paths to keep;
 *                                null means no limit is applied
 */
public function __construct(string $function_name, ?int $max_length = null)
{
    $this->function_name = $function_name;
    $this->max_length = $max_length;
}
2629
/**
 * Record a path for this function, down-sampling so the stored list
 * never holds more than twice max_length entries (plus the one being added).
 *
 * While no sample rate is set, every path is stored. Each time the stored
 * count exceeds 2 * max_length, a random max_length of the stored paths
 * are kept and the sample rate is halved (the first trim sets it to 50%),
 * so newly offered paths are admitted at roughly the same rate at which
 * older paths survived trimming. When max_length is null nothing is ever
 * trimmed or sampled.
 *
 * @param Path $path The path to record
 */
public function appendPath(Path $path): void
{
    // mt_rand() / (mt_getrandmax() + 1) lies in [0, 1), so a path is
    // admitted with probability equal to the sample rate.
    if ($this->sample_rate !== null
        && mt_rand() / (mt_getrandmax() + 1) >= $this->sample_rate
    ) {
        return;
    }

    $this->paths[] = $path;

    if ($this->max_length === null
        || count($this->paths) <= 2 * $this->max_length
    ) {
        return;
    }

    // Keep a random max_length of the stored paths. shuffle() and
    // array_slice() both leave the list sequentially indexed.
    shuffle($this->paths);
    $this->paths = array_slice($this->paths, 0, $this->max_length);

    // Halve the admission rate on every trim (null counts as 100%) so the
    // list stays bounded however many paths are offered.
    $this->sample_rate = ($this->sample_rate ?? 1.0) / 2;
}
3154
3255 /**
3356 * @return \Generator<FunctionPaths>
3457 */
35- public static function fromFileName (string $ file_name , bool $ use_node_ids = false ): \Generator
58+ public static function fromFileName (string $ file_name , bool $ use_node_ids = false , ? int $ max_length = null ): \Generator
3659 {
3760 $ ast = parse_file ($ file_name , get_version ());
3861 foreach ((new FunctionScanner ())($ ast ) as $ function_ast ) {
39- $ function_path = new FunctionPaths ($ function_ast ->children ['name ' ] ?? 'anonymous ' );
62+ $ function_path = new FunctionPaths ($ function_ast ->children ['name ' ] ?? 'anonymous ' , $ max_length );
4063 foreach ((new GraphNodeVisitor (null , $ use_node_ids ))($ function_ast )->allTerminals () as $ terminal ) {
4164 foreach ($ terminal ->allPathsToOtherTerminals () as $ path ) {
4265 $ function_path ->appendPath ($ path );
@@ -51,16 +74,16 @@ public function isEmpty(): bool
5174 return empty ($ this ->paths );
5275 }
5376
54- public function toString (? int $ max_length = null ): string
77+ public function toString (): string
5578 {
5679 if ($ this ->isEmpty ()) {
5780 return '' ;
5881 }
5982
6083 $ paths = $ this ->paths ;
61- if ($ max_length !== null && count ($ paths ) > $ max_length ) {
84+ if ($ this -> max_length !== null && count ($ paths ) > $ this -> max_length ) {
6285 shuffle ($ paths );
63- $ paths = array_slice ($ paths , 0 , $ max_length );
86+ $ paths = array_slice ($ paths , 0 , $ this -> max_length );
6487 }
6588
6689 return implode (' ' , [
0 commit comments