Skip to content

Commit 48552c5

Browse files
committed
Auto sampling. Directory exclusions
1 parent 7e3a19b commit 48552c5

File tree

6 files changed

+61
-26
lines changed

6 files changed

+61
-26
lines changed

Dockerfile

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
11
FROM php:8.2.2-cli
22

33
RUN apt-get update --fix-missing
4-
RUN apt-get install -y git
54

65
# nikic/php-ast
6+
RUN apt-get install -y git
77
RUN cd /tmp; git clone https://github.com/nikic/php-ast.git && cd php-ast && phpize && ./configure && make && make install
88
RUN echo "extension=ast" > /usr/local/etc/php/conf.d/docker-php-ast.ini
9+
RUN rm -rf /tmp/php-ast
10+
RUN apt-get purge -y git
911

1012
# Composer
1113
RUN cd /tmp; curl -O https://getcomposer.org/installer && php ./installer --install-dir=/usr/local/bin --filename=composer
14+
RUN rm /tmp/installer
1215

1316
RUN mkdir -p /code2seq-paths-php
1417
WORKDIR /code2seq-paths-php
1518

19+
# Copy the source and executable
1620
COPY vendor /code2seq-paths-php/vendor
1721
COPY src /code2seq-paths-php/src
1822
COPY bin /code2seq-paths-php/bin
1923

20-
ENTRYPOINT ["/code2seq-paths-php/bin/code2seq-paths"]
24+
ENTRYPOINT ["/code2seq-paths-php/bin/code2seq-paths"]

src/Paths/FunctionPaths.php

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,28 +15,51 @@ class FunctionPaths
1515
private string $function_name;
1616

1717
/** @var array<Path> */
18-
private array $paths;
18+
private array $paths = [];
19+
20+
private ?int $max_length = null;
21+
private ?float $sample_rate = null;
1922

2023
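/** @param ?int $max_length Cap on the number of paths (drives sampling in appendPath() and truncation in toString()); null means no cap */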
21-
public function __construct(string $function_name, array $paths = [])
24+
public function __construct(string $function_name, ?int $max_length = null)
2225
{
2326
$this->function_name = $function_name;
24-
$this->paths = $paths;
27+
$this->max_length = $max_length;
2528
}
2629

2730
public function appendPath(Path $path): void
2831
{
29-
$this->paths[] = $path;
32+
if ($this->sample_rate === null) {
33+
$this->paths[] = $path;
34+
} else {
35+
if (rand(0, 1000) / 1000.0 >= $this->sample_rate) {
36+
$this->paths[] = $path;
37+
}
38+
}
39+
40+
// Once we break 2x the max paths, randomly cut it in half and start
41+
// sampling at 50%
42+
if ($this->sample_rate === null && $this->max_length !== null) {
43+
if (count($this->paths) > (2 * $this->max_length)) {
44+
// Set a sample rate of 50%
45+
$this->sample_rate = 0.5;
46+
47+
// Trim off a random 50% of existing paths
48+
$paths = $this->paths;
49+
shuffle($paths);
50+
$this->paths = array_values(array_slice($paths, 0, $this->max_length));
51+
}
52+
}
3053
}
3154

3255
/**
3356
* @return \Generator<FunctionPaths>
3457
*/
35-
public static function fromFileName(string $file_name, bool $use_node_ids = false): \Generator
58+
public static function fromFileName(string $file_name, bool $use_node_ids = false, ?int $max_length = null): \Generator
3659
{
3760
$ast = parse_file($file_name, get_version());
3861
foreach ((new FunctionScanner())($ast) as $function_ast) {
39-
$function_path = new FunctionPaths($function_ast->children['name'] ?? 'anonymous');
62+
$function_path = new FunctionPaths($function_ast->children['name'] ?? 'anonymous', $max_length);
4063
foreach ((new GraphNodeVisitor(null, $use_node_ids))($function_ast)->allTerminals() as $terminal) {
4164
foreach ($terminal->allPathsToOtherTerminals() as $path) {
4265
$function_path->appendPath($path);
@@ -51,16 +74,16 @@ public function isEmpty(): bool
5174
return empty($this->paths);
5275
}
5376

54-
public function toString(?int $max_length = null): string
77+
public function toString(): string
5578
{
5679
if ($this->isEmpty()) {
5780
return '';
5881
}
5982

6083
$paths = $this->paths;
61-
if ($max_length !== null && count($paths) > $max_length) {
84+
if ($this->max_length !== null && count($paths) > $this->max_length) {
6285
shuffle($paths);
63-
$paths = array_slice($paths, 0, $max_length);
86+
$paths = array_slice($paths, 0, $this->max_length);
6487
}
6588

6689
return implode(' ', [

src/Paths/GraphNode/NonTerminal.php

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -58,16 +58,6 @@ public function allPathsToTerminals(PartialPath $prefix): \Generator
5858
yield from $this->children[$i]->allPathsToTerminals($prefix);
5959
}
6060
}
61-
62-
/*
63-
foreach ($this->children as $child) {
64-
// Skip the node we just popped out of
65-
if ($child === $previous_node) {
66-
continue;
67-
}
68-
yield from $child->allPathsToTerminals($prefix);
69-
}
70-
*/
7161
}
7262

7363
/**

src/Paths/GraphNodeVisitor.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ public function __construct(GraphNode $parent = null, bool $use_node_ids = false
2121
$this->use_node_ids = $use_node_ids;
2222
}
2323

24-
public function newGraphNodeVisitorWithParent(GraphNode $parent = null)
24+
public function newGraphNodeVisitorWithParent(GraphNode $parent = null): GraphNodeVisitor
2525
{
2626
return new GraphNodeVisitor($parent, $this->use_node_ids);
2727
}

src/Paths/Scan.php

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
<?php
2+
23
declare(strict_types=1);
34

45
namespace Paths;
@@ -8,9 +9,14 @@ final class Scan
89
/**
910
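* Passes each entry to fileOrDirectory() with the given callable, skipping any entry whose path contains one of the exclusion strings (substring match).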
1011
*/
11-
static function filesAndDirectories(array $files_and_directories, callable $callable): void
12+
static function filesAndDirectories(array $files_and_directories, array $exclude_files_and_directories, callable $callable): void
1213
{
1314
foreach ($files_and_directories as $file_or_directory) {
15+
foreach ($exclude_files_and_directories as $exclude_file_or_directory) {
16+
if (strpos($file_or_directory, $exclude_file_or_directory) !== false) {
17+
continue 2;
18+
}
19+
}
1420
self::fileOrDirectory($file_or_directory, $callable);
1521
}
1622
}
@@ -48,4 +54,4 @@ static function file(string $file_name, callable $callable): void
4854
{
4955
$callable($file_name);
5056
}
51-
}
57+
}

src/paths.php

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
use Paths\FunctionPaths;
1111

1212
$rest_index = 0;
13-
$opts = getopt('h', ['help', 'max-length:', 'seed:', 'ids'], $rest_index);
13+
$opts = getopt('hl:s:ie:', ['help', 'max-length:', 'seed:', 'ids', 'exclude:'], $rest_index);
1414

1515
if (!is_array($opts) || isset($opts['help'])) {
1616
echo <<<EOH
@@ -28,6 +28,9 @@
2828
2929
-i, --ids
3030
Use IDs rather than names for context nodes. Defaults to false.
31+
32+
-e PATH, --exclude=PATH
33+
Exclude the file or directory at the given path from being scanned.
3134
3235
...
3336
All other options will be treated as file names to
@@ -50,13 +53,22 @@
5053

5154
$use_node_ids = isset($opts['ids']) || isset($opts['i']);
5255

56+
$exclude_files_and_directories = array_merge(
57+
isset($opts['exclude'])
58+
? (is_array($opts['exclude']) ? $opts['exclude'] : [$opts['exclude']])
59+
: [],
60+
isset($opts['e'])
61+
? (is_array($opts['e']) ? $opts['e'] : [$opts['e']])
62+
: []
63+
);
64+
5365
$files_and_directories = [];
5466
if ($rest_index > 0) {
5567
$rest = array_slice($argv, $rest_index);
5668
$files_and_directories += $rest;
5769
}
5870

59-
Scan::filesAndDirectories($files_and_directories, function ($file_name) use ($max_length, $use_node_ids) {
71+
Scan::filesAndDirectories($files_and_directories, $exclude_files_and_directories, function ($file_name) use ($max_length, $use_node_ids) {
6072
if ('php' !== pathinfo($file_name, PATHINFO_EXTENSION)) {
6173
return;
6274
}

0 commit comments

Comments
 (0)