Skip to content

Commit eb5ed0e

Browse files
authored
[DF-Shape] Use a set range domain for the column names of data frames (#2032)
* feat: set range domain with min and range set
* feat: tests for set range domain
* feat: integrate set range domain in data frame shape inference
* feat: adapt benchmarking to new set range domain
* test: adapt domain tests to new set range domain
* test: adapt inference tests to new set range domain
* feat-fix: type error in set range domain
* feat-fix: outsource set range creation
* feat: improve documentation of set range domain
* feat-fix: clarify abstract domain mutability
* refactor: rename extendUp/Down to widenUp/Down
* refactor: outsource bottom and top symbol
* refactor: getter for lower and upper bound of set range domain
* feat: use named record for set ranges
1 parent dde6fa9 commit eb5ed0e

31 files changed

+1770
-1209
lines changed

src/abstract-interpretation/data-frame/absint-visitor.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
import { type CfgBasicBlockVertex, type CfgSimpleVertex, type ControlFlowInformation , CfgVertexType, getVertexRootId, isMarkerVertex } from '../../control-flow/control-flow-graph';
2-
import { type SemanticCfgGuidedVisitorConfiguration , SemanticCfgGuidedVisitor } from '../../control-flow/semantic-cfg-guided-visitor';
1+
import { type CfgBasicBlockVertex, type CfgSimpleVertex, type ControlFlowInformation, CfgVertexType, getVertexRootId, isMarkerVertex } from '../../control-flow/control-flow-graph';
2+
import { type SemanticCfgGuidedVisitorConfiguration, SemanticCfgGuidedVisitor } from '../../control-flow/semantic-cfg-guided-visitor';
33
import type { DataflowGraph } from '../../dataflow/graph/graph';
44
import type { DataflowGraphVertexFunctionCall, DataflowGraphVertexVariableDefinition } from '../../dataflow/graph/vertex';
55
import type { NoInfo, RNode } from '../../r-bridge/lang-4.x/ast/model/model';
66
import type { NormalizedAst, ParentInformation } from '../../r-bridge/lang-4.x/ast/model/processing/decorate';
77
import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id';
88
import { isNotUndefined } from '../../util/assert';
9-
import { DataFrameInfoMarker, hasDataFrameAssignmentInfo, hasDataFrameExpressionInfo, hasDataFrameInfoMarker, type AbstractInterpretationInfo } from './absint-info';
9+
import { type AbstractInterpretationInfo, DataFrameInfoMarker, hasDataFrameAssignmentInfo, hasDataFrameExpressionInfo, hasDataFrameInfoMarker } from './absint-info';
1010
import { DataFrameDomain, DataFrameStateDomain } from './dataframe-domain';
1111
import { mapDataFrameAccess } from './mappers/access-mapper';
1212
import { isAssignmentTarget, mapDataFrameVariableAssignment } from './mappers/assignment-mapper';

src/abstract-interpretation/data-frame/dataframe-domain.ts

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,13 @@ import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-i
22
import type { AbstractDomainValue } from '../domains/abstract-domain';
33
import { PosIntervalDomain } from '../domains/positive-interval-domain';
44
import { ProductDomain } from '../domains/product-domain';
5-
import { SetUpperBoundDomain } from '../domains/set-upper-bound-domain';
5+
import type { SetRangeLimit } from '../domains/set-range-domain';
6+
import { SetRangeDomain } from '../domains/set-range-domain';
67
import { StateAbstractDomain } from '../domains/state-abstract-domain';
78

89
/** The type of the abstract product representing the shape of data frames */
910
export type AbstractDataFrameShape = {
10-
colnames: SetUpperBoundDomain<string>;
11+
colnames: SetRangeDomain<string>;
1112
cols: PosIntervalDomain;
1213
rows: PosIntervalDomain;
1314
}
@@ -19,17 +20,17 @@ export type DataFrameShapeProperty<Property extends keyof AbstractDataFrameShape
1920
* The data frame abstract domain as product domain of a column names domain, column count domain, and row count domain.
2021
*/
2122
export class DataFrameDomain extends ProductDomain<AbstractDataFrameShape> {
22-
constructor(value: AbstractDataFrameShape, maxColNames?: number) {
23+
constructor(value: AbstractDataFrameShape, maxColNames?: SetRangeLimit | number) {
2324
super({
24-
colnames: new SetUpperBoundDomain(value.colnames.value, maxColNames ?? value.colnames.limit),
25+
colnames: new SetRangeDomain(value.colnames.value, maxColNames ?? value.colnames.limit),
2526
cols: new PosIntervalDomain(value.cols.value),
2627
rows: new PosIntervalDomain(value.rows.value)
2728
});
2829
}
2930

3031
public create(value: AbstractDataFrameShape): this;
3132
public create(value: AbstractDataFrameShape): DataFrameDomain {
32-
return new DataFrameDomain(value, this.maxColNames);
33+
return new DataFrameDomain(value, this.colnames.limit);
3334
}
3435

3536
/**
@@ -53,24 +54,17 @@ export class DataFrameDomain extends ProductDomain<AbstractDataFrameShape> {
5354
return this.value.rows;
5455
}
5556

56-
/**
57-
* The maximum number of inferred column names of the column names domain.
58-
*/
59-
public get maxColNames(): number {
60-
return this.value.colnames.limit;
61-
}
62-
6357
public static bottom(maxColNames?: number): DataFrameDomain {
6458
return new DataFrameDomain({
65-
colnames: SetUpperBoundDomain.bottom(maxColNames),
59+
colnames: SetRangeDomain.bottom(maxColNames),
6660
cols: PosIntervalDomain.bottom(),
6761
rows: PosIntervalDomain.bottom()
6862
});
6963
}
7064

7165
public static top(maxColNames?: number): DataFrameDomain {
7266
return new DataFrameDomain({
73-
colnames: SetUpperBoundDomain.top(maxColNames),
67+
colnames: SetRangeDomain.top(maxColNames),
7468
cols: PosIntervalDomain.top(),
7569
rows: PosIntervalDomain.top()
7670
});

src/abstract-interpretation/data-frame/mappers/access-mapper.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import type { ResolveInfo } from '../../../dataflow/eval/resolve/alias-tracking'
33
import type { DataflowGraph } from '../../../dataflow/graph/graph';
44
import type { RNode } from '../../../r-bridge/lang-4.x/ast/model/model';
55
import type { RAccess, RIndexAccess, RNamedAccess } from '../../../r-bridge/lang-4.x/ast/model/nodes/r-access';
6-
import { type RFunctionArgument , EmptyArgument } from '../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call';
6+
import { type RFunctionArgument, EmptyArgument } from '../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call';
77
import type { ParentInformation } from '../../../r-bridge/lang-4.x/ast/model/processing/decorate';
88
import { RType } from '../../../r-bridge/lang-4.x/ast/model/type';
99
import type { DataFrameExpressionInfo, DataFrameOperation } from '../absint-info';

src/abstract-interpretation/data-frame/mappers/arguments.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,15 @@ import { isUseVertex, VertexType } from '../../../dataflow/graph/vertex';
44
import { toUnnamedArgument } from '../../../dataflow/internal/process/functions/call/argument/make-argument';
55
import type { RNode } from '../../../r-bridge/lang-4.x/ast/model/model';
66
import type { RArgument } from '../../../r-bridge/lang-4.x/ast/model/nodes/r-argument';
7-
import { type RFunctionArgument, type RFunctionCall , EmptyArgument } from '../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call';
7+
import { type RFunctionArgument, type RFunctionCall, EmptyArgument } from '../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call';
88
import type { RSymbol } from '../../../r-bridge/lang-4.x/ast/model/nodes/r-symbol';
99
import type { ParentInformation } from '../../../r-bridge/lang-4.x/ast/model/processing/decorate';
10+
import { visitAst } from '../../../r-bridge/lang-4.x/ast/model/processing/visitor';
1011
import { RType } from '../../../r-bridge/lang-4.x/ast/model/type';
1112
import { RNull } from '../../../r-bridge/lang-4.x/convert-values';
1213
import type { AbstractInterpretationInfo } from '../absint-info';
13-
import { resolveIdToDataFrameShape } from '../shape-inference';
1414
import { resolveIdToArgName, resolveIdToArgValue, unquoteArgument } from '../resolve-args';
15-
import { visitAst } from '../../../r-bridge/lang-4.x/ast/model/processing/visitor';
15+
import { resolveIdToDataFrameShape } from '../shape-inference';
1616

1717
/** Regular expression representing valid columns names, e.g. for `data.frame` */
1818
const ColNamesRegex = /^[A-Za-z.][A-Za-z0-9_.]*$/;

src/abstract-interpretation/data-frame/mappers/function-mapper.ts

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,22 @@
1-
import { VariableResolve } from '../../../config';
1+
import { VariableResolve } from '../../../config';
22
import { type ResolveInfo } from '../../../dataflow/eval/resolve/alias-tracking';
33
import type { DataflowGraph } from '../../../dataflow/graph/graph';
44
import { toUnnamedArgument } from '../../../dataflow/internal/process/functions/call/argument/make-argument';
55
import { findSource } from '../../../dataflow/internal/process/functions/call/built-in/built-in-source';
6+
import type { ReadOnlyFlowrAnalyzerContext } from '../../../project/context/flowr-analyzer-context';
67
import type { RNode } from '../../../r-bridge/lang-4.x/ast/model/model';
7-
import { type RFunctionArgument , EmptyArgument } from '../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call';
8+
import { type RFunctionArgument, EmptyArgument } from '../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call';
89
import type { ParentInformation } from '../../../r-bridge/lang-4.x/ast/model/processing/decorate';
910
import { RType } from '../../../r-bridge/lang-4.x/ast/model/type';
10-
import { type RParseRequest , requestFromInput } from '../../../r-bridge/retriever';
11+
import { type RParseRequest, requestFromInput } from '../../../r-bridge/retriever';
1112
import { assertUnreachable, isNotUndefined, isUndefined } from '../../../util/assert';
1213
import { readLineByLineSync } from '../../../util/files';
1314
import type { DataFrameExpressionInfo, DataFrameOperation } from '../absint-info';
1415
import { DataFrameDomain } from '../dataframe-domain';
1516
import { resolveIdToArgName, resolveIdToArgValue, resolveIdToArgValueSymbolName, resolveIdToArgVectorLength, unescapeSpecialChars } from '../resolve-args';
1617
import type { ConstraintType } from '../semantics';
1718
import { resolveIdToDataFrameShape } from '../shape-inference';
18-
import { type FunctionParameterLocation , escapeRegExp, filterValidNames, getArgumentValue, getEffectiveArgs, getFunctionArgument, getFunctionArguments, getUnresolvedSymbolsInExpression, hasCriticalArgument, isDataFrameArgument, isNamedArgument, isRNull } from './arguments';
19-
import type { ReadOnlyFlowrAnalyzerContext } from '../../../project/context/flowr-analyzer-context';
19+
import { type FunctionParameterLocation, escapeRegExp, filterValidNames, getArgumentValue, getEffectiveArgs, getFunctionArgument, getFunctionArguments, getUnresolvedSymbolsInExpression, hasCriticalArgument, isDataFrameArgument, isNamedArgument, isRNull } from './arguments';
2020

2121
/**
2222
* Represents the different types of data frames in R
@@ -564,7 +564,7 @@ type OtherDataFrameFunctionMapping = OtherDataFrameEntryPoint | OtherDataFrameTr
564564
* - `args` contains the function call arguments
565565
* - `params` contains the expected argument location for each parameter of the function
566566
* - `info` contains the resolve information
567-
* - `ctx` access to the current flowR analyzer context
567+
* - `ctx` contains the current flowR analyzer context
568568
*/
569569
type DataFrameFunctionMapping<Params extends object> = (
570570
args: readonly RFunctionArgument<ParentInformation>[],
@@ -1127,20 +1127,20 @@ function mapDataFrameMutate(
11271127
});
11281128
}
11291129

1130-
if(deletedCols === undefined || deletedCols.length > 0) {
1130+
if(mutatedCols === undefined || mutatedCols.length > 0 || deletedCols?.length === 0) {
11311131
result.push({
1132-
operation: 'removeCols',
1132+
operation: 'mutateCols',
11331133
operand: operand?.info.id,
1134-
colnames: deletedCols,
1135-
options: { maybe: true }
1134+
colnames: mutatedCols
11361135
});
11371136
operand = undefined;
11381137
}
1139-
if(mutatedCols === undefined || mutatedCols.length > 0 || deletedCols?.length === 0) {
1138+
if(deletedCols === undefined || deletedCols.length > 0) {
11401139
result.push({
1141-
operation: 'mutateCols',
1140+
operation: 'removeCols',
11421141
operand: operand?.info.id,
1143-
colnames: mutatedCols
1142+
colnames: deletedCols,
1143+
options: { maybe: true }
11441144
});
11451145
operand = undefined;
11461146
}
@@ -1381,6 +1381,7 @@ function getRequestFromRead(
13811381
}
13821382
}
13831383
request = request ? ctx.files.resolveRequest(request).r : undefined;
1384+
13841385
return { source, request };
13851386
}
13861387

src/abstract-interpretation/data-frame/resolve-args.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { type ResolveInfo , resolveIdToValue } from '../../dataflow/eval/resolve/alias-tracking';
1+
import { type ResolveInfo, resolveIdToValue } from '../../dataflow/eval/resolve/alias-tracking';
22
import type { RArgument } from '../../r-bridge/lang-4.x/ast/model/nodes/r-argument';
33
import type { ParentInformation } from '../../r-bridge/lang-4.x/ast/model/processing/decorate';
44
import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id';

0 commit comments

Comments
 (0)