From 09fce52b85798ecb370274f498b734f52b1d53d7 Mon Sep 17 00:00:00 2001 From: Gordon Smith Date: Wed, 26 Nov 2025 14:06:47 +0000 Subject: [PATCH] chore: fix readme and add copilot instructions Signed-off-by: Gordon Smith --- packages/comms/tests/workunit.spec.ts | 2 +- .../dataflow/.github/copilot-instructions.md | 112 ++++++++++++++++++ packages/dataflow/README.md | 46 +++---- 3 files changed, 136 insertions(+), 24 deletions(-) create mode 100644 packages/dataflow/.github/copilot-instructions.md diff --git a/packages/comms/tests/workunit.spec.ts b/packages/comms/tests/workunit.spec.ts index 259772cd5b..e91a5cd818 100644 --- a/packages/comms/tests/workunit.spec.ts +++ b/packages/comms/tests/workunit.spec.ts @@ -122,7 +122,7 @@ allPeople; return response; }); }); - }); + }, 30000); describe("Syntax Error", () => { it("eclSubmit", () => { diff --git a/packages/dataflow/.github/copilot-instructions.md b/packages/dataflow/.github/copilot-instructions.md new file mode 100644 index 0000000000..b3d0ebc092 --- /dev/null +++ b/packages/dataflow/.github/copilot-instructions.md @@ -0,0 +1,112 @@ +# @hpcc-js/dataflow Copilot Instructions + +## Architecture Overview + +This is a **functional data flow library** using JavaScript generators and iterators for lazy evaluation. Think of it as a streaming data pipeline where data flows through activities and is observed by sensors. 
+ +**Core Concepts:** +- **Source**: Either `T[]` or `IterableIterator` - the input data +- **Activities**: Transform data as it flows (`map`, `filter`, `sort`) - return iterators +- **Observers/Sensors**: Monitor data without modifying it (`count`, `max`, `mean`) - accumulate state +- **Pipe**: Chains activities together into reusable pipelines with full type safety + +**Key Files:** +- `src/activities/activity.ts` - Core type definitions for the entire system +- `src/utils/pipe.ts` - Complex TypeScript type magic for type-safe activity chaining +- `src/observers/observer.ts` - Observer pattern with `observe()` and `peek()` methods + +## Critical Patterns + +### Dual Signature Pattern (Performance Optimization) + +Activities use TypeScript overloads to support both immediate execution and curried usage: + +```typescript +// Immediate execution +export function map(source: Source, callbackFn: MapCallback): IterableIterator; +// Curried (returns reusable activity) +export function map(callbackFn: MapCallback): IterableActivity; + +export function map(s_or_cb: Source | MapCallback, callbackFn?: MapCallback) { + return isSource(s_or_cb) ? mapGen(callbackFn!)(s_or_cb) : mapGen(s_or_cb); +} +``` + +**Performance optimization (in progress):** Activities are being migrated from `isSource()` runtime checks to `arguments.length` checks for better performance. See `sort.ts` for the optimized pattern - it eliminates expensive runtime type inspection in favor of fast argument counting. + +### Generator Functions for Lazy Evaluation + +All activities use generator functions (`function*`) to enable lazy evaluation: + +```typescript +function* (source: Source) { + let i = -1; + for (const item of source) { + yield callbackFn(item, ++i); + } +} +``` + +This ensures data only flows when consumed (e.g., via `[...iterator]` or `for...of`). 
+ +### Observers Accumulate State + +Observers have two methods: +- `observe(value, index)` - Called for each item as it flows through +- `peek()` - Returns accumulated result without consuming the iterator + +Observers can be inserted into pipes using `sensor()` or converted to activities using `scalar()` or `activity()`. + +### Array Mutation Prevention + +**Always use `.slice()` before `.sort()` to avoid mutating input arrays:** + +```typescript +const arr = Array.isArray(source) ? source.slice() : [...source]; +yield* arr.sort(compareFn); +``` + +This pattern appears in `sort.ts`, `median.ts`, `quartile.ts`. + +## Build & Test Workflow + +**Build Commands:** +- `npm run build` - Parallel TypeScript compilation + Vite bundling (`run-p gen-types bundle`) +- `npm run gen-types` - Generate `.d.ts` files in `types/` directory +- `npm run bundle` - Vite builds UMD + ES modules to `dist/` + +**Testing:** +- `npm test` - Runs type checking + vitest (both node & browser environments) +- `npm run test-vitest` - Vitest only (dual environment: node + chromium) +- `npm run bench` - Performance benchmarks (see `tests/pipe.bench.ts`) + +**Test Structure:** +- Each activity/observer has a matching `.spec.ts` file in `tests/` +- `tests/pipe.spec.ts` and `tests/pipe.bench.ts` test pipeline composition +- Tests verify both immediate execution and curried usage patterns + +## TypeScript Configuration + +- Uses `"allowImportingTsExtensions": true` - **always use `.ts` extensions in imports** +- `"module": "NodeNext"` - ES modules with Node.js compatibility +- Type definitions generated to `types/` directory (not inline with source) + +## Common Gotchas + +1. **Index tracking:** Most activities use `let i = -1; for (const item) { ++i }` pattern - maintains correct index through transformations + +2. **Optional parameters with undefined:** When using `arguments.length` optimization, handle explicit `undefined` (e.g., `sort(source, undefined)` for default sort) + +3. 
**Type inference in pipe():** The `pipe()` function uses sophisticated TypeScript to infer return types - if types break, check that activity input/output types align correctly + +4. **Histogram edge cases:** `histogram` has special handling for empty sources - yields empty buckets with NaN bounds for `buckets` option, returns nothing for `min/range` option + +5. **Generator initialization:** Generators don't execute until iterated - sensors remain `undefined` until data flows through + +## Code Style + +- Use generator functions for all iterable activities +- Prefer `for...of` over manual iterator manipulation +- Use `yield*` to delegate to another generator +- Type parameters: `<T = any>` allows inference while providing fallback +- Function naming: `activityGen` helper functions create the generator, exported function handles overload dispatch diff --git a/packages/dataflow/README.md b/packages/dataflow/README.md index 08d9a3e5a8..6cba46695e 100644 --- a/packages/dataflow/README.md +++ b/packages/dataflow/README.md @@ -116,7 +116,7 @@ const concatDEF = concat(["d", "e", "f"]); concatDEF(["a", "b", "c"]); // => "a", "b", "c", "d", "e", "f" concatDEF(["1", "2", "3"]); // => "1", "2", "3", "d", "e", "f" ``` -[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/activities/concat.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/__tests__/concat.ts) +[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/activities/concat.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/tests/concat.spec.ts) # **each**(_iterable_, _callbackFn_): _iterable_
# **each**(_callbackFn_): (_iterable_) => _iterable_
@@ -130,7 +130,7 @@ const logFlow = each(console.log); logFlow(["a", "b", "c"]); // => "a", "b", "c" ``` -[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/activities/each.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/__tests__/each.ts) +[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/activities/each.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/tests/each.spec.ts) # **entries**(_iterable_): _iterable_
# **entries**(): (_iterable_) => _iterable_
@@ -143,7 +143,7 @@ entries(["a", "b", "c"]); // => [0, "a"], [1, "b"], [2, "c"] const calcEntries = entries(); calcEntries(["a", "b", "c"]); // => [0, "a"], [1, "b"], [2, "c"] ``` -[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/activities/entries.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/__tests__/entries.ts) +[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/activities/entries.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/tests/entries.spec.ts) # **filter**(_iterable_, _condition_): _iterable_
# **filter**(_condition_): (_iterable_) => _iterable_
@@ -159,7 +159,7 @@ const smallWords = filter(word => word.length <= 6); smallWords(words); // => "spray", "limit", "elite" ``` -[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/activities/filter.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/__tests__/filter.ts) +[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/activities/filter.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/tests/filter.spec.ts) # **first**(_iterable_, _number_): _iterable_
# **first**(_number_): (_iterable_) => _iterable_
@@ -175,7 +175,7 @@ const first2 = first(2); first2(words); // => "spray", "limit" ``` -[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/activities/first.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/__tests__/first.ts) +[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/activities/first.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/tests/first.spec.ts) # **group**(_iterable_, _condition_): _iterable_
# **group**(_condition_): (_iterable_) => _iterable_
@@ -191,7 +191,7 @@ const groupByLength = group(word => word.length); groupByLength(words); // => {key: 3, value: ["one", "two", "six"]}, {key: 4, value: ["four", "five"]}, { key: 5, value: ["three"]} ``` -[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/activities/group.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/__tests__/group.ts) +[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/activities/group.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/tests/group.spec.ts) # **histogram**(_iterable_, _condition_, _options_): _iterable_
# **histogram**(_condition_, _options_): (_iterable_) => _iterable_
@@ -214,7 +214,7 @@ histogram(data, n => n, { buckets: 3 }); // => {"from":1,"to":7,"value":[1,3,6] histogram(data, n => n, { min: 0, range: 5 }); // => {"from":0,"to":5,"value":[1,3]},{"from":5,"to":10,"value":[6]},{"from":10,"to":15,"value":[12,13,13,14]},{"from":15,"to":20,"value":[19]} ``` -[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/activities/histogram.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/__tests__/histogram.ts) +[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/activities/histogram.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/tests/histogram.spec.ts) # **map**(_iterable_, _callback_): _iterable_
# **map**(_callback_): (_iterable_) => _iterable_
@@ -228,7 +228,7 @@ const indexData = map((row, idx) => ({ ...row, index: idx + 1 })); indexData([{ n: 22 }, { n: 11 }, { n: 33 }]); // => { n: 22, index: 1 }, { n: 11, index: 2 }, { n: 33, index: 3 } ``` -[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/activities/map.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/__tests__/map.ts) +[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/activities/map.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/tests/map.spec.ts) # **skip**(_iterable_, _number_): _iterable_
# **skip**(_number_): (_iterable_) => _iterable_
@@ -244,7 +244,7 @@ const skip4 = skip(4); skip4(words); // => "destruction", "present" ``` -[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/activities/skip.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/__tests__/skip.ts) +[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/activities/skip.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/tests/skip.spec.ts) # **sort**(_iterable_, _compare_): _iterable_
# **sort**(_compare_): (_iterable_) => _iterable_
@@ -261,7 +261,7 @@ const reverseSort = sort((a, b) => b - a); reverseSort(numbers) // => 5, 4, 3, 2, 1 ``` -[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/activities/sort.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/__tests__/sort.ts) +[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/activities/sort.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/tests/sort.spec.ts) ### Sensors @@ -307,7 +307,7 @@ s2.peek(); // => 699; const doCount = scalar(count()); doCount([5, 1, 2, -3, 4]); // => 5 ``` -[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/observers/count.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/__tests__/count.ts) +[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/observers/count.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/tests/count.spec.ts) # **min**(): _Observer_
# **min**(_accessor_): _Observer_
@@ -329,7 +329,7 @@ s2.peek() // => 4 const calcMin = scalar(min(row => row.id)); calcMin([{ id: 22 }, { id: 44 }, { id: 33 }]); // => 22 ``` -[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/observers/min.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/__tests__/min.ts) +[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/observers/min.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/tests/min.spec.ts) # **max**(): _Observer_
# **max**(_accessor_): _Observer_
@@ -352,7 +352,7 @@ const calcMax = scalar(max(row => row.id)); calcMax([{ id: 22 }, { id: 44 }, { id: 33 }]); // => 44 ``` -[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/observers/max.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/__tests__/max.ts) +[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/observers/max.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/tests/max.spec.ts) # **extent**(): _Observer_
# **extent**(_accessor_): _Observer_
@@ -375,7 +375,7 @@ const calcExtent = scalar(extent(row => row.id)); calcExtent([{ id: 22 }, { id: 44 }, { id: 33 }]); // => [22, 44] ``` -[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/observers/extent.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/__tests__/extent.ts) +[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/observers/extent.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/tests/extent.spec.ts) # **mean**(): _Observer_
# **mean**(_accessor_): _Observer_
@@ -387,7 +387,7 @@ const calcMean = scalar(mean()); calcMean([5, -6, 1, 2, -2]) // => 0 ``` -[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/observers/mean.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/__tests__/mean.ts) +[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/observers/mean.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/tests/mean.spec.ts) # **median**(): _Observer_
# **median**(_accessor_): _Observer_
@@ -403,7 +403,7 @@ calcMedian([5, -6, 1, 2, -2, 6]) // => 1.5 calcMedian([9]) // => 9 ``` -[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/observers/median.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/__tests__/median.ts) +[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/observers/median.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/tests/median.spec.ts) # **quartile**(): _Observer_
# **quartile**(_accessor_): _Observer_
@@ -418,7 +418,7 @@ calcQuartile([1, 22, 133]) // => [1, 1, 22, 133 calcQuartile([2, 144, 33]) // => [2, 2, 33, 144, 144] ``` -[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/observers/quartile.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/__tests__/quartile.ts) +[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/observers/quartile.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/tests/quartile.spec.ts) # **reduce**(_reducer_[, _initialValue_]): _Observer_
@@ -433,7 +433,7 @@ calcReduce1([1, 2, 3, 4, 5]) // => 15 calcReduce2([1, 2, 3, 4, 5]) // => 25 ``` -[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/observers/reduce.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/__tests__/reduce.ts) +[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/observers/reduce.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/tests/reduce.spec.ts) # **variance**(): _Observer_
# **variance**(_accessor_): _Observer_
@@ -445,7 +445,7 @@ const calcVariance = scalar(variance()); calcVariance([5, 1, 2, 3, 4]) // => 2.5 ``` -[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/observers/variance.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/__tests__/variance.ts) +[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/observers/variance.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/tests/variance.spec.ts) # **deviation**(): _Observer_
# **deviation**(_accessor_): _Observer_
@@ -457,7 +457,7 @@ const calcDeviation = scalar(deviation()); calcDeviation([5, 1, 2, 3, 4]) // => 1.58113883008 == sqrt(2.5) ``` -[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/observers/deviation.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/__tests__/deviation.ts) +[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/observers/deviation.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/tests/deviation.spec.ts) # **distribution**(): _Observer_<_number_, { min: _number_, mean: _number_, max: _number_, deviation: _number_, variance: _number_}>
# **distribution**(_accessor_): _Observer_<_any_, { min: _number_, mean: _number_, max: _number_, deviation: _number_, variance: _number_}>
@@ -469,7 +469,7 @@ const calcDistribution = scalar(distribution()); calcDistribution([5, 1, 2, 3, 4]) // => { min: 1, mean: 3, max: 5, deviation: Math.sqrt(2.5), variance: 2.5} ``` -[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/observers/distribution.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/__tests__/distribution.ts) +[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/observers/distribution.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/tests/distribution.spec.ts) ### Convenience @@ -514,7 +514,7 @@ const process_2 = pipe( process_2([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); // => 0 ``` -[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/utils/pipe.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/__tests__/pipe.ts) +[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/utils/pipe.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/tests/pipe.spec.ts) # **generate**(_generatorFn_[, _maxIterations_]): _iterable_
@@ -527,4 +527,4 @@ generate(Math.random, 100); // => Random number iterator limited to 100 items ``` -[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/utils/generate.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/__tests__/generate.ts) +[[source]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/src/utils/generate.ts) [[tests]](https://github.com/hpcc-systems/Visualization/blob/trunk/packages/dataflow/tests/generate.spec.ts)