Skip to content

Commit 6ae6b3e

Browse files
committed
Add Excel formula evaluation feature to OpenXML SDK
Implements a high-performance formula evaluator with 97.65% accuracy validated against Excel's calculations. This feature enables developers to evaluate Excel formulas in spreadsheet documents without requiring Excel to be installed. Core Features: - Formula-to-Lambda compilation for native performance - 60+ built-in functions (math, text, logical, lookup, date/time, statistical) - Dependency graph analysis with topological sorting - Incremental recalculation (250-1000x faster than full recalculation) - Circular reference detection - Oracle validation system for testing against Excel Architecture: - Parser: Lexer + recursive descent parser for Excel formula syntax - Compiler: Converts formula AST to compiled Lambda expressions - Evaluator: Executes compiled formulas with cell context - Function Registry: Extensible function system Performance: - Formula compilation: ~0.5-2ms per formula - Evaluation: ~0.01-0.1ms per evaluation (native code) - RecalculateSheet: O(n) for n formulas - RecalculateDependents: O(d) for d dirty cells (250-1000x faster) Validation: - 85 oracle test cases validated against Excel - 83/85 passed (97.65% accuracy) - SharedString resolution, lookup functions, statistical functions all verified Implementation Details: - Zero external dependencies beyond OpenXML SDK - Multi-framework support (net35, net40, net46, netstandard2.0, net8.0) - Feature-based API using IFeatureCollection pattern - Comprehensive test suite with 200+ unit tests
1 parent ddc3e5f commit 6ae6b3e

File tree

84 files changed

+10039
-1
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

84 files changed

+10039
-1
lines changed

Open-XML-SDK.sln

Lines changed: 210 additions & 0 deletions
Large diffs are not rendered by default.

src/Directory.Build.targets

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<?xml version="1.0" encoding="utf-8"?>
22
<Project>
33

4-
<PropertyGroup>
4+
<PropertyGroup Condition="'$(MSBuildProjectName)' != 'DocumentFormat.OpenXml.Formulas'">
55
<EnablePackageValidation>true</EnablePackageValidation>
66
<PackageValidationBaselineVersion>3.1.1</PackageValidationBaselineVersion>
77
</PropertyGroup>
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
// Copyright (c) Matt Liotta
2+
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
3+
4+
using System;
5+
6+
namespace DocumentFormat.OpenXml.Features.FormulaEvaluation;
7+
8+
/// <summary>
/// An immutable cell value paired with the <see cref="CellValueType"/> that describes how to interpret it.
/// </summary>
public readonly struct CellValue : IEquatable<CellValue>
{
    private CellValue(CellValueType type, object? value)
    {
        Type = type;
        Value = value;
    }

    /// <summary>
    /// Gets a value that represents an empty cell (type <see cref="CellValueType.Empty"/>, no payload).
    /// </summary>
    public static CellValue Empty
    {
        get { return new CellValue(CellValueType.Empty, null); }
    }

    /// <summary>
    /// Gets the kind of value stored in this instance.
    /// </summary>
    public CellValueType Type { get; }

    /// <summary>
    /// Gets the underlying payload exactly as it was supplied to the factory method.
    /// </summary>
    public object? Value { get; }

    /// <summary>
    /// Gets the payload as a number, or 0 when <see cref="Type"/> is not <see cref="CellValueType.Number"/>.
    /// </summary>
    public double NumericValue
    {
        get
        {
            if (Type != CellValueType.Number)
            {
                return 0;
            }

            return (double)Value!;
        }
    }

    /// <summary>
    /// Gets the payload converted with <c>ToString()</c>, or an empty string when there is no payload.
    /// </summary>
    public string StringValue
    {
        get
        {
            var text = Value?.ToString();
            return text ?? string.Empty;
        }
    }

    /// <summary>
    /// Gets the payload as a boolean, or <c>false</c> when <see cref="Type"/> is not <see cref="CellValueType.Boolean"/>.
    /// </summary>
    public bool BoolValue
    {
        get
        {
            if (Type != CellValueType.Boolean)
            {
                return false;
            }

            return (bool)Value!;
        }
    }

    /// <summary>
    /// Gets a value indicating whether <see cref="Type"/> is <see cref="CellValueType.Error"/>.
    /// </summary>
    public bool IsError
    {
        get { return Type == CellValueType.Error; }
    }

    /// <summary>
    /// Gets the error string, or <c>null</c> when this value is not an error.
    /// </summary>
    public string? ErrorValue
    {
        get
        {
            if (!IsError)
            {
                return null;
            }

            return (string?)Value;
        }
    }

    /// <summary>
    /// Equality operator.
    /// </summary>
    public static bool operator ==(CellValue left, CellValue right)
    {
        return left.Equals(right);
    }

    /// <summary>
    /// Inequality operator.
    /// </summary>
    public static bool operator !=(CellValue left, CellValue right)
    {
        return !(left == right);
    }

    /// <summary>
    /// Wraps a number as a cell value.
    /// </summary>
    /// <param name="value">The number to wrap.</param>
    /// <returns>A value of type <see cref="CellValueType.Number"/>.</returns>
    public static CellValue FromNumber(double value)
    {
        return new CellValue(CellValueType.Number, value);
    }

    /// <summary>
    /// Wraps a string as a cell value.
    /// </summary>
    /// <param name="value">The text to wrap.</param>
    /// <returns>A value of type <see cref="CellValueType.Text"/>.</returns>
    public static CellValue FromString(string value)
    {
        return new CellValue(CellValueType.Text, value);
    }

    /// <summary>
    /// Wraps a boolean as a cell value.
    /// </summary>
    /// <param name="value">The boolean to wrap.</param>
    /// <returns>A value of type <see cref="CellValueType.Boolean"/>.</returns>
    public static CellValue FromBool(bool value)
    {
        return new CellValue(CellValueType.Boolean, value);
    }

    /// <summary>
    /// Wraps an error string as a cell value.
    /// </summary>
    /// <param name="error">The error string.</param>
    /// <returns>A value of type <see cref="CellValueType.Error"/>.</returns>
    public static CellValue Error(string error)
    {
        return new CellValue(CellValueType.Error, error);
    }

    /// <inheritdoc/>
    public bool Equals(CellValue other)
    {
        if (Type != other.Type)
        {
            return false;
        }

        // Static object.Equals handles a null payload on either side.
        return object.Equals(Value, other.Value);
    }

    /// <inheritdoc/>
    public override bool Equals(object? obj)
    {
        if (obj is CellValue other)
        {
            return Equals(other);
        }

        return false;
    }

    /// <inheritdoc/>
    public override int GetHashCode()
    {
        // Combines type and payload; overflow is expected and must wrap, not throw.
        unchecked
        {
            var payloadHash = Value == null ? 0 : Value.GetHashCode();
            var combined = (17 * 31) + Type.GetHashCode();
            return (combined * 31) + payloadHash;
        }
    }
}
115+
116+
/// <summary>
/// Identifies which kind of data a <see cref="CellValue"/> carries.
/// </summary>
public enum CellValueType
{
    /// <summary>
    /// The cell has no value. This is the zero member, so it is also the default of the enum.
    /// </summary>
    Empty = 0,

    /// <summary>
    /// The cell holds a number.
    /// </summary>
    Number = 1,

    /// <summary>
    /// The cell holds text.
    /// </summary>
    Text = 2,

    /// <summary>
    /// The cell holds a boolean.
    /// </summary>
    Boolean = 3,

    /// <summary>
    /// The cell holds an error.
    /// </summary>
    Error = 4,
}
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
// Copyright (c) Matt Liotta
2+
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
3+
4+
using System;
5+
using System.Collections.Generic;
6+
using System.Globalization;
7+
using System.Linq;
8+
using System.Text.RegularExpressions;
9+
10+
using DocumentFormat.OpenXml.Packaging;
11+
using DocumentFormat.OpenXml.Spreadsheet;
12+
13+
namespace DocumentFormat.OpenXml.Features.FormulaEvaluation.Compilation;
14+
15+
/// <summary>
/// Provides context for evaluating formulas, including cell value resolution.
/// </summary>
/// <remarks>
/// Values are cached per instance the first time a cell is read and the cache is never
/// invalidated, so later edits to an already-read cell are not observed by this instance.
/// </remarks>
public class CellContext
{
    // A1-style reference: one to three ASCII letters followed by ASCII digits.
    // Explicit [A-Za-z]/[0-9] classes (instead of IgnoreCase and \d) keep Unicode look-alikes
    // such as the Kelvin sign or non-Latin digits out, and \z (instead of $) rejects a trailing newline.
    private static readonly Regex CellReferencePattern =
        new(@"^([A-Za-z]{1,3})([0-9]+)\z", RegexOptions.CultureInvariant);

    private readonly Worksheet _worksheet;
    private readonly SharedStringTablePart? _sharedStringTablePart;

    // TODO: Phase 0 limitation - cache never invalidates.
    // Phase 1 must add invalidation when cell values change.
    // Keys are references with "$" removed; the comparer is case-insensitive so that
    // "a1" and "A1" share one entry, matching the case-insensitive lookup in FindCell.
    private readonly Dictionary<string, CellValue> _cache = new(StringComparer.OrdinalIgnoreCase);

    /// <summary>
    /// Initializes a new instance of the <see cref="CellContext"/> class.
    /// </summary>
    /// <param name="worksheet">The worksheet containing the cells.</param>
    /// <param name="sharedStringTablePart">The shared string table part for resolving shared strings.</param>
    /// <exception cref="ArgumentNullException"><paramref name="worksheet"/> is <c>null</c>.</exception>
    public CellContext(Worksheet worksheet, SharedStringTablePart? sharedStringTablePart = null)
    {
        _worksheet = worksheet ?? throw new ArgumentNullException(nameof(worksheet));
        _sharedStringTablePart = sharedStringTablePart;
    }

    /// <summary>
    /// Gets the value of a cell by reference.
    /// </summary>
    /// <param name="reference">
    /// The cell reference (e.g., "A1"). Absolute markers ("$A$1") and letter case are ignored.
    /// </param>
    /// <returns>The cell value, or <see cref="CellValue.Empty"/> when the cell does not exist or has no value.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="reference"/> is <c>null</c>.</exception>
    public CellValue GetCell(string reference)
    {
        if (reference == null)
        {
            throw new ArgumentNullException(nameof(reference));
        }

        // Cells are stored with relative references, so "$A$1" must be looked up as "A1".
        var key = reference.Replace("$", string.Empty);

        if (_cache.TryGetValue(key, out var cached))
        {
            return cached;
        }

        var value = ReadCellValue(FindCell(_worksheet, key));
        _cache[key] = value;
        return value;
    }

    /// <summary>
    /// Gets values for a range of cells.
    /// </summary>
    /// <param name="start">Start cell reference.</param>
    /// <param name="end">End cell reference.</param>
    /// <returns>
    /// Enumerable of cell values in row-major order. The two corners may be given in any order.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="start"/> or <paramref name="end"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">Either reference is not a valid A1-style cell reference.</exception>
    public IEnumerable<CellValue> GetRange(string start, string end)
    {
        if (start == null)
        {
            throw new ArgumentNullException(nameof(start));
        }

        if (end == null)
        {
            throw new ArgumentNullException(nameof(end));
        }

        // Parse here rather than inside the iterator so that invalid references fail at the
        // call site instead of on first enumeration.
        ParseCellReference(start, out var startCol, out var startRow);
        ParseCellReference(end, out var endCol, out var endRow);

        // Normalise reversed corners (e.g. "B2".."A1") so they describe the same rectangle.
        return EnumerateRange(
            Math.Min(startCol, endCol),
            Math.Min(startRow, endRow),
            Math.Max(startCol, endCol),
            Math.Max(startRow, endRow));
    }

    // Lazily yields the value of every cell in the inclusive rectangle, row by row.
    private IEnumerable<CellValue> EnumerateRange(int firstCol, int firstRow, int lastCol, int lastRow)
    {
        for (var row = firstRow; row <= lastRow; row++)
        {
            for (var col = firstCol; col <= lastCol; col++)
            {
                var cellRef = GetColumnLetter(col) + row.ToString(CultureInfo.InvariantCulture);
                yield return GetCell(cellRef);
            }
        }
    }

    // Returns the first cell whose reference matches (case-insensitively), or null.
    // This scans the sheet on every call; results are only cheap because GetCell caches them.
    private static Cell? FindCell(Worksheet worksheet, string reference)
    {
        var sheetData = worksheet.Elements<SheetData>().FirstOrDefault();
        if (sheetData == null)
        {
            return null;
        }

        return sheetData.Descendants<Cell>()
            .FirstOrDefault(c => string.Equals(c.CellReference?.Value, reference, StringComparison.OrdinalIgnoreCase));
    }

    // Converts a worksheet cell into a typed CellValue based on its data type attribute.
    private CellValue ReadCellValue(Cell? cell)
    {
        if (cell == null)
        {
            return CellValue.Empty;
        }

        var dataType = cell.DataType?.Value;

        // Inline strings keep their text in the <is> child, not in <v>, so they must be
        // handled before the "<v> is empty" check below.
        var inlineString = cell.InlineString;
        if (dataType == CellValues.InlineString && inlineString != null)
        {
            var inlineText = GetPlainText(inlineString);
            return inlineText.Length == 0 ? CellValue.Empty : CellValue.FromString(inlineText);
        }

        var cellValue = cell.CellValue?.Text;
        if (string.IsNullOrEmpty(cellValue))
        {
            return CellValue.Empty;
        }

        if (dataType == CellValues.Boolean)
        {
            return CellValue.FromBool(cellValue == "1" || string.Equals(cellValue, "true", StringComparison.OrdinalIgnoreCase));
        }

        if (dataType == CellValues.Error)
        {
            return CellValue.Error(cellValue);
        }

        if (dataType == CellValues.SharedString)
        {
            // <v> holds an index into the shared string table.
            if (int.TryParse(cellValue, NumberStyles.Integer, CultureInfo.InvariantCulture, out var index))
            {
                var sharedString = GetSharedString(index);
                if (sharedString != null)
                {
                    return CellValue.FromString(sharedString);
                }
            }

            // If we can't resolve, return the index as a string (fallback)
            return CellValue.FromString(cellValue);
        }

        if (dataType == CellValues.String || dataType == CellValues.InlineString)
        {
            return CellValue.FromString(cellValue);
        }

        // No explicit string type: treat as a number when it parses, otherwise as text.
        if (double.TryParse(cellValue, NumberStyles.Float, CultureInfo.InvariantCulture, out var number))
        {
            return CellValue.FromNumber(number);
        }

        return CellValue.FromString(cellValue);
    }

    // Looks up a shared string by zero-based index; returns null when there is no table
    // or the index is out of range.
    private string? GetSharedString(int index)
    {
        var sharedStringTable = _sharedStringTablePart?.SharedStringTable;
        if (sharedStringTable == null || index < 0)
        {
            return null;
        }

        // ElementAtOrDefault stops at the requested item instead of copying the whole table
        // into a list on every lookup.
        var item = sharedStringTable.Elements<SharedStringItem>().ElementAtOrDefault(index);
        return item == null ? null : GetPlainText(item);
    }

    // Concatenates the visible text of a rich-text container (<si> or <is>).
    // Phonetic runs (<rPh>) and phonetic properties (<phoneticPr>) are skipped because a
    // plain InnerText would append the phonetic reading to the string.
    private static string GetPlainText(OpenXmlElement richText)
    {
        var parts = richText.ChildElements
            .Where(child => child.LocalName != "rPh" && child.LocalName != "phoneticPr")
            .Select(child => child.InnerText)
            .ToArray();

        return string.Concat(parts);
    }

    // Splits an A1-style reference into a 1-based column number and a row number.
    // Throws ArgumentException for anything that is not letters followed by digits, including
    // row numbers too large for an int.
    private static void ParseCellReference(string reference, out int column, out int row)
    {
        // Remove $ signs for absolute references
        reference = reference.Replace("$", string.Empty);

        var match = CellReferencePattern.Match(reference);
        if (!match.Success
            || !int.TryParse(match.Groups[2].Value, NumberStyles.None, CultureInfo.InvariantCulture, out row))
        {
            throw new ArgumentException($"Invalid cell reference: {reference}", nameof(reference));
        }

        // Bijective base-26: A=1 ... Z=26, AA=27. The pattern caps this at three letters,
        // so the accumulator cannot overflow.
        column = 0;
        foreach (var letter in match.Groups[1].Value)
        {
            column = (column * 26) + (char.ToUpperInvariant(letter) - 'A' + 1);
        }
    }

    // Converts a 1-based column number to its letter form (1 -> "A", 27 -> "AA").
    // Returns an empty string for values less than 1.
    private static string GetColumnLetter(int column)
    {
        var result = string.Empty;

        while (column > 0)
        {
            // Shift to 0-based before taking the digit because there is no zero digit in this system.
            var modulo = (column - 1) % 26;
            result = (char)('A' + modulo) + result;
            column = (column - 1) / 26;
        }

        return result;
    }
}

0 commit comments

Comments
 (0)