A comprehensive set of extension methods for Microsoft.Data.Analysis.DataFrame that provides pandas-like functionality for .NET data science and numerical computing.
- Arithmetic Operations - Element-wise Plus, Minus, Times, Divide
- Calculations - Diff, Apply, Pow operations
- Cumulative Operations - Running sums and absolute sums
- Rolling Windows - Moving averages and custom rolling calculations
- Statistical Methods - Mean, Median, StdDev, Variance, Min, Max, Sum, Count, Quantile, Describe
- Mathematical Functions - Abs, Log, Log10, Exp, Sqrt, Sin, Cos, Round
- Filtering - Predicate-based and index-based filtering
- Column Management - Selection, existence checking, type-safe retrieval
- Null/NaN Handling - Drop rows with missing data
- Shift Operations - Lag/lead column values
- I/O Operations - Pretty printing and RFC 4180 compliant CSV export
- Syntactic Sugar - Method chaining with fluent API
- Multi-targeting - Supports .NET 6.0, 7.0, and 8.0
Install-Package Dimension.DataFrame.Extensions
dotnet add package Dimension.DataFrame.Extensions
<PackageReference Include="Dimension.DataFrame.Extensions" Version="1.1.0" />
using Dimension.DataFrame.Extensions;
using Microsoft.Data.Analysis;
// Create a DataFrame
var prices = new PrimitiveDataFrameColumn<double>("Price", new[] { 100.0, 105.0, 103.0, 108.0, 110.0 });
var volumes = new PrimitiveDataFrameColumn<int>("Volume", new[] { 1000, 1500, 1200, 1800, 2000 });
var df = new DataFrame(prices, volumes);
// Calculate price differences
var priceDiff = prices.Diff<double>();
priceDiff.AddTo(df, "PriceChange");
// Calculate rolling average (3-period)
var rollingAvg = prices.Rolling(3, values => values.Average(v => v!.Value));
rollingAvg.AddTo(df, "MA_3");
// Print the DataFrame
df.Print();
var col1 = new PrimitiveDataFrameColumn<int>("A", new[] { 1, 2, 3, 4, 5 });
var col2 = new PrimitiveDataFrameColumn<int>("B", new[] { 10, 20, 30, 40, 50 });
// Addition
var sum = col1.Plus(col2); // [11, 22, 33, 44, 55]
// Subtraction
var diff = col1.Minus(col2); // [-9, -18, -27, -36, -45]
// Multiplication
var product = col1.Times(col2); // [10, 40, 90, 160, 250]
// Division
var quotient = col2.Divide(col1, "Quotient"); // [10.0, 10.0, 10.0, 10.0, 10.0]
var data = new PrimitiveDataFrameColumn<int>("Data", new[] { 1, 2, 3, 4, 5 });
// Cumulative sum
var cumSum = data.Cumulate(); // [1, 3, 6, 10, 15]
// Cumulative absolute sum
var negData = new PrimitiveDataFrameColumn<int>("NegData", new[] { -1, 2, -3, 4, -5 });
var cumAbsSum = negData.CumulateAbs(); // [1, 3, 6, 10, 15]
var prices = new PrimitiveDataFrameColumn<double>("Price", new[] { 100.0, 105.0, 103.0, 108.0 });
// Lag by 1 period (shift forward)
var lag1 = prices.Shift(1); // [null, 100.0, 105.0, 103.0]
// Lead by 1 period (shift backward)
var lead1 = prices.Shift(-1); // [105.0, 103.0, 108.0, null]
// Custom fill value
var lagWithFill = prices.Shift(1, 0.0); // [0.0, 100.0, 105.0, 103.0]
var data = new PrimitiveDataFrameColumn<double>("Data", new[] { 1.0, 2.0, 3.0, 4.0, 5.0 });
// Rolling sum
var rollingSum = data.Rolling(3, values => values.Sum(v => v!.Value));
// [null, null, 6.0, 9.0, 12.0]
// Rolling average
var rollingAvg = data.Rolling(3, values => values.Average(v => v!.Value));
// [null, null, 2.0, 3.0, 4.0]
// Rolling maximum
var rollingMax = data.Rolling(3, values => values.Max(v => v!.Value));
// [null, null, 3.0, 4.0, 5.0]
var data = new PrimitiveDataFrameColumn<int>("Data", new[] { 1, 2, 3, 4, 5 });
// Square all values
var squared = data.Apply(x => x * x, "Squared"); // [1, 4, 9, 16, 25]
// Apply custom transformation
var transformed = data.Apply(x => x * 2 + 1, "Transformed"); // [3, 5, 7, 9, 11]
var df = new DataFrame(
new PrimitiveDataFrameColumn<int>("A", new[] { 1, 2, 3, 4, 5 }),
new PrimitiveDataFrameColumn<double>("B", new[] { 1.5, 2.5, 3.5, 4.5, 5.5 })
);
// Filter by predicate
var filtered = df.Filter<int>("A", value => value > 3);
// Returns DataFrame with rows where A > 3
// Filter by row indices
var subset = df.Filter(new[] { 0, 2, 4 });
// Returns rows at indices 0, 2, and 4
var df = new DataFrame(
new PrimitiveDataFrameColumn<int>("A", new int?[] { 1, null, 3, 4 }),
new PrimitiveDataFrameColumn<double>("B", new[] { 1.0, 2.0, double.NaN, 4.0 })
);
// Drop rows with nulls
var noNulls = df.DropNulls(); // Rows 0, 2, and 3 remain (only row 1 contains a null)
// Drop rows with NaN values
var noNaNs = df.DropNAs(); // Rows 0, 1, and 3 remain
// Drop rows with either nulls or NaNs
var clean = df.DropNullsOrNAs(); // Only rows 0 and 3 remain
var df = new DataFrame();
var col1 = new PrimitiveDataFrameColumn<int>("A", new[] { 1, 2, 3 });
var col2 = new PrimitiveDataFrameColumn<int>("B", new[] { 10, 20, 30 });
// Chain operations together
col1.Plus(col2)
.Pow(2)
.WithName<int>("Sum_Squared")
.AddTo(df);
// df now contains column "Sum_Squared" with values [121, 484, 1089]
var df = new DataFrame(
new PrimitiveDataFrameColumn<int>("A", new[] { 1, 2, 3 }),
new PrimitiveDataFrameColumn<int>("B", new[] { 10, 20, 30 }),
new PrimitiveDataFrameColumn<int>("C", new[] { 100, 200, 300 })
);
// Select specific columns
var subset = df.SelectColumns("A", "C");
// Check if column exists
bool hasColumn = df.ColumnExists("B"); // true
// Try to get column with type safety
if (df.TryGetColumn<int>("A", out var columnA))
{
// Use columnA
}
var df = new DataFrame(
new PrimitiveDataFrameColumn<int>("ID", new[] { 1, 2, 3 }),
new StringDataFrameColumn("Name", new[] { "Alice", "Bob", "Charlie" }),
new PrimitiveDataFrameColumn<double>("Score", new[] { 95.5, 87.3, 92.1 })
);
// Print to debug output (aligned columns)
df.Print(numRows: 10, numberFormat: "F2");
// Save to CSV
df.SaveToCsv("output.csv", sep: ",", includeHeader: true);
var data = new PrimitiveDataFrameColumn<double>("Data", new[] { 1.5, 2.3, 3.7, 4.2, 5.8, 6.1, 7.9, 8.4, 9.2, 10.5 });
// Calculate mean
var mean = data.Mean(); // 5.96
// Calculate median
var median = data.Median(); // 5.95
// Calculate standard deviation
var stdDev = data.StdDev(); // Sample std dev
// Calculate variance
var variance = data.Variance(); // Sample variance
// Get min and max
var min = data.Min(); // 1.5
var max = data.Max(); // 10.5
// Calculate sum
var sum = data.Sum(); // 59.6
// Get count of non-null values
var count = data.Count(); // 10
// Calculate specific quantile (e.g., 75th percentile)
var q75 = data.Quantile(0.75);
// Get comprehensive statistics
var stats = data.Describe();
// Returns: (Count, Mean, StdDev, Min, Q25, Median, Q75, Max)
Console.WriteLine($"Count: {stats.Count}, Mean: {stats.Mean}, Median: {stats.Median}");
var data = new PrimitiveDataFrameColumn<double>("Data", new[] { -2.5, -1.0, 0.0, 1.0, 2.5 });
// Absolute value
var absValues = data.Abs(); // [2.5, 1.0, 0.0, 1.0, 2.5]
// Natural logarithm
var positiveData = new PrimitiveDataFrameColumn<double>("Positive", new[] { 1.0, 2.718, 7.389 });
var logValues = positiveData.Log(); // [0.0, 1.0, 2.0]
// Base-10 logarithm
var log10Values = positiveData.Log10();
// Logarithm with custom base
var log2Values = positiveData.Log(2); // Log base 2
// Exponential (e^x)
var expData = new PrimitiveDataFrameColumn<double>("Exp", new[] { 0.0, 1.0, 2.0 });
var expValues = expData.Exp(); // [1.0, 2.718, 7.389]
// Square root
var sqrtData = new PrimitiveDataFrameColumn<double>("SqrtData", new[] { 0.0, 1.0, 4.0, 9.0, 16.0 });
var sqrtValues = sqrtData.Sqrt(); // [0.0, 1.0, 2.0, 3.0, 4.0]
// Trigonometric functions
var angles = new PrimitiveDataFrameColumn<double>("Angles", new[] { 0.0, Math.PI/2, Math.PI });
var sineValues = angles.Sin();
var cosineValues = angles.Cos();
// Rounding
var decimals = new PrimitiveDataFrameColumn<double>("Decimals", new[] { 1.234, 5.678, 9.999 });
var rounded = decimals.Round(2); // [1.23, 5.68, 10.0]
var roundedInt = decimals.Round(); // [1.0, 6.0, 10.0]
- .NET 6.0, 7.0, or 8.0
- Microsoft.Data.Analysis 0.21.1 or later
- MathNet.Numerics 5.0.0 or later
Different operations handle null values in different ways:
Arithmetic operations:
- Null values are treated as default(T) (typically 0 for numeric types)
- Example: 1 + null = 1 + 0 = 1
Statistical methods:
- Null values are skipped and excluded from calculations
- Example: Mean([1, null, 3]) = (1 + 3) / 2 = 2.0
Shift operations:
- Null values are preserved in their new positions
- Fill values can be specified for positions vacated by the shift
Rolling windows:
- Null values are skipped within each window
- The operation is applied only to non-null values
Drop operations:
- DropNulls() - Removes rows containing null values
- DropNAs() - Removes rows containing NaN values (for float/double)
- DropNullsOrNAs() - Removes rows containing either nulls or NaNs
Contributions are welcome! Please feel free to submit a Pull Request.
- Fork the repository
- Create your feature branch (git checkout -b feature/AmazingFeature)
- Commit your changes (git commit -m 'Add some AmazingFeature')
- Push to the branch (git push origin feature/AmazingFeature)
- Open a Pull Request
dotnet test
Run performance benchmarks to compare operations:
cd Dimension.DataFrame.Extensions.Benchmarks
dotnet run -c Release
Run specific benchmarks:
# Run only arithmetic benchmarks
dotnet run -c Release -- --filter *ArithmeticBenchmarks*
# Run only statistics benchmarks
dotnet run -c Release -- --filter *StatisticsBenchmarks*
# Export results to HTML and JSON
dotnet run -c Release -- --exporters json,html
Benchmark categories:
- ArithmeticBenchmarks - Plus, Minus, Times, Divide performance
- StatisticsBenchmarks - Mean, Median, StdDev, Variance, Describe performance
- MathBenchmarks - Abs, Log, Exp, Sqrt, trigonometric functions
- RollingWindowBenchmarks - Rolling window operations with various sizes
git clone https://github.com/dimension-zero/Dimension.Data.Extensions.DataFrame.git
cd Dimension.Data.Extensions.DataFrame
dotnet build
dotnet pack --configuration Release
This project is licensed under the MIT License - see the LICENSE file for details.
Dimension Technologies
- Built on top of Microsoft.Data.Analysis
- Inspired by pandas for Python
- Uses MathNet.Numerics for numerical operations
For issues, questions, or contributions, please visit the GitHub repository.
Issued under the MIT Licence by Dimension Technologies.