Skip to content

Commit fbd54be

Browse files
committed
Merge remote-tracking branch 'cuda/moving_stats'
2 parents f31b6d8 + 644ea70 commit fbd54be

File tree

5 files changed

+632
-0
lines changed

5 files changed

+632
-0
lines changed

src/Numerics/Numerics.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@
217217
<Compile Include="SpecialFunctions\Logistic.cs" />
218218
<Compile Include="SpecialFunctions\TestFunctions.cs" />
219219
<Compile Include="Statistics\ArrayStatistics.cs" />
220+
<Compile Include="Statistics\MovingStatistics.cs" />
220221
<Compile Include="Statistics\RunningStatistics.cs" />
221222
<Compile Include="Statistics\QuantileDefinition.cs" />
222223
<Compile Include="Statistics\RankDefinition.cs" />
Lines changed: 359 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,359 @@
1+
// <copyright file="MovingStatistics.cs" company="Math.NET">
2+
// Math.NET Numerics, part of the Math.NET Project
3+
// http://numerics.mathdotnet.com
4+
// http://github.com/mathnet/mathnet-numerics
5+
// http://mathnetnumerics.codeplex.com
6+
//
7+
// Copyright (c) 2009-2015 Math.NET
8+
//
9+
// Permission is hereby granted, free of charge, to any person
10+
// obtaining a copy of this software and associated documentation
11+
// files (the "Software"), to deal in the Software without
12+
// restriction, including without limitation the rights to use,
13+
// copy, modify, merge, publish, distribute, sublicense, and/or sell
14+
// copies of the Software, and to permit persons to whom the
15+
// Software is furnished to do so, subject to the following
16+
// conditions:
17+
//
18+
// The above copyright notice and this permission notice shall be
19+
// included in all copies or substantial portions of the Software.
20+
//
21+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22+
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23+
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24+
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25+
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26+
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27+
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28+
// OTHER DEALINGS IN THE SOFTWARE.
29+
// </copyright>
30+
31+
using System;
32+
using System.Collections.Generic;
33+
using MathNet.Numerics.Properties;
34+
35+
namespace MathNet.Numerics.Statistics
36+
{
37+
/// <summary>
/// Running statistics over a sliding window holding the most recent samples; updated by pushing values.
/// </summary>
/// <remarks>
/// Mean and variance are accumulated incrementally over finite samples. Pushing NaN or an infinity
/// restarts that accumulation, and the special value is remembered for <see cref="WindowSize"/> pushes,
/// which is exactly as long as it stays inside the window. Minimum and maximum are taken from the
/// samples currently in the window.
/// </remarks>
public class MovingStatistics
{
    // Ring buffer with the raw samples currently inside the window (NaN and infinities included).
    readonly double[] _oldValues;
    readonly int _windowSize;

    // Number of consecutive finite samples (capped at the window size) accumulated in _m1 and _m2.
    long _count;

    // Number of samples pushed since construction.
    long _totalCount;

    // Slot receiving the next sample; once the buffer is full this slot holds the oldest sample.
    int _lastIndex;

    // Number of slots of the ring buffer that hold a sample.
    int _filled;

    // Remaining pushes during which the most recent NaN / +Infinity / -Infinity is still in the window.
    int _lastNaNTimeToLive;
    int _lastPosInfTimeToLive;
    int _lastNegInfTimeToLive;

    // Running mean and running sum of squared deviations of the accumulated finite samples.
    double _m1;
    double _m2;

    // Cached extrema of the window; only meaningful while _extremaStale is false.
    double _max = double.NegativeInfinity;
    double _min = double.PositiveInfinity;
    bool _extremaStale;

    /// <summary>
    /// Creates empty moving statistics over a window of the given size.
    /// </summary>
    /// <param name="windowSize">The number of most recent samples to consider; must be at least 1.</param>
    /// <exception cref="ArgumentException">If <paramref name="windowSize"/> is smaller than 1.</exception>
    public MovingStatistics(int windowSize)
    {
        if (windowSize < 1)
        {
            throw new ArgumentException(string.Format(Resources.ArgumentMustBePositive), "windowSize");
        }

        _windowSize = windowSize;
        _oldValues = new double[_windowSize];
    }

    /// <summary>
    /// Creates moving statistics over a window of the given size and pushes all provided samples.
    /// </summary>
    /// <param name="windowSize">The number of most recent samples to consider; must be at least 1.</param>
    /// <param name="values">The samples to push, in order.</param>
    public MovingStatistics(int windowSize, IEnumerable<double> values)
        : this(windowSize)
    {
        PushRange(values);
    }

    /// <summary>
    /// Gets the size of the window, i.e. the number of most recent samples that are considered.
    /// </summary>
    public int WindowSize
    {
        get { return _windowSize; }
    }

    /// <summary>
    /// Gets the total number of samples pushed so far, including those that already left the window.
    /// </summary>
    public long Count
    {
        get { return _totalCount; }
    }

    /// <summary>
    /// Returns the minimum value of the samples in the window.
    /// Returns NaN if the window is empty or if any entry is NaN.
    /// </summary>
    public double Minimum
    {
        get
        {
            if (_filled == 0 || _lastNaNTimeToLive > 0)
            {
                return double.NaN;
            }

            RefreshExtrema();
            return _min;
        }
    }

    /// <summary>
    /// Returns the maximum value of the samples in the window.
    /// Returns NaN if the window is empty or if any entry is NaN.
    /// </summary>
    public double Maximum
    {
        get
        {
            if (_filled == 0 || _lastNaNTimeToLive > 0)
            {
                return double.NaN;
            }

            RefreshExtrema();
            return _max;
        }
    }

    /// <summary>
    /// Evaluates the sample mean, an estimate of the population mean.
    /// Returns NaN if the window is empty, if any entry is NaN, or if it holds infinities of both signs.
    /// Returns the respective infinity if the window holds infinities of a single sign.
    /// </summary>
    public double Mean
    {
        get
        {
            if (_lastNaNTimeToLive > 0 || (_lastPosInfTimeToLive > 0 && _lastNegInfTimeToLive > 0))
            {
                return double.NaN;
            }

            if (_lastPosInfTimeToLive > 0)
            {
                return double.PositiveInfinity;
            }

            if (_lastNegInfTimeToLive > 0)
            {
                return double.NegativeInfinity;
            }

            return _count == 0 ? double.NaN : _m1;
        }
    }

    /// <summary>
    /// Estimates the unbiased population variance from the samples in the window.
    /// On a dataset of size N will use an N-1 normalizer (Bessel's correction).
    /// Returns NaN if there are less than two entries or if any entry is NaN or negative infinity.
    /// Returns positive infinity if an entry is positive infinity (and none is NaN or negative infinity).
    /// </summary>
    public double Variance
    {
        get { return Dispersion(false); }
    }

    /// <summary>
    /// Evaluates the variance of the samples in the window, treated as the full population.
    /// On a dataset of size N will use an N normalizer and would thus be biased if applied to a subset.
    /// Returns NaN if there are less than two entries or if any entry is NaN or negative infinity.
    /// Returns positive infinity if an entry is positive infinity (and none is NaN or negative infinity).
    /// </summary>
    public double PopulationVariance
    {
        get { return Dispersion(true); }
    }

    /// <summary>
    /// Estimates the unbiased population standard deviation from the samples in the window.
    /// On a dataset of size N will use an N-1 normalizer (Bessel's correction).
    /// Returns NaN if there are less than two entries or if any entry is NaN or negative infinity.
    /// Returns positive infinity if an entry is positive infinity (and none is NaN or negative infinity).
    /// </summary>
    public double StandardDeviation
    {
        get { return Math.Sqrt(Dispersion(false)); }
    }

    /// <summary>
    /// Evaluates the standard deviation of the samples in the window, treated as the full population.
    /// On a dataset of size N will use an N normalizer and would thus be biased if applied to a subset.
    /// Returns NaN if there are less than two entries or if any entry is NaN or negative infinity.
    /// Returns positive infinity if an entry is positive infinity (and none is NaN or negative infinity).
    /// </summary>
    public double PopulationStandardDeviation
    {
        get { return Math.Sqrt(Dispersion(true)); }
    }

    /// <summary>
    /// Update the moving statistics by adding another observed sample (in-place).
    /// Once the window is full, the oldest sample is dropped.
    /// </summary>
    /// <param name="value">The sample to add; NaN and infinities are accepted.</param>
    public void Push(double value)
    {
        DecrementTimeToLive();

        // Store the raw sample; remember what it overwrites for the sliding update further down.
        var evicted = _oldValues[_lastIndex];
        _oldValues[_lastIndex] = value;
        _lastIndex++;
        if (_lastIndex == _windowSize)
        {
            _lastIndex = 0;
        }

        if (_filled < _windowSize)
        {
            _filled++;
        }

        _totalCount++;
        _extremaStale = true;

        if (double.IsNaN(value))
        {
            _lastNaNTimeToLive = _windowSize;
            ResetMoments();
            return;
        }

        if (double.IsPositiveInfinity(value))
        {
            _lastPosInfTimeToLive = _windowSize;
            ResetMoments();
            return;
        }

        if (double.IsNegativeInfinity(value))
        {
            _lastNegInfTimeToLive = _windowSize;
            ResetMoments();
            return;
        }

        if (_count < _windowSize)
        {
            // Still collecting finite samples: incremental mean and sum of squared deviations.
            _count++;
            var delta = value - _m1;
            var step = delta / _count;
            _m2 += delta * step * (_count - 1);
            _m1 += step;
        }
        else
        {
            // The last WindowSize samples were all finite, so the overwritten slot held the oldest
            // finite sample: replace its contribution by the one of the new sample.
            var delta = value - evicted;
            var previousMean = _m1;
            _m1 += delta / _count;
            _m2 += delta * (value - _m1 + evicted - previousMean);
        }
    }

    /// <summary>
    /// Update the moving statistics by adding a sequence of observed samples (in-place).
    /// </summary>
    /// <param name="values">The samples to push, in order.</param>
    /// <exception cref="ArgumentNullException">If <paramref name="values"/> is null.</exception>
    public void PushRange(IEnumerable<double> values)
    {
        if (values == null)
        {
            throw new ArgumentNullException("values");
        }

        foreach (var value in values)
        {
            Push(value);
        }
    }

    // Shared implementation of the variance properties; the flag selects the N (population)
    // over the N-1 (sample) normalizer.
    double Dispersion(bool population)
    {
        if (_lastNaNTimeToLive > 0 || _lastNegInfTimeToLive > 0)
        {
            return double.NaN;
        }

        if (_lastPosInfTimeToLive > 0)
        {
            return double.PositiveInfinity;
        }

        if (_count < 2)
        {
            return double.NaN;
        }

        // Rounding in the sliding update can leave a tiny negative sum of squares; a variance is never negative.
        var sumOfSquares = _m2 < 0d ? 0d : _m2;
        return sumOfSquares / (population ? _count : _count - 1);
    }

    // Recomputes the cached extrema from the window if a push invalidated them.
    // NaN entries never win a comparison and are skipped; the getters report NaN for them beforehand.
    void RefreshExtrema()
    {
        if (!_extremaStale)
        {
            return;
        }

        var min = double.PositiveInfinity;
        var max = double.NegativeInfinity;
        for (var i = 0; i < _filled; i++)
        {
            var sample = _oldValues[i];
            if (sample < min)
            {
                min = sample;
            }

            if (sample > max)
            {
                max = sample;
            }
        }

        _min = min;
        _max = max;
        _extremaStale = false;
    }

    // Ages the remembered NaN and infinities by one push.
    void DecrementTimeToLive()
    {
        if (_lastNaNTimeToLive > 0)
        {
            _lastNaNTimeToLive--;
        }

        if (_lastPosInfTimeToLive > 0)
        {
            _lastPosInfTimeToLive--;
        }

        if (_lastNegInfTimeToLive > 0)
        {
            _lastNegInfTimeToLive--;
        }
    }

    // Restarts the accumulation of mean and variance after a NaN or infinite sample.
    void ResetMoments()
    {
        _count = 0;
        _m1 = 0;
        _m2 = 0;
    }
}
359+
}

src/Numerics/Statistics/Statistics.cs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -931,5 +931,21 @@ public static double Entropy(IEnumerable<double?> data)
931931
{
932932
return StreamingStatistics.Entropy(data.Where(d => d.HasValue).Select(d => d.Value));
933933
}
934+
935+
/// <summary>
/// Evaluates the sample mean over a moving window, once for each sample.
/// The returned sequence is evaluated lazily and yields one mean per input sample;
/// an element is NaN while the window contains a NaN entry.
/// </summary>
/// <param name="samples">The sample stream to calculate the mean of.</param>
/// <param name="windowSize">The number of last samples to consider.</param>
public static IEnumerable<double> MovingAverage(this IEnumerable<double> samples, int windowSize)
{
    // Validate eagerly: the iterator below does not run before the first element is requested.
    if (samples == null)
    {
        throw new System.ArgumentNullException("samples");
    }

    if (windowSize < 1)
    {
        throw new System.ArgumentException(string.Format(MathNet.Numerics.Properties.Resources.ArgumentMustBePositive), "windowSize");
    }

    return MovingAverageIterator(samples, windowSize);
}

// Lazily yields the windowed mean after each sample. The statistics object is created inside the
// iterator so that every enumeration of the result starts from an empty window.
static IEnumerable<double> MovingAverageIterator(IEnumerable<double> samples, int windowSize)
{
    var movingStatistics = new MovingStatistics(windowSize);
    foreach (var sample in samples)
    {
        movingStatistics.Push(sample);
        yield return movingStatistics.Mean;
    }
}
934950
}
935951
}

0 commit comments

Comments
 (0)