Coverage for pycommons / math / sample_statistics.py: 98%
328 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-11 03:04 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-11 03:04 +0000
1"""
2A simple and immutable basic statistics record computed over a data sample.
4Here we provide records of statistics that are computed over a fully available
5sample of data.
6Such records are instances of class
7:class:`~pycommons.math.sample_statistics.SampleStatistics`.
8They offer the
9:attr:`~pycommons.math.stream_statistics.StreamStatistics.minimum` and
10:attr:`~pycommons.math.stream_statistics.StreamStatistics.maximum` of the data
11as well as the number
12:attr:`~pycommons.math.stream_statistics.StreamStatistics.n` of observed
13samples.
14They also offer approximations of the arithmetic mean as attribute
15:attr:`~pycommons.math.stream_statistics.StreamStatistics.mean_arith` and
16the approximation of the standard deviation as attribute
17:attr:`~pycommons.math.stream_statistics.StreamStatistics.stddev`.
18Additionally, they provide the sample
19:attr:`~pycommons.math.sample_statistics.SampleStatistics.median`
20and an approximation
21:attr:`~pycommons.math.sample_statistics.SampleStatistics.mean_geom` of the
22geometric mean.
24This class is an extension of class
25:class:`~pycommons.math.stream_statistics.StreamStatistics`.
26Stream statistics are less accurate and do not provide the median or geometric
27mean.
28They, however, can be applied to a stream of data and do not require that all
29the data be available as a complete chunk at once.
30Sample statistics require access to the full data, but also offer higher
31accuracy.
33There is an absolute order defined upon these records.
34They are hashable and immutable.
35We provide methods to store them to CSV format via the class
36:class:`~pycommons.math.sample_statistics.CsvWriter`
37and to load them from CSV data via the class
38:class:`~pycommons.math.sample_statistics.CsvReader`.
39Functions that access attributes can be obtained via
40:meth:`~pycommons.math.stream_statistics.StreamStatistics.getter`.
42>>> ag = SampleStatistics.aggregate()
43>>> ag.update((1, 2, 3))
44>>> ag.add(4)
45>>> ag.add(5)
46>>> r1 = ag.result()
47>>> repr(r1)
48'SampleStatistics(n=5, minimum=1, mean_arith=3, maximum=5, \
49stddev=1.5811388300841898, median=3, mean_geom=2.6051710846973517)'
50>>> str(r1)
51'5;1;3;3;2.6051710846973517;5;1.5811388300841898'
53>>> r2 = SampleStatistics.from_samples((1, 2, 3, 4, 5))
54>>> r1 == r2
55True
57>>> ag.reset()
58>>> try:
59... ag.result()
60... except ValueError as ve:
61... print(ve)
62Data source cannot be empty.
64>>> print(ag.result_or_none())
65None
66"""
68from contextlib import suppress
69from dataclasses import dataclass
70from fractions import Fraction
71from math import ceil, inf, isfinite, nan, nextafter
72from statistics import geometric_mean as stat_geomean
73from statistics import mean as stat_mean
74from typing import Final, Iterable, Union
76from pycommons.io.csv import (
77 CSV_SEPARATOR,
78 csv_column,
79 csv_column_or_none,
80 csv_val_or_none,
81)
82from pycommons.io.csv import CsvReader as CsvReaderBase
83from pycommons.math.int_math import __DBL_INT_LIMIT_P_I as _DBL_INT_LIMIT_P_I
84from pycommons.math.int_math import (
85 ceil_div,
86 float_to_frac,
87 try_int,
88 try_int_div,
89)
90from pycommons.math.stream_statistics import (
91 KEY_MAXIMUM,
92 KEY_MEAN_ARITH,
93 KEY_MEAN_GEOM,
94 KEY_MEDIAN,
95 KEY_MINIMUM,
96 KEY_N,
97 KEY_STDDEV,
98 StreamStatistics,
99 StreamStatisticsAggregate,
100)
101from pycommons.math.stream_statistics import CsvWriter as CsvWriterBase
102from pycommons.strings.string_conv import (
103 str_to_num,
104)
105from pycommons.types import check_int_range, type_error
def _mean_of_two(a: int | float, b: int | float) -> int | float:
    """
    Compute the arithmetic mean of exactly two numbers.

    Both arguments are first normalized via `try_int`. If the normalized
    values are equal, that value is returned. Two integers are averaged via
    `try_int_div`. In all other cases, the mean is computed with floating
    point arithmetic: If the sum of both values is finite, it is halved.
    Otherwise, both values are halved first and then added.

    :param a: the first number
    :param b: the second number
    :returns: the mean

    >>> _mean_of_two(1, 1)
    1
    >>> _mean_of_two(1.0, 1.0)
    1
    >>> _mean_of_two(1, 2)
    1.5
    >>> _mean_of_two(1, 3)
    2
    >>> _mean_of_two(1.5, 1.7)
    1.6

    >>> _mean_of_two(-1, -1)
    -1
    >>> _mean_of_two(-1.0, -1.0)
    -1
    >>> _mean_of_two(-1, -2)
    -1.5
    >>> _mean_of_two(-1, -3)
    -2
    >>> _mean_of_two(-1.5, -1.7)
    -1.6

    >>> _mean_of_two(1, -1)
    0
    >>> _mean_of_two(-1.0, 1.0)
    0
    >>> _mean_of_two(1, -2)
    -0.5
    >>> _mean_of_two(1, -3)
    -1
    >>> _mean_of_two(1.5, -1.7)
    -0.09999999999999998
    >>> _mean_of_two(-1.5, 1.7)
    0.09999999999999998

    >>> _mean_of_two(1.7976931348623157e+308, 1.7976931348623157e+308)
    1.7976931348623157e+308
    >>> _mean_of_two(1.7976931348623155e+308, 1.7976931348623157e+308)
    1.7976931348623155e+308
    """
    first = try_int(a)
    second = try_int(b)
    if first == second:
        return first
    if isinstance(first, int) and isinstance(second, int):
        return try_int_div(first + second, 2)

    total: float = first + second
    if isfinite(total):
        return 0.5 * total
    # The sum left the finite range: halve both values before adding them.
    return (0.5 * first) + (0.5 * second)
167def _almost_le(a: int | float, b: int | float) -> bool:
168 """
169 Check if `a <= b` holds approximately.
171 `a <= b` holds if, well, `a` is less than or equal to `b`. It holds almost
172 if `a` is just a tiny bit larger than `b`.
174 :param a: the first value
175 :param b: the second value
176 :returns: `True` if we can say: `a` is approximately less or equal than `b`
177 and any deviation from this probably results from numerical issues.
179 >>> _almost_le(1, 0)
180 False
181 >>> _almost_le(0, 0)
182 True
183 >>> _almost_le(1.1, 1.09)
184 False
185 >>> _almost_le(1.1, 1.099999)
186 False
187 >>> _almost_le(1.1, 1.09999999)
188 False
189 >>> _almost_le(1.1, 1.0999999999)
190 False
191 >>> _almost_le(1.1, 1.099999999999)
192 False
193 >>> _almost_le(1.099999999999, 1.1)
194 True
195 >>> _almost_le(1.1, 1.0999999999999)
196 True
197 >>> _almost_le(1.0999999999999, 1.1)
198 True
200 >>> _almost_le(0, -1)
201 False
202 >>> _almost_le(-1.09, -1.1)
203 False
204 >>> _almost_le(-1.099999, -1.1)
205 False
206 >>> _almost_le(-1.09999999, -1.1)
207 False
208 >>> _almost_le(-1.0999999999, -1.1)
209 False
210 >>> _almost_le(-1.099999999999, -1.1)
211 False
212 >>> _almost_le(-1.1, -1.099999999999)
213 True
214 >>> _almost_le(-1.0999999999999, -1.1)
215 True
216 >>> _almost_le(-1.1, -1.0999999999999)
217 True
219 >>> _almost_le(23384026197294446691258957323460528314494920687616,
220 ... 2.3384026197294286e+49)
221 True
222 >>> _almost_le(nextafter(5, inf), nextafter(5, -inf))
223 True
224 >>> _almost_le(nextafter(nextafter(5, inf), inf),
225 ... nextafter(nextafter(5, -inf), -inf))
226 True
227 >>> _almost_le(nextafter(nextafter(nextafter(5, inf), inf), inf),
228 ... nextafter(nextafter(nextafter(5, -inf), -inf), -inf))
229 True
230 >>> _almost_le(nextafter(nextafter(nextafter(nextafter(5, inf), inf),
231 ... inf), inf), nextafter(nextafter(nextafter(5, -inf),
232 ... -inf), -inf))
233 True
234 >>> _almost_le(5.114672824837722e+148, 5.1146728248374894e+148)
235 True
237 >>> _almost_le(-1.7976931348623157e+308,
238 ... -int(1.7976931348623157e+308) * 10)
239 False
240 >>> _almost_le(-int(1.7976931348623157e+308) * 10,
241 ... -1.7976931348623157e+308)
242 True
243 >>> _almost_le(1e-302, 0)
244 True
245 >>> _almost_le(1e-200, 0)
246 False
247 """
248 if a <= b:
249 return True
251 if a < 0:
252 a, b = -b, -a # maybe: a = -19, b = -20 -> maybe: a = 20, b = 19
253 elif b <= 0:
254 return (b >= 0) and (a <= 1e-300)
256 with suppress(OverflowError):
257 use_a: int | float = a
258 use_b: int | float = b
259 for _ in range(3):
260 use_a = nextafter(use_a, -inf)
261 use_b = nextafter(use_b, inf)
262 if use_a <= use_b:
263 return True
264 try:
265 return (b / a) > 0.9999999999999
266 except OverflowError:
267 a_int: Final[int] = int(a)
268 b_int: Final[int] = int(b)
269 return (9999999999999 * a_int) <= (b_int * 10000000000000)
def _to_frac(a: int | float) -> Fraction:
    """
    Turn an integer or floating point number into a fraction.

    Integers are wrapped directly. All other values are handed to
    `float_to_frac`, whose result is used as the arguments of the fraction.

    :param a: the number
    :returns: the fraction

    >>> _to_frac(23)
    Fraction(23, 1)
    >>> _to_frac(2.34)
    Fraction(117, 50)
    """
    if isinstance(a, int):
        return Fraction(a)
    return Fraction(*float_to_frac(a))
def _from_frac(a: int | float | Fraction) -> int | float:
    """
    Turn a fraction into an integer or a floating point number.

    Integers are returned as they are and floats are passed through
    `try_int`. A fraction with denominator `1` yields its numerator, any
    other fraction is evaluated via `try_int_div`.

    :param a: the fraction
    :returns: the integer or float value

    >>> _from_frac(1.6)
    1.6
    >>> _from_frac(123)
    123
    >>> _from_frac(Fraction(7, 8))
    0.875
    >>> _from_frac(Fraction(1237, 1))
    1237
    """
    if isinstance(a, int):
        return a
    if isinstance(a, float):
        return try_int(a)

    top: Final[int] = a.numerator
    bottom: Final[int] = a.denominator
    return top if bottom == 1 else try_int_div(top, bottom)
314#: the 0 fraction
315_FRAC_0: Final[Fraction] = Fraction(0, 1)
316#: the 1 fraction
317_FRAC_1: Final[Fraction] = Fraction(1, 1)
320def _int_root_bound_lower(base: int, root: int) -> int:
321 """
322 Compute a lower bound for a root.
324 We use that `log(a ** b) = log(a) * b`.
325 In binary, this means that: `a ** b == 2 ** (log2(a) * b)`, or, for roots
326 `a ** (1/b) == 2 ** (log2(a) / b`.
327 In bits, `2 ** x == 1 << x` and `floor(log2(x)) == x.bit_length() - 1`.
328 Therefore, we know that `a ** (1/b) >= 1 << ((a.bit_length() // b) - 1)`.
329 Similarly, we can have an upper bound by rounding up at each step
330 `a ** (1/b) <= 1 << (1 + ((b.bit_length() + 1) // root)
332 :param base: the base number
333 :param root: the root
334 :returns: the lower bound
336 >>> _int_root_bound_lower(8, 3)
337 1
339 >>> _int_root_bound_lower(8, 2)
340 2
342 >>> _int_root_bound_lower(25, 3)
343 1
344 """
345 logdiv: Final[int] = base.bit_length() // root
346 return (1 << (logdiv - 1)) if logdiv > 0 else (0 if base < 1 else 1)
def _int_root_bound_upper(base: int, root: int) -> int:
    """
    Compute an upper bound for the `root`-th root of an integer.

    For `root == 1`, the base itself is returned. Otherwise, the result is
    the smaller of two limits: one derived from the bit length of `base`
    and one derived from half of `base`.

    :param base: the base number
    :param root: the root
    :returns: the upper bound

    >>> _int_root_bound_upper(8, 3)
    4

    >>> _int_root_bound_upper(8, 2)
    4

    >>> _int_root_bound_upper(25, 3)
    8
    """
    if root == 1:
        return base
    limit_from_bits = 1 << (1 + ceil_div(base.bit_length() + 1, root))
    limit_from_half = (base // 2) + (1 if base < 6 else 0)
    return min(limit_from_bits, limit_from_half)
def _frac_root_bound_lower(base: Fraction, root: int) -> Fraction:
    """
    Compute a lower bound for the `root`-th root of a fraction.

    Bases that are not positive yield `0` and a base of `1` yields `1`.
    For bases between `0` and `1`, the reciprocal of an upper bound of the
    root of the rounded-up reciprocal base is used. For larger bases, the
    lower bound of the root of the integer part of the base is used.

    :param base: the base number
    :param root: the root
    :returns: the lower bound

    >>> _frac_root_bound_lower(Fraction(8), 3)
    Fraction(1, 1)

    >>> _frac_root_bound_lower(Fraction(8), 2)
    Fraction(2, 1)

    >>> _frac_root_bound_lower(Fraction(25), 3)
    Fraction(1, 1)

    >>> _frac_root_bound_lower(Fraction(3, 8), 3)
    Fraction(1, 2)

    >>> _frac_root_bound_lower(Fraction(11, 8), 2)
    Fraction(1, 1)

    >>> _frac_root_bound_lower(Fraction(11, 25), 3)
    Fraction(1, 2)
    """
    if base <= _FRAC_0:
        return _FRAC_0
    if base < _FRAC_1:
        # 0 < base < 1: bound the root of 1/base from above and invert it.
        return Fraction(1, _int_root_bound_upper(
            ceil_div(base.denominator, base.numerator), root))
    if base == _FRAC_1:
        return _FRAC_1
    return Fraction(_int_root_bound_lower(int(base), root))
def _frac_root_bound_upper(base: Fraction, root: int) -> Fraction:
    """
    Compute an upper bound for the `root`-th root of a fraction.

    Bases that are not positive yield `0` and a base of `1` yields `1`.
    For bases between `0` and `1`, the reciprocal of a lower bound of the
    root of the rounded-down reciprocal base is used. For larger bases, the
    upper bound of the root of the rounded-up base is used.

    :param base: the base number
    :param root: the root
    :returns: the upper bound

    >>> _frac_root_bound_upper(Fraction(8), 3)
    Fraction(4, 1)

    >>> _frac_root_bound_upper(Fraction(8), 2)
    Fraction(4, 1)

    >>> _frac_root_bound_upper(Fraction(25), 3)
    Fraction(8, 1)

    >>> _frac_root_bound_upper(Fraction(3, 8), 3)
    Fraction(1, 1)

    >>> _frac_root_bound_upper(Fraction(11, 8), 2)
    Fraction(2, 1)

    >>> _frac_root_bound_upper(Fraction(11, 25), 3)
    Fraction(1, 1)
    """
    if base <= _FRAC_0:
        return _FRAC_0
    if base < _FRAC_1:
        # 0 < base < 1: bound the root of 1/base from below and invert it.
        return Fraction(1, _int_root_bound_lower(
            base.denominator // base.numerator, root))
    if base == _FRAC_1:
        return _FRAC_1
    return Fraction(_int_root_bound_upper(ceil(base), root))
def _limited_root(base: Fraction, root: int,
                  mini: Fraction = _FRAC_0,
                  maxi: Fraction | None = None) -> int | float:
    """
    Try to compute a root at a precision so exact that no digits are lost.

    If `base` is an integer, an integer binary search is tried first and
    returns immediately if it hits the exact root. Otherwise, a binary
    search over fractions is performed. It ends either when a midpoint is
    the exact root or when converting the midpoint via `_from_frac` yielded
    the same value (of the same type) in four consecutive steps.

    :param base: the base, i.e., the number whose root is wanted
    :param root: the degree of the root
    :param mini: a limit for the smallest possible result
    :param maxi: a maximum value, the limit for the largest possible result,
        or `None` if no upper limit is known
    :returns: the `root`-th root of `base`

    >>> from math import sqrt
    >>> sqrt(3)
    1.7320508075688772
    >>> _limited_root(Fraction(3, 1), 2)
    1.7320508075688772
    >>> _limited_root(Fraction(4, 1), 2)
    2

    >>> _limited_root(Fraction(3 ** 3, 1), 3)
    3
    >>> type(_limited_root(Fraction(3 ** 3, 1), 3))
    <class 'int'>

    >>> _limited_root(Fraction(3 ** 333, 1), 333)
    3

    >>> _limited_root(Fraction(9000 ** 1000, 1), 1000)
    9000

    >>> _limited_root(Fraction((10 ** 8) ** 100, 1), 35)
    71968567300115201992879

    >>> 0.456 ** (1 / 25)
    0.9690776862089129
    >>> _limited_root(Fraction(456, 1000), 25)
    0.9690776862089129

    >>> _limited_root(Fraction(2, 1), 2)
    1.4142135623730951
    >>> sqrt(2)
    1.4142135623730951
    """
    lower: Fraction | None = None
    upper: Fraction | None = None
    if base.denominator == 1:
        # Integer base: first search for an exact integer root.
        ibase = base.numerator
        if ibase <= 1:
            return ibase

        ilower: int = max(int(mini), _int_root_bound_lower(ibase, root))
        iupper: int = _int_root_bound_upper(ibase, root)
        if maxi is not None:
            iupper = min(int(maxi) + 1, iupper)
        imid: int = ilower
        while ilower <= iupper:
            imid = (ilower + iupper) >> 1
            imid_exp = imid ** root
            if imid_exp > ibase:
                iupper = imid - 1
            elif imid_exp < ibase:
                ilower = imid + 1
            else:
                return imid  # We got an exact integer result
        # No exact integer result, but at least new limits
        upper = Fraction(imid + 1)
        lower = Fraction(max(0, imid - 1))

    # Now we do binary search using fractions
    # NOTE(review): the nesting of the following bound updates was
    # reconstructed from a source without indentation - confirm that the
    # tightening steps are meant to run unconditionally.
    if upper is None:
        upper = max(base, _FRAC_1)
    if maxi is not None:
        upper = min(upper, maxi)
    upper = min(upper, _frac_root_bound_upper(base, root))
    if lower is None:
        lower = _FRAC_0
    lower = max(mini, lower)
    lower = max(lower, _frac_root_bound_lower(base, root))

    # Now compute the root using binary search within the limits.
    guess: int | float = nan  # nan never equals the first real guess
    equal_steps: int = 4  # remaining number of unchanged guesses needed
    while equal_steps > 0:
        last_guess: int | float = guess
        mid: Fraction = (lower + upper) / 2
        mid_exp = mid ** root
        if mid_exp > base:
            upper = mid
        elif mid_exp < base:
            lower = mid
        else:
            return _from_frac(mid)

        # Count how often in a row the converted midpoint did not change.
        guess = _from_frac(mid)
        if (type(guess) is type(last_guess)) and (guess == last_guess):
            equal_steps -= 1
        else:
            equal_steps = 4
    return guess
541@dataclass(frozen=True, init=False, order=False, eq=False)
542class SampleStatistics(StreamStatistics):
543 """An immutable record with sample statistics of one quantity."""
545 #: The median, i.e., the value in the middle of the sorted list of
546 #: :attr:`~pycommons.math.stream_statistics.StreamStatistics.n` data
    #: samples.
548 median: int | float
549 #: The geometric mean value, if defined. This is the
550 #: :attr:`~pycommons.math.stream_statistics.StreamStatistics.n`-th root
551 #: of the product of all data samples.
552 #: This value will be `None` if there was any sample which is not greater
553 #: than 0.
554 mean_geom: int | float | None
556 def __init__(self, n: int, minimum: int | float, median: int | float,
557 mean_arith: int | float, mean_geom: int | float | None,
558 maximum: int | float, stddev: int | float | None):
559 """
560 Create a sample statistics record.
562 :param n: the sample size, must be `n >= 1`
563 :param minimum: the minimum
564 :param median: the median
565 :param mean_arith: the arithmetic mean
566 :param mean_geom: the geometric mean, or `None` if it is undefined
567 :param maximum: the maximum
568 :param stddev: the standard deviation, must be `None` if `n == 0`
570 >>> s1 = SampleStatistics(2, 1, 2, 4.0, 3, 6, 0.2)
571 >>> s1.n
572 2
573 >>> s1.minimum
574 1
575 >>> s1.median
576 2
577 >>> s1.mean_arith
578 4
579 >>> s1.mean_geom
580 3
581 >>> s1.maximum
582 6
583 >>> s1.stddev
584 0.2
585 >>> hash(s1)
586 8839096310731950625
588 >>> s2 = SampleStatistics(1, 0, 0.0, 0, None, 0.0, None)
589 >>> s2.n
590 1
591 >>> s2.minimum
592 0
593 >>> s2.median
594 0
595 >>> s2.mean_arith
596 0
597 >>> print(s2.mean_geom)
598 None
599 >>> s2.maximum
600 0
601 >>> print(s2.stddev)
602 None
603 >>> hash(s2) == hash((0, 0, 0, inf, 0, inf, 1, 1))
604 True
606 >>> s3 = SampleStatistics(n=3, minimum=5, median=5, maximum=5,
607 ... mean_arith=5, mean_geom=5, stddev=0.0)
608 >>> s3.stddev
609 0
610 >>> hash(s3)
611 1175763770956004139
613 >>> sset = {s1, s1, s2, s1, s3, s3, s2, s1}
614 >>> len(sset)
615 3
616 >>> print(list(sss.n for sss in sorted(sset)))
617 [1, 2, 3]
618 >>> print(list(sss.minimum for sss in sorted(sset)))
619 [0, 1, 5]
621 >>> try:
622 ... SampleStatistics(n=1, minimum=5, median=6, maximum=5,
623 ... mean_arith=5, mean_geom=5, stddev=None)
624 ... except ValueError as ve:
625 ... print(ve)
626 median (6) must equal minimum (5) if n=1.
628 >>> try:
629 ... SampleStatistics(n=2, minimum=5, median=4, maximum=5,
630 ... mean_arith=5, mean_geom=5, stddev=0)
631 ... except ValueError as ve:
632 ... print(ve)
633 median (4) must be >= minimum (5) if n>1.
635 >>> try:
636 ... SampleStatistics(n=1, minimum=5, median=5, maximum=6,
637 ... mean_arith=5, mean_geom=5, stddev=None)
638 ... except ValueError as ve:
639 ... print(ve)
640 maximum (6) must equal minimum (5) if n=1.
642 >>> try:
643 ... SampleStatistics(n=2, minimum=5, median=6, maximum=5,
644 ... mean_arith=5, mean_geom=5, stddev=0)
645 ... except ValueError as ve:
646 ... print(ve)
647 maximum (5) must be >= med (6) if n>1.
649 >>> try:
650 ... SampleStatistics(n=1, minimum=5, median=5, maximum=5,
651 ... mean_arith=4, mean_geom=5, stddev=None)
652 ... except ValueError as ve:
653 ... print(ve)
654 mean_arith (4) must equal minimum (5) if n=1.
656 >>> try:
657 ... SampleStatistics(n=2, minimum=5, median=6, maximum=6,
658 ... mean_arith=4, mean_geom=5, stddev=None)
659 ... except ValueError as ve:
660 ... print(ve)
661 minimum<=mean_arith<=maximum must hold, but got 5, 4, and 6.
663 >>> try:
664 ... SampleStatistics(n=1, minimum=5, median=5, maximum=5,
665 ... mean_arith=5, mean_geom=None, stddev=None)
666 ... except ValueError as ve:
667 ... print(ve)
668 If minimum (5) > 0, then mean_geom must be defined, but it is None.
670 >>> try:
671 ... SampleStatistics(n=1, minimum=0, median=0, maximum=0,
672 ... mean_arith=0, mean_geom=0, stddev=None)
673 ... except ValueError as ve:
674 ... print(ve)
675 If minimum (0) <= 0, then mean_geom is undefined, but it is 0.
677 >>> try:
678 ... SampleStatistics(n=1, minimum=5, median=5, maximum=5,
679 ... mean_arith=5, mean_geom=6, stddev=None)
680 ... except ValueError as ve:
681 ... print(ve)
682 mean_geom (6) must equal minimum (5) if n=1.
684 >>> try:
685 ... SampleStatistics(n=3, minimum=5, median=6, maximum=7,
686 ... mean_arith=6, mean_geom=6.1, stddev=1)
687 ... except ValueError as ve:
688 ... print(ve)
689 mean_geom (6.1) must be <= mean_arith (6).
691 >>> try:
692 ... SampleStatistics(n=3, minimum=5, median=6, maximum=7,
693 ... mean_arith=6, mean_geom=6, stddev=-1)
694 ... except ValueError as ve:
695 ... print(ve)
696 stddev must be >= 0, but is -1.
698 >>> try:
699 ... SampleStatistics(n=3, minimum=5, median=6, maximum=7,
700 ... mean_arith=6, mean_geom=6, stddev=0)
701 ... except ValueError as ve:
702 ... print(str(ve)[:59])
703 If stddev (0) is 0, then minimum (5) must equal maximum (7)
705 >>> try:
706 ... SampleStatistics(n=3, minimum=5, median=5, maximum=5,
707 ... mean_arith=5, mean_geom=5, stddev=1)
708 ... except ValueError as ve:
709 ... print(str(ve)[:59])
710 If stddev (1) is 0, then minimum (5) must equal maximum (5)
712 >>> try:
713 ... SampleStatistics(n=3, minimum=5, median=5, maximum=5,
714 ... mean_arith=5, mean_geom=5, stddev=None)
715 ... except ValueError as ve:
716 ... print(ve)
717 If n=1, stddev=None and vice versa, but got n=3 and stddev=None.
719 >>> try:
720 ... SampleStatistics(n=1, minimum=5, median=5, maximum=5,
721 ... mean_arith=5, mean_geom=5, stddev=1)
722 ... except ValueError as ve:
723 ... print(ve)
724 If n=1, stddev=None and vice versa, but got n=1 and stddev=1.
726 >>> try:
727 ... SampleStatistics(n=2, minimum=5, median=5, maximum=6,
728 ... mean_arith=6, mean_geom=7, stddev=1)
729 ... except ValueError as ve:
730 ... print(ve)
731 minimum<=mean_geom<=maximum must hold, but got 5, 7, and 6.
732 """
733 super().__init__(n, minimum, mean_arith, maximum, stddev)
735 # check minimum
736 median = try_int(median)
737 if n == 1:
738 if median != self.minimum:
739 raise ValueError(f"median ({median}) must equal "
740 f"minimum ({self.minimum}) if n=1.")
741 elif median < self.minimum:
742 raise ValueError(f"median ({median}) must be >= minimum ("
743 f"{self.minimum}) if n>1.")
745 # check maximum
746 if self.maximum < median:
747 raise ValueError(
748 f"maximum ({self.maximum}) must be >= med ({median}) if n>1.")
750 # check geometric mean
751 if mean_geom is None:
752 if self.minimum > 0:
753 raise ValueError(
754 f"If minimum ({self.minimum}) > 0, then mean_geom must be"
755 f" defined, but it is {mean_geom}.")
756 else:
757 if self.minimum <= 0:
758 raise ValueError(
759 f"If minimum ({self.minimum}) <= 0, then mean_geom is "
760 f"undefined, but it is {mean_geom}.")
761 mean_geom = try_int(mean_geom)
762 if n == 1:
763 if mean_geom != self.minimum:
764 raise ValueError(f"mean_geom ({mean_geom}) must equal "
765 f"minimum ({self.minimum}) if n=1.")
766 else:
767 if not self.minimum <= mean_geom <= self.maximum:
768 raise ValueError(
769 "minimum<=mean_geom<=maximum must hold, but got "
770 f"{self.minimum}, {mean_geom}, and {self.maximum}.")
771 if mean_geom > self.mean_arith:
772 raise ValueError(
773 f"mean_geom ({mean_geom}) must be <= "
774 f"mean_arith ({self.mean_arith}).")
776 object.__setattr__(self, "median", median)
777 object.__setattr__(self, "mean_geom", mean_geom)
779 def __str__(self) -> str:
780 """
781 Get a string representation of this object.
783 :returns: the string
784 """
785 return CSV_SEPARATOR.join(map(str, (
786 self.n, self.minimum, self.median, self.mean_arith,
787 self.mean_geom, self.maximum, self.stddev)))
789 def min_mean(self) -> int | float:
790 """
791 Obtain the smallest of the three mean values.
793 :returns: the smallest of `mean_arith`, `mean_geom`, and `median`
795 >>> SampleStatistics(1, 0, 0.0, 0, None, 0.0, None).min_mean()
796 0
797 >>> SampleStatistics(2, 1, 2, 4.0, 3, 6, 0.2).min_mean()
798 2
799 >>> SampleStatistics(2, 1, 3.2, 4.0, 3, 6, 0.2).min_mean()
800 3
801 >>> SampleStatistics(2, 1, 5.2, 4.0, 3, 6, 0.2).min_mean()
802 3
803 """
804 if self.mean_geom is None: # geometric mean is always <= arithmean
805 return min(self.mean_arith, self.median)
806 return min(self.mean_geom, self.median)
808 def max_mean(self) -> int | float:
809 """
810 Obtain the largest of the three mean values.
812 :returns: the largest of `mean_arith`, `mean_geom`, and `median`
814 >>> SampleStatistics(1, 0, 0.0, 0, None, 0.0, None).max_mean()
815 0
816 >>> SampleStatistics(2, 1, 2, 4.0, 3, 6, 0.2).max_mean()
817 4
818 >>> SampleStatistics(2, 1, 3.2, 4.0, 3, 6, 0.2).max_mean()
819 4
820 >>> SampleStatistics(2, 1, 5.2, 4.0, 3, 6, 0.2).max_mean()
821 5.2
822 """
823 return max(self.mean_arith, self.median)
825 def compact(self, needs_n: bool = True) \
826 -> "int | float | SampleStatistics":
827 """
828 Try to represent this object as single number, if possible.
830 :param needs_n: if this is `True`, the default, then the object is
831 only turned into a single number if alsp `n==1`. Otherwise, `n`
832 is ignored
833 :returns: an integer or float if this objects minimum equals its
834 maximum, the object itself otherwise
836 >>> s = SampleStatistics.from_single_value(10, 1)
837 >>> s.compact() == 10
838 True
839 >>> s.compact() == s.compact(True)
840 True
842 >>> s = SampleStatistics.from_single_value(10, 2)
843 >>> s.compact() is s
844 True
845 >>> s.compact() == s.compact(True)
846 True
848 >>> s = SampleStatistics.from_single_value(10, 2)
849 >>> s.compact(False) == 10
850 True
852 >>> s = SampleStatistics(2, 1, 2, 4, 3, 5, 3)
853 >>> s.compact() is s
854 True
856 >>> s = SampleStatistics(2, 1, 2, 4, 3, 5, 3)
857 >>> s.compact(False) is s
858 True
860 >>> try:
861 ... s.compact(1)
862 ... except TypeError as te:
863 ... print(te)
864 needs_n should be an instance of bool but is int, namely 1.
866 >>> try:
867 ... s.compact(None)
868 ... except TypeError as te:
869 ... print(te)
870 needs_n should be an instance of bool but is None.
871 """
872 if not isinstance(needs_n, bool):
873 raise type_error(needs_n, "needs_n", bool)
874 mi: Final[int | float] = self.minimum
875 return self if (mi < self.maximum) or (
876 needs_n and (self.n > 1)) else mi
878 def _key(self) -> tuple[int | float, int | float, int | float,
879 int | float, int | float, int | float, int, int]:
880 r"""
881 Get a comparison and hash key.
883 :returns: the comparison key
885 >>> SampleStatistics(2, 1, 2, 4.0, 3, 6, 0.2)._key()
886 (1, 2, 4, 3, 6, 0.2, 2, 1)
888 >>> SampleStatistics(1, 0, 0, 0, None, 0, None)._key()
889 (0, 0, 0, inf, 0, inf, 1, 1)
890 """
891 return (self.minimum, self.median, self.mean_arith,
892 inf if self.mean_geom is None else self.mean_geom,
893 self.maximum, inf if self.stddev is None else self.stddev,
894 self.n, 1)
896 def get_mean_geom(self) -> int | float | None:
897 """
898 Get the geometric mean (:attr:`~SampleStatistics.mean_geom`).
900 :returns: the geometric mean (:attr:`~SampleStatistics.mean_geom`) of
901 all the samples, `None` if the geometric mean is not defined.
902 :raises TypeError: if an object of the wrong type is passed in as self
904 >>> SampleStatistics(5, 3, 5, 6, 4, 7, 2).get_mean_geom()
905 4
907 >>> try:
908 ... SampleStatistics.get_mean_geom(None)
909 ... except TypeError as te:
910 ... print(str(te)[:20])
911 self should be an in
912 """
913 if not isinstance(self, SampleStatistics):
914 raise type_error(self, "self", SampleStatistics)
915 return self.mean_geom
917 def get_median(self) -> int | float:
918 """
919 Get the :attr:`~SampleStatistics.median` of all the samples.
921 :returns: the :attr:`~SampleStatistics.median` of all the samples.
922 :raises TypeError: if an object of the wrong type is passed in as self
924 >>> SampleStatistics(5, 3, 5, 6, 4, 7, 2).get_median()
925 5
927 >>> try:
928 ... SampleStatistics.get_median(None)
929 ... except TypeError as te:
930 ... print(str(te)[:20])
931 self should be an in
932 """
933 if not isinstance(self, SampleStatistics):
934 raise type_error(self, "self", SampleStatistics)
935 return self.median
937 @classmethod
938 def from_single_value(cls, value: Union[
939 int, float, "StreamStatistics"], n: int = 1) -> "SampleStatistics":
940 r"""
941 Create a sample statistics from a single number.
943 :param value: the single value
944 :param n: the number of samples, i.e., the number of times this value
945 occurred
946 :returns: the sample statistics
948 >>> s = SampleStatistics.from_single_value(10, 2)
949 >>> print(s.stddev)
950 0
951 >>> s.minimum == s.maximum == s.mean_arith == s.mean_geom \
952 ... == s.median == 10
953 True
954 >>> s is SampleStatistics.from_single_value(s, s.n)
955 True
957 >>> s = SampleStatistics.from_single_value(10, 1)
958 >>> print(s.stddev)
959 None
960 >>> s.minimum == s.maximum == s.mean_arith == s.mean_geom \
961 ... == s.median == 10
962 True
963 >>> s is SampleStatistics.from_single_value(s, s.n)
964 True
966 >>> s = SampleStatistics.from_single_value(-10, 2)
967 >>> print(s.stddev)
968 0
969 >>> s.minimum == s.maximum == s.mean_arith == s.median == -10
970 True
971 >>> print(s.mean_geom)
972 None
973 >>> s is SampleStatistics.from_single_value(s, s.n)
974 True
976 >>> s = SampleStatistics.from_single_value(-10, 1)
977 >>> print(s.stddev)
978 None
979 >>> s.minimum == s.maximum == s.mean_arith == s.median == -10
980 True
981 >>> print(s.mean_geom)
982 None
983 >>> s is SampleStatistics.from_single_value(s, s.n)
984 True
986 >>> s = SampleStatistics.from_single_value(10.5, 2)
987 >>> print(s.stddev)
988 0
989 >>> s.minimum == s.maximum == s.mean_arith == s.mean_geom \
990 ... == s.median == 10.5
991 True
992 >>> s is SampleStatistics.from_single_value(s, s.n)
993 True
995 >>> s = SampleStatistics.from_single_value(10.5, 1)
996 >>> print(s.stddev)
997 None
998 >>> s.minimum == s.maximum == s.mean_arith == s.mean_geom \
999 ... == s.median == 10.5
1000 True
1001 >>> s is SampleStatistics.from_single_value(s, s.n)
1002 True
1004 >>> s = SampleStatistics.from_single_value(-10.5, 2)
1005 >>> print(s.stddev)
1006 0
1007 >>> s.minimum == s.maximum == s.mean_arith == s.median == -10.5
1008 True
1009 >>> print(s.mean_geom)
1010 None
1011 >>> s is SampleStatistics.from_single_value(s, s.n)
1012 True
1014 >>> s = SampleStatistics.from_single_value(-10.5, 1)
1015 >>> print(s.stddev)
1016 None
1017 >>> s.minimum == s.maximum == s.mean_arith == s.median == -10.5
1018 True
1019 >>> print(s.mean_geom)
1020 None
1021 >>> s is SampleStatistics.from_single_value(s, s.n)
1022 True
1024 >>> print(SampleStatistics.from_single_value(
1025 ... StreamStatistics(5, 1, 1, 1, 0), 5))
1026 5;1;1;1;1;1;0
1028 >>> try:
1029 ... SampleStatistics.from_single_value(StreamStatistics(
1030 ... 5, 1, 2, 3, 5), 5)
1031 ... except ValueError as ve:
1032 ... print(ve)
1033 Cannot create SampleStatistics from 5;1;2;3;5.
1035 >>> try:
1036 ... SampleStatistics.from_single_value(None)
1037 ... except TypeError as te:
1038 ... print(str(te)[:20])
1039 value should be an i
1041 >>> try:
1042 ... SampleStatistics.from_single_value("a")
1043 ... except TypeError as te:
1044 ... print(str(te)[:20])
1045 value should be an i
1047 >>> try:
1048 ... SampleStatistics.from_single_value(1, None)
1049 ... except TypeError as te:
1050 ... print(str(te)[:20])
1051 n should be an insta
1053 >>> try:
1054 ... SampleStatistics.from_single_value(1, "a")
1055 ... except TypeError as te:
1056 ... print(str(te)[:20])
1057 n should be an insta
1059 >>> try:
1060 ... SampleStatistics.from_single_value(s, 12)
1061 ... except ValueError as ve:
1062 ... print(str(ve)[:20])
1063 Incompatible numbers
1065 >>> try:
1066 ... SampleStatistics.from_single_value(inf)
1067 ... except ValueError as ve:
1068 ... print(str(ve)[:20])
1069 value=inf is not fin
1070 """
1071 n = check_int_range(n, "n", 1, 1_000_000_000_000_000_000)
1073 if isinstance(value, StreamStatistics):
1074 if value.n != n:
1075 raise ValueError( # noqa: TRY004
1076 f"Incompatible numbers of values {n} and {value}.")
1077 if isinstance(value, SampleStatistics):
1078 return value
1079 if value.maximum != value.minimum:
1080 raise ValueError(
1081 f"Cannot create SampleStatistics from {value}.")
1082 value = value.maximum
1083 if not isinstance(value, int | float):
1084 raise type_error(value, "value", (int, float, SampleStatistics))
1085 if not isfinite(value):
1086 raise ValueError(f"value={value} is not finite.")
1087 return SampleStatistics(
1088 n=n, minimum=value, median=value, mean_arith=value,
1089 mean_geom=None if value <= 0 else value, maximum=value,
1090 stddev=None if n <= 1 else 0)
@classmethod
def aggregate(cls) -> StreamStatisticsAggregate["SampleStatistics"]:
    """
    Get an aggregate suitable for this statistics type.

    The returned object is a new, empty instance of the module-private
    class ``_SampleStats``. It buffers every value handed to its `add` or
    `update` method in a list and computes the statistics over the whole
    buffer via :meth:`from_samples` when `result` is invoked.
    The `cls` argument is not used.

    :return: the aggregate

    >>> ag = SampleStatistics.aggregate()
    >>> ag.update((1, 2, 3, 4))
    >>> ag.result()
    SampleStatistics(n=4, minimum=1, mean_arith=2.5, maximum=4, \
stddev=1.2909944487358056, median=2.5, mean_geom=2.213363839400643)
    >>> ag.reset()
    >>> ag.add(4)
    >>> ag.add(5)
    >>> ag.add(6)
    >>> ag.add(7)
    >>> ag.result()
    SampleStatistics(n=4, minimum=4, mean_arith=5.5, maximum=7, \
stddev=1.2909944487358056, median=5.5, mean_geom=5.383563270955295)
    """
    return _SampleStats()
@classmethod
def from_samples(cls, source: Iterable[
        int | float | None]) -> "SampleStatistics":
    """
    Create a statistics object from an iterable of integers or floats.

    As bottom line, this function will forward computations to the
    :mod:`statistics` routines that ship with Python if nothing else works.
    However, sometimes, something else may work: In particular, if the data
    consists of only integers. In this case, it just might be possible to
    compute the statistics very accurately with integer precision, where
    possible. Also, otherwise, we can often accumulate the data using
    instances of :class:`fractions.Fraction`. Indeed, even the
    :mod:`statistics` routines may do this, but they convert to `float` in
    cases of non-1 denominators, even if the integer presentation was much
    more accurate.

    Values that are `None` are skipped. If all remaining values are the
    same (or there is only one), the work is delegated to
    :meth:`from_single_value`.

    :param source: the source
    :returns: a statistics representing the statistics over `source`
    :raises TypeError: if `source` is not an :class:`Iterable`
    :raises ValueError: if `source` contains no value other than `None`,
        or if the computed geometric mean is inconsistent with the
        minimum or the arithmetic mean beyond the accepted tolerance

    >>> s = SampleStatistics.from_samples([0.0])
    >>> s.n
    1
    >>> s.minimum
    0
    >>> s.maximum
    0
    >>> print(s.mean_geom)
    None
    >>> s.median
    0
    >>> print(s.stddev)
    None

    >>> s = SampleStatistics.from_samples([1.0])
    >>> s.n
    1
    >>> s.minimum
    1
    >>> s.maximum
    1
    >>> print(s.mean_geom)
    1
    >>> s.median
    1
    >>> print(s.stddev)
    None

    >>> s = SampleStatistics.from_samples([1.0, 1])
    >>> s.n
    2
    >>> s.minimum
    1
    >>> s.maximum
    1
    >>> print(s.mean_geom)
    1
    >>> s.median
    1
    >>> print(s.stddev)
    0

    >>> s = SampleStatistics.from_samples([0, 0.0])
    >>> s.n
    2
    >>> s.minimum
    0
    >>> s.maximum
    0
    >>> print(s.mean_geom)
    None
    >>> s.median
    0
    >>> print(s.stddev)
    0

    >>> from statistics import stdev as stat_stddev
    >>> dd = [1.5, 2.5]
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.n
    2
    >>> s.minimum
    1.5
    >>> s.maximum
    2.5
    >>> print(s.mean_geom)
    1.9364916731037085
    >>> stat_geomean(dd)
    1.9364916731037085
    >>> s.median
    2
    >>> print(s.stddev)
    0.7071067811865476
    >>> stat_stddev(dd)
    0.7071067811865476

    >>> dd = [1.0, 2.0]
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.n
    2
    >>> s.minimum
    1
    >>> s.maximum
    2
    >>> print(s.mean_geom)
    1.4142135623730951
    >>> (1 * 2) ** 0.5
    1.4142135623730951
    >>> stat_geomean(dd)
    1.414213562373095
    >>> s.median
    1.5
    >>> print(s.stddev)
    0.7071067811865476
    >>> stat_stddev(dd)
    0.7071067811865476

    >>> dd = [1.0, 2.0, 3.0]
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.n
    3
    >>> s.minimum
    1
    >>> s.maximum
    3
    >>> print(s.mean_geom)
    1.8171205928321397
    >>> (1 * 2 * 3) ** (1 / 3)
    1.8171205928321397
    >>> stat_geomean(dd)
    1.8171205928321397
    >>> s.median
    2
    >>> print(s.stddev)
    1
    >>> stat_stddev(dd)
    1.0

    >>> dd = [1.0, 0, 3.0]
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.n
    3
    >>> s.minimum
    0
    >>> s.maximum
    3
    >>> print(s.mean_geom)
    None
    >>> s.median
    1
    >>> print(s.stddev)
    1.5275252316519468
    >>> stat_stddev(dd)
    1.5275252316519468

    >>> dd = [1.0, -2, 3.0]
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.n
    3
    >>> s.minimum
    -2
    >>> s.maximum
    3
    >>> print(s.mean_geom)
    None
    >>> s.median
    1
    >>> print(s.stddev)
    2.516611478423583
    >>> stat_stddev(dd)
    2.516611478423583

    >>> dd = [1e5, 2e7, 3e9]
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.n
    3
    >>> s.minimum
    100000
    >>> s.maximum
    3000000000
    >>> print(s.mean_geom)
    18171205.928321395
    >>> (100000 * 20000000 * 3000000000) ** (1 / 3)
    18171205.92832138
    >>> 100000 * (((100000 // 100000) * (20000000 // 100000) * (
    ...     3000000000 // 100000)) ** (1 / 3))
    18171205.92832139
    >>> print(s.mean_geom ** 3)
    5.999999999999999e+21
    >>> print(18171205.92832139 ** 3)
    5.999999999999995e+21
    >>> s.median
    20000000
    >>> print(s.stddev)
    1726277112.7487035
    >>> stat_stddev(dd)
    1726277112.7487035

    >>> dd = [3.3, 2.5, 3.7, 4.9]
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.n
    4
    >>> s.minimum
    2.5
    >>> s.maximum
    4.9
    >>> print(s.mean_geom)
    3.4971393519216964
    >>> 3.4971393519216964 ** 4
    149.5725
    >>> (3.3 * 2.5 * 3.7 * 4.9) ** 0.25
    3.497139351921697
    >>> s.median
    3.5
    >>> s.stddev
    1.0000000000000002
    >>> stat_stddev(dd)
    1.0000000000000002

    >>> dd = [3, 1, 2, 5]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.minimum)
    1
    >>> print(s.maximum)
    5
    >>> print(s.mean_arith)
    2.75
    >>> print(s.median)
    2.5
    >>> print(f"{s.mean_geom:.4f}")
    2.3403
    >>> print(f"{s.min_mean():.4f}")
    2.3403
    >>> print(f"{s.max_mean()}")
    2.75

    >>> dd = [8, 8, 8, 8, 9, 10, 10, 11, 11, 12, 12, 12, 12, 13,
    ...       13, 13, 14, 14, 14, 15, 15, 15, 15, 15, 15, 16, 16, 16]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.minimum)
    8
    >>> print(s.maximum)
    16
    >>> print(s.mean_arith)
    12.5
    >>> print(s.median)
    13
    >>> print(s.mean_geom)
    12.197150265022891
    >>> stat_geomean(dd)
    12.19715026502289
    >>> print(s.stddev)
    2.673602092336881
    >>> stat_stddev(dd)
    2.673602092336881

    >>> dd = [3, 4, 7, 14, 15, 16, 26, 28, 29, 30, 31, 31]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.minimum)
    3
    >>> print(s.maximum)
    31
    >>> print(s.mean_arith)
    19.5
    >>> print(s.median)
    21

    >>> print(s.mean_geom)
    15.354984483655892
    >>> stat_geomean(dd)
    15.354984483655894
    >>> k = 1
    >>> for i in dd:
    ...     k *= i
    >>> k
    171787904870400
    >>> len(dd)
    12
    >>> k ** (1 / 12)
    15.354984483655889
    >>> 15.354984483655889 ** 12
    171787904870399.62
    >>> 15.354984483655894 ** 12
    171787904870400.34
    >>> 15.354984483655892 ** 12
    171787904870400.1

    >>> print(s.stddev)
    10.917042556563484
    >>> print(str(stat_stddev(dd))[:-1])
    10.91704255656348

    >>> dd = [375977836981734264856247621159545315,
    ...       1041417453269301410322718941408784761,
    ...       2109650311556162106262064987699051941]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.minimum)
    375977836981734264856247621159545315
    >>> print(s.maximum)
    2109650311556162106262064987699051941
    >>> print(s.mean_arith)
    1175681867269065927147010516755794006
    >>> stat_mean(dd)
    1.1756818672690659e+36
    >>> print(s.median)
    1041417453269301410322718941408784761

    >>> print(s.mean_geom)
    938280139276529201997232316081385153
    >>> stat_geomean(dd)
    9.38280139276522e+35

    >>> str(dd[0] * dd[1] * dd[2])[:60]
    '826033329443972563356247815302467930409182372405786485790679'
    >>> str(938280139276529201997232316081385153 ** 3)[:60]
    '826033329443972563356247815302467929164458081790138679285598'
    >>> str(int(9.38280139276522e+35) ** 3)[:60]
    '826033329443953666416831847378532327244986484162191539691938'

    >>> print(s.stddev)
    874600058269081159245960567663054887
    >>> stat_stddev(dd)
    8.746000582690812e+35

    >>> dd = [104275295274308290135253194482044160663473778025704,
    ...       436826861307375084714000787588311944456580437896461,
    ...       482178404791292289021955619498303854464057392180997,
    ...       521745351662201002493923306143082542601267608373030,
    ...       676289718505789968602970820038005797309334755525626]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.minimum)
    104275295274308290135253194482044160663473778025704
    >>> print(s.maximum)
    676289718505789968602970820038005797309334755525626
    >>> print(s.mean_arith)
    444263126308193326993620745549949659898942794400364
    >>> stat_mean(dd)
    4.442631263081933e+50
    >>> print(s.median)
    482178404791292289021955619498303854464057392180997

    >>> print(s.mean_geom)
    378318848166864995660791573439112525534046591591759
    >>> stat_geomean(dd)
    3.78318848166862e+50

    >>> print(s.stddev)
    210311926886813737006941586539087921260462032505870
    >>> stat_stddev(dd)
    2.1031192688681374e+50

    >>> dd = [4, 5, 5, 6, 6, 6, 6, 6, 8, 8]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    5.884283961687533
    >>> print(stat_geomean(dd))
    5.884283961687533

    >>> dd = [4, 4, 4, 5, 5, 8]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    4.836542350243914
    >>> print(stat_geomean(dd))
    4.8365423502439135

    >>> dd = [2, 8, 11, 17, 26, 30, 32]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    13.327348017053906
    >>> print(stat_geomean(dd))
    13.327348017053906

    >>> dd = [2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    3.4710522375429465
    >>> print(stat_geomean(dd))
    3.471052237542947

    >>> dd = [3, 4, 4, 5, 6, 8, 8, 8, 8]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    5.653305998922543
    >>> print(stat_geomean(dd))
    5.653305998922543

    >>> dd = [16, 17, 19, 20, 20, 21, 22, 23, 24, 24, 25, 26, 29, 31,
    ...       31, 31, 32, 32, 32]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    24.419566831650357
    >>> print(stat_geomean(dd))
    24.41956683165036

    >>> dd = [66, 68, 69, 70, 72, 73, 73, 79, 81, 87, 94, 99, 100,
    ...       102, 103, 112, 118, 119, 123, 123]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    89.45680043258344
    >>> print(stat_geomean(dd))
    89.45680043258346

    >>> dd = [44, 63, 63, 68, 68, 68, 70, 74, 74, 80, 95, 108, 110, 128]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    76.68646417360762
    >>> print(stat_geomean(dd))
    76.68646417360763

    >>> try:
    ...     SampleStatistics.from_samples(None)
    ... except TypeError as te:
    ...     print(te)
    source should be an instance of typing.Iterable but is None.

    >>> SampleStatistics.from_samples((int("3432135447287235494201\
93506618248802478442\
545733127827402743350092428341563721880022852900744775368104117201410\
41"), int("4543178800835483269512609282884075126142677531600199807725\
0558561959304806690567285991174956892786401583087254156"), int("35473\
203294104466229269097724582630304968924904656920211268628173495602053\
843032960943121516556362641127137000879"))).mean_arith
    38408781925110551288804847071749420604746651597990567009597840581\
565913672301929416406528849308895284373981465359

    Corner cases where the standard deviation resulting from compact
    fractions deviates from the standard deviation resulting from
    normalized fractions:

    >>> dd = [-7.737125245533627e+25] * 28
    >>> dd[2] = -7.737125245533626e+25
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.stddev
    1623345050.6245058
    >>> stat_stddev(dd)
    1623345050.6245058
    >>> ddx = tuple(map(_to_frac, dd))
    >>> ds = sum(ddx)
    >>> dss = sum(ddy * ddy for ddy in ddx)
    >>> from math import sqrt
    >>> sqrt((dss - (ds * ds / 28)) / 27)
    1623345050.6245055

    Here the standard deviation becomes meaningless.
    If you compute it based on converting all values to floats, you get
    something like 0.435.
    You get the same result if you represent all values directly as
    Fractions.
    However, if you represent the float values as more compact Fractions,
    i.e., as Fractions that map to the exactly same floats but have smaller
    denominators, you get a standard deviation of 9.32e+64.
    Basically, the difference is 65 orders of magnitude.
    But the source numbers would be exactly the same...
    The reason is the limited range of floats.

    >>> dd = (7.588550360256754e+81, int("75885503602567541832791480735\
293707\
29071901715047420004889892225542594864082845697"), int("758855036025675418327\
9148073529370729071901715047420004889892225542594864082845697"), \
7.588550360256754e+81, 7.588550360256754e+81, 7.588550360256754e+81, \
int("7588550360256754183279148073529370729071901715047420004889892225\
542594864082845696"), 7.588550360256754e+81, 7.588550360256754e+81, \
7.588550360256754e+81, 7.588550360256754e+81, int("758855036025675418\
3279148073529370729071901715047420004889892225542594864082845696"), int("7588\
55036025675418327914807352937072907190171504742000488989222554259486408284569\
7"), int("7588550360256754183279148073529370729071901715047420004889892225542\
594864082845696"), int("75885503602567541832791480735293707290719017150474200\
04889892225542594864082845696"), int("758855036025675418327914807352937072907\
1901715047420004889892225542594864082845697"), 7.588550360256754e+81,\
int("7588550360256754183279148073529370729071901715047420004889892225\
542594864082845697"), int("75885503602567541832791480735293707290719017150474\
20004889892225542594864082845697"), int("758855036025675418327914807352937072\
9071901715047420004889892225542594864082845697"), 7.588550360256754e+81, \
int("7588550360256754183279148073529370729071901715047420004889892225\
542594864082845696"), int("75885503602567541832791480735293707290719017150474\
20004889892225542594864082845696"), 7.588550360256754e+81, \
7.588550360256754e+81, int("75885503602567541832791480735293707290719\
01715047420004889892225542594864082845696"), 7.588550360256754e+81, \
7.588550360256754e+81, 7.588550360256754e+81)
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.stddev
    0.4354941703556927
    >>> stat_stddev(dd)
    0.4354941703556927
    >>> ddx = tuple(map(_to_frac, dd))
    >>> ds = sum(ddx)
    >>> dss = sum(ddy * ddy for ddy in ddx)
    >>> _limited_root((dss - (ds * ds / len(dd))) / (len(dd) - 1), 2)
    93206175962530968626911348905791729797971161757128018983942059951
    >>> ddx = tuple(map(Fraction, dd))
    >>> ds = sum(ddx)
    >>> dss = sum(ddy * ddy for ddy in ddx)
    >>> _limited_root((dss - (ds * ds / len(dd))) / (len(dd) - 1), 2)
    0.4354941703556927

    >>> try:
    ...     SampleStatistics.from_samples(1)
    ... except TypeError as te:
    ...     print(te)
    source should be an instance of typing.Iterable but is int, namely 1.

    >>> try:
    ...     SampleStatistics.from_samples([])
    ... except ValueError as ve:
    ...     print(ve)
    Data source cannot be empty.
    """
    if not isinstance(source, Iterable):
        raise type_error(source, "source", Iterable)

    # The median function of statistics would do this anyway, so we may as
    # well do it now.
    # `None` entries are dropped and every remaining value is passed
    # through `try_int` (presumably turning integer-valued floats into
    # `int`, as the doctests with `[1.0]` yielding `1` suggest).
    data: Final[list[int | float]] = sorted(map(try_int, (
        xs for xs in source if xs is not None)))
    n: Final[int] = list.__len__(data)
    if n <= 0:
        raise ValueError("Data source cannot be empty.")

    minimum: int | float = data[0]  # because data is now sorted
    maximum: int | float = data[-1]  # because data is now sorted
    if (minimum >= maximum) or (n <= 1):  # all data is the same
        return SampleStatistics.from_single_value(minimum, n)

    # Compute the median.
    # For odd n, it is the middle element; for even n, it is the mean of
    # the two elements around the middle.
    middle: Final[int] = n >> 1
    median: Final[int | float] = data[middle] if (n & 1) == 1 else (
        _mean_of_two(data[middle - 1], data[middle]))

    # Is it possible, at this stage, that all data are integers?
    # (Only the two extremes are checked here; the loop below clears this
    # flag as soon as it meets a non-integer value.)
    can_int: bool = isinstance(minimum, int) and isinstance(maximum, int)

    # If we have only two numbers, we also already have the mean.
    # Otherwise, if we have only integer data so far and we know that
    # regardless how we dice it, the sum of the data will never exceed
    # the range in which floats can accurately represent integers, then
    # we also know that we can compute the arithmetic mean exactly.
    mean_arith: int | float | None = median if n <= 2 else (
        try_int(stat_mean(data)) if can_int and (
            (n * (1 + max(maximum, 0) - min(minimum, 0)))
            < _DBL_INT_LIMIT_P_I) else None)
    mean_arith_frac: Fraction | None = None
    mean_geom: int | float | None = None  # don't know the geometric mean
    # Go over the data once and see if we can treat it as all-integer.
    # If yes, then we can compute some statistics very precisely.
    int_sum: int = 0  # the integer sum (for mean, stddev)
    int_sum_sqr: int = 0  # the sum of squares (for stddev)
    int_sum_sqr_2: int = 0  # the sum of squares (for stddev)
    int_prod: int = 1  # the integer product (for geom_mean)
    frac_sum: Fraction = _FRAC_0
    frac_sum_sqr: Fraction = frac_sum
    frac_prod: Fraction = _FRAC_1

    # The following is *only* used if we have *only* integer data.
    # stddev((a, b, ...)) = stddev((a-x, b-x, ...))
    # If we can shift the whole data such that its center is around 0,
    # then the difference that we have to add up become smaller, and thus
    # the floating point arithmetic that we may need to use becomes more
    # accurate. If we know the mean, then shifting the data by the mean
    # will lead to the smallest sum of deviations. If we know only the
    # median, then this is better than nothing.
    shift: Final[int] = int(median) if mean_arith is None \
        else (mean_arith if isinstance(mean_arith, int)
              else round(mean_arith))

    for ii, ee in enumerate(data):  # iterate over all data
        if can_int and (not isinstance(ee, int)):
            # First non-integer value: move the integer accumulators over
            # to fractions. `int_sum` holds the *shifted* sum of the `ii`
            # values seen so far, so the shift is added back `ii` times.
            # `int_sum_sqr_2` holds the unshifted sum of squares.
            frac_sum = Fraction(int_sum + ii * shift)
            frac_sum_sqr = Fraction(int_sum_sqr_2)
            frac_prod = Fraction(int_prod)
            can_int = False
        if can_int:  # == ee must be int
            int_sum_sqr_2 += ee * ee  # type: ignore
            int_prod *= ee  # type: ignore
            e: int = ee - shift  # type: ignore
            int_sum += e  # so we can sum exactly
            int_sum_sqr += e * e  # and compute the sum of squares
        else:
            eef = Fraction(ee)
            frac_sum += eef
            frac_sum_sqr += eef * eef
            frac_prod *= eef

    if n > 2:  # mean_arith is None or an approximation
        # In integer mode the sum was accumulated on shifted values, so
        # the shift is added back to the mean here.
        mean_arith_frac = (Fraction(int_sum, n) + shift) \
            if can_int else (frac_sum / n)
        mean_arith = _from_frac(mean_arith_frac)
    # The sample variance is (sum of squares - (sum * sum / n)) / (n - 1).
    # In integer mode, the shifted accumulators are used; otherwise the
    # unshifted fraction accumulators. `_limited_root(..., 2)` then takes
    # the square root.
    stddev: Final[int | float] = _limited_root(((int_sum_sqr - Fraction(
        int_sum * int_sum, n)) if can_int else (frac_sum_sqr - (
            frac_sum * frac_sum / n))) / (n - 1), 2)

    if minimum > 0:  # geometric mean only defined for all-positive
        if can_int:
            frac_prod = Fraction(int_prod)
        # # mean_geom always <= mean_arith
        # NOTE(review): the third and fourth arguments look like lower
        # and upper bounds for the n-th root (minimum, and the smaller of
        # maximum and arithmetic mean) - confirm against `_limited_root`.
        mean_geom = _limited_root(
            frac_prod, n, _to_frac(minimum), min(
                _to_frac(maximum), (Fraction(mean_arith) if isinstance(
                    mean_arith, int) else Fraction(nextafter(
                        mean_arith, inf))) if (mean_arith_frac is None)
                else mean_arith_frac))

    # Fall back to the standard library if the root above yielded `None`.
    if (mean_geom is None) and (minimum > 0):
        mean_geom = stat_geomean(data)

    if mean_geom is not None:
        # Deal with errors that may have arisen due to
        # numerical imprecision.
        if mean_geom < minimum:
            if _almost_le(minimum, mean_geom):
                mean_geom = minimum
            else:
                raise ValueError(
                    f"mean_geom={mean_geom} but min={minimum}")
        if mean_arith < mean_geom:
            if _almost_le(mean_geom, mean_arith):
                mean_geom = mean_arith
            else:
                raise ValueError(
                    f"mean_geom={mean_geom} but mean_arith={mean_arith}")

    return SampleStatistics(minimum=minimum, median=median,
                            mean_arith=mean_arith, mean_geom=mean_geom,
                            maximum=maximum, stddev=stddev, n=n)
class CsvReader(CsvReaderBase[SampleStatistics]):
    """
    A csv parser for sample statistics.

    The reader supports full statistics rows as well as "single value"
    rows: If only one value column (and no standard deviation column) is
    present, that value is used as minimum, median, arithmetic mean, and
    maximum at once.

    >>> from pycommons.io.csv import csv_read
    >>> csv = ["n;min;mean;med;geom;max;sd",
    ...        "3;2;3;4;3;10;5", "6;2;;;;;0", "1;;;2;;;", "3;;;;;0;",
    ...        "4;5;12;32;11;33;7"]
    >>> for p in csv_read(csv, CsvReader, CsvReader.parse_row):
    ...     print(p)
    3;2;4;3;3;10;5
    6;2;2;2;2;2;0
    1;2;2;2;2;2;None
    3;0;0;0;None;0;0
    4;5;32;12;11;33;7

    >>> csv = ["value", "1", "3", "0", "-5", "7"]
    >>> for p in csv_read(csv, CsvReader, CsvReader.parse_row):
    ...     print(p)
    1;1;1;1;1;1;None
    1;3;3;3;3;3;None
    1;0;0;0;None;0;None
    1;-5;-5;-5;None;-5;None
    1;7;7;7;7;7;None

    >>> csv = ["n;m;sd", "1;3;", "3;5;0"]
    >>> for p in csv_read(csv, CsvReader, CsvReader.parse_row):
    ...     print(p)
    1;3;3;3;3;3;None
    3;5;5;5;5;5;0

    >>> csv = ["n;m", "1;3", "3;5"]
    >>> for p in csv_read(csv, CsvReader, CsvReader.parse_row):
    ...     print(p)
    1;3;3;3;3;3;None
    3;5;5;5;5;5;0
    """

    def __init__(self, columns: dict[str, int]) -> None:
        """
        Create a CSV parser for :class:`SampleStatistics`.

        :param columns: the columns
        :raises TypeError: if `columns` is not a `dict`
        :raises ValueError: if no usable value column can be identified
            or if too many unknown columns remain

        >>> try:
        ...     CsvReader(None)
        ... except TypeError as te:
        ...     print(te)
        columns should be an instance of dict but is None.

        >>> try:
        ...     CsvReader(1)
        ... except TypeError as te:
        ...     print(te)
        columns should be an instance of dict but is int, namely 1.

        >>> try:
        ...     CsvReader(dict())
        ... except ValueError as ve:
        ...     print(ve)
        No useful keys remain in {}.

        >>> try:
        ...     CsvReader({"a": 1, "b": 2})
        ... except ValueError as ve:
        ...     print(ve)
        No useful keys remain in {'a': 1, 'b': 2}.

        >>> try:
        ...     CsvReader({KEY_N: 1, "b": 2, "c": 3})
        ... except ValueError as ve:
        ...     print(ve)
        No useful keys remain in {'b': 2, 'c': 3}.

        >>> try:
        ...     CsvReader({KEY_MINIMUM: 1, "b": 2, "c": 3})
        ... except ValueError as ve:
        ...     print(ve)
        Found strange keys in {'b': 2, 'c': 3}.
        """
        super().__init__(columns)

        #: the index of the number of elements
        self.idx_n: Final[int | None] = csv_column_or_none(
            columns, KEY_N)

        # `has` counts how many value columns were found and `has_idx`
        # remembers the index of the most recently found one.
        has: int = 0
        has_idx: int = -1

        #: the index of the minimum
        self.__idx_min: int | None = csv_column_or_none(
            columns, KEY_MINIMUM)
        if self.__idx_min is not None:
            has += 1
            has_idx = self.__idx_min

        #: the index for the arithmetic mean
        self.__idx_mean_arith: int | None = csv_column_or_none(
            columns, KEY_MEAN_ARITH)
        if self.__idx_mean_arith is not None:
            has += 1
            has_idx = self.__idx_mean_arith

        #: the index for the median
        self.__idx_median: int | None = csv_column_or_none(
            columns, KEY_MEDIAN)
        if self.__idx_median is not None:
            has += 1
            has_idx = self.__idx_median

        #: the index for the geometric mean
        self.__idx_mean_geom: int | None = csv_column_or_none(
            columns, KEY_MEAN_GEOM)
        if self.__idx_mean_geom is not None:
            has += 1
            has_idx = self.__idx_mean_geom

        #: the index for the maximum
        self.__idx_max: int | None = csv_column_or_none(
            columns, KEY_MAXIMUM)
        if self.__idx_max is not None:
            has += 1
            has_idx = self.__idx_max

        #: the index for the standard deviation
        self.__idx_sd: Final[int | None] = csv_column_or_none(
            columns, KEY_STDDEV)

        if has <= 0:
            # No known value column was found. If exactly one column is
            # left, we use it as the single value column, whatever its
            # name is.
            if dict.__len__(columns) == 1:
                self.__idx_min = has_idx = csv_column(
                    columns, next(iter(columns.keys())), True)
                has = 1
            else:
                raise ValueError(f"No useful keys remain in {columns!r}.")
        if dict.__len__(columns) > 1:
            raise ValueError(f"Found strange keys in {columns!r}.")

        #: is this a parser for single number statistics?
        self.__is_single: Final[bool] = (self.__idx_sd is None) and (has == 1)

        if self.__is_single:
            # the one value column provides all of these statistics
            self.__idx_min = self.__idx_max = self.__idx_median \
                = self.__idx_mean_arith = has_idx

    def parse_row(self, data: list[str]) -> SampleStatistics:
        """
        Parse a row of data.

        If no `n` column exists, `n=1` is assumed.
        A missing minimum is replaced by the first available value of
        arithmetic mean, median, geometric mean, and maximum (in this
        order). Any other missing value defaults to the minimum, with two
        exceptions: The geometric mean defaults to `None` if the minimum
        is not positive and the standard deviation defaults to `0` if
        `n > 1` and to `None` otherwise.

        :param data: the data row
        :returns: the sample statistics
        :raises ValueError: if the row does not define any value

        >>> cc = CsvReader({KEY_MINIMUM: 0, KEY_MEAN_ARITH: 1, KEY_MAXIMUM: 2,
        ...                 KEY_STDDEV: 3, KEY_MEDIAN: 4, KEY_MEAN_GEOM: 5,
        ...                 KEY_N: 6})
        >>> try:
        ...     cc.parse_row([None, None, None, None, None, None, "5"])
        ... except ValueError as ve:
        ...     print(str(ve)[:20])
        No value defined for

        >>> try:
        ...     CsvReader({"v": 0}).parse_row([None])
        ... except ValueError as ve:
        ...     print(str(ve)[:20])
        No value defined for
        """
        n: Final[int] = 1 if self.idx_n is None else int(data[self.idx_n])
        mi: int | float | None = csv_val_or_none(
            data, self.__idx_min, str_to_num)

        if self.__is_single:
            if mi is None:
                # Without this check, `mi > 0` below would fail with an
                # unrelated TypeError; the multi-column path raises a
                # ValueError in the same situation, so we do the same.
                raise ValueError(
                    "No value defined for single value column "
                    f"{self.__idx_min} in {data!r}.")
            return SampleStatistics(
                n=n, minimum=mi, median=mi, mean_arith=mi,
                mean_geom=mi if (mi > 0) or (self.__idx_mean_geom is not None)
                else None, maximum=mi, stddev=None if n <= 1 else 0)

        ar: int | float | None = csv_val_or_none(
            data, self.__idx_mean_arith, str_to_num)
        me: int | float | None = csv_val_or_none(
            data, self.__idx_median, str_to_num)
        ge: int | float | None = csv_val_or_none(
            data, self.__idx_mean_geom, str_to_num)
        ma: int | float | None = csv_val_or_none(
            data, self.__idx_max, str_to_num)
        sd: int | float | None = csv_val_or_none(
            data, self.__idx_sd, str_to_num)

        if mi is None:
            # pick the first available value as stand-in for the minimum
            if ar is not None:
                mi = ar
            elif me is not None:
                mi = me
            elif ge is not None:
                mi = ge
            elif ma is not None:
                mi = ma
            else:
                raise ValueError(
                    f"No value defined for min@{self.__idx_min}={mi}, mean@"
                    f"{self.__idx_mean_arith}={ar}, med@{self.__idx_median}="
                    f"{me}, gmean@{self.__idx_mean_geom}={ge}, max@"
                    f"{self.__idx_max}={ma} defined in {data!r}.")
        return SampleStatistics(
            n=n, minimum=mi, mean_arith=mi if ar is None else ar,
            median=mi if me is None else me, mean_geom=(
                mi if mi > 0 else None) if (ge is None) else ge,
            maximum=mi if ma is None else ma,
            stddev=(0 if (n > 1) else None) if sd is None else sd)

    def parse_optional_row(self, data: list[str] | None) \
            -> SampleStatistics | None:
        """
        Parse a row of data that may be empty.

        A row is considered to contain data if at least one of the cells
        for minimum, arithmetic mean, median, geometric mean, or maximum
        is a non-empty string.

        :param data: the row of data that may be empty
        :returns: the sample statistic, if the row contains data, else `None`

        >>> print(CsvReader.parse_optional_row(None, ["1"]))
        None
        >>> print(CsvReader.parse_optional_row(CsvReader({"v": 0}), ["1"]))
        1;1;1;1;1;1;None
        >>> print(CsvReader.parse_optional_row(CsvReader({"v": 0}), [""]))
        None
        """
        if (self is None) or (data is None):
            return None  # trick to make this method usable pseudo-static
        # Check the value columns in the same order as before and parse
        # the row as soon as one non-empty cell is found.
        for idx in (self.__idx_min, self.__idx_mean_arith,
                    self.__idx_median, self.__idx_mean_geom,
                    self.__idx_max):
            if (idx is not None) and (str.__len__(data[idx]) > 0):
                return self.parse_row(data)
        return None
class CsvWriter(CsvWriterBase[SampleStatistics]):
    """A class for CSV writing of :class:`SampleStatistics`."""

    def __init__(self,
                 data: Iterable[SampleStatistics],
                 scope: str | None = None,
                 n_not_needed: bool = False,
                 what_short: str | None = None,
                 what_long: str | None = None) -> None:
        """
        Initialize the csv writer.

        All arguments are forwarded unchanged to the base class
        constructor, together with :class:`SampleStatistics` as the last
        argument. The errors shown in the examples below are therefore
        raised by the base class.

        :param data: the data to use
        :param scope: the prefix to be pre-pended to all columns
        :param n_not_needed: should we omit the `n` column?
        :param what_short: the short description of what the statistics is
            about
        :param what_long: the long description of what the statistics is
            about

        >>> try:
        ...     CsvWriter([], None, n_not_needed=None)
        ... except TypeError as te:
        ...     print(te)
        n_not_needed should be an instance of bool but is None.

        >>> try:
        ...     CsvWriter([])
        ... except ValueError as ve:
        ...     s = str(ve)
        ...     print(s[s.index(' ') + 1:])
        CsvWriter did not see any data.

        >>> try:
        ...     CsvWriter([1])
        ... except TypeError as te:
        ...     print(str(te)[:32])
        data[0] should be an instance of
        """
        # NOTE(review): the last argument presumably tells the base class
        # which record type to expect in `data` - confirm in CsvWriterBase.
        super().__init__(data, scope, n_not_needed, what_short, what_long,
                         SampleStatistics)
class _SampleStats(StreamStatisticsAggregate[SampleStatistics]):
    """An aggregate that buffers all values to build sample statistics."""

    def __init__(self) -> None:
        """Create the aggregate with an empty value buffer."""
        #: the buffer holding every value collected so far
        self.__lst: Final[list[int | float]] = []

    def reset(self) -> None:
        """Discard all values collected so far."""
        self.__lst.clear()

    def add(self, value: int | float) -> None:
        """
        Append a single value to the buffer.

        The value is passed through `try_int` before it is stored.

        :param value: the value to append
        """
        normalized = try_int(value)
        self.__lst.append(normalized)

    def update(self, data: Iterable[int | float | None]) -> None:
        """
        Append all values from a stream of data to the buffer.

        Elements that are `None` are skipped.

        :param data: the data stream
        """
        buffer = self.__lst
        for item in data:
            if item is not None:
                buffer.append(item)

    def result(self) -> SampleStatistics:
        """
        Compute the sample statistics over all buffered values.

        The work is done by :meth:`SampleStatistics.from_samples`, which
        raises a `ValueError` if no value was collected.

        :return: the statistics over the collected values
        """
        collected = self.__lst
        return SampleStatistics.from_samples(collected)

    def result_or_none(self) -> SampleStatistics | None:
        """
        Get the result if any data was collected, otherwise `None`.

        :return: The return value of :meth:`result` if any data was collected,
            otherwise `None`
        """
        if list.__len__(self.__lst) <= 0:
            return None
        return self.result()