Coverage for pycommons / math / sample_statistics.py: 98%

328 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-11 03:04 +0000

1""" 

2A simple and immutable basic statistics record computed over a data sample. 

3 

4Here we provide records of statistics that are computed over a fully available 

5sample of data. 

6Such records are instances of class 

7:class:`~pycommons.math.sample_statistics.SampleStatistics`. 

8They offer the 

9:attr:`~pycommons.math.stream_statistics.StreamStatistics.minimum` and 

10:attr:`~pycommons.math.stream_statistics.StreamStatistics.maximum` of the data 

11as well as the number 

12:attr:`~pycommons.math.stream_statistics.StreamStatistics.n` of observed 

13samples. 

14They also offer approximations of the arithmetic mean as attribute 

15:attr:`~pycommons.math.stream_statistics.StreamStatistics.mean_arith` and 

16the approximation of the standard deviation as attribute 

17:attr:`~pycommons.math.stream_statistics.StreamStatistics.stddev`. 

18Additionally, they provide the sample 

19:attr:`~pycommons.math.sample_statistics.SampleStatistics.median` 

20and an approximation 

21:attr:`~pycommons.math.sample_statistics.SampleStatistics.mean_geom` of the 

22geometric mean. 

23 

24This class is an extension of class 

25:class:`~pycommons.math.stream_statistics.StreamStatistics`. 

26Stream statistics are less accurate and do not provide the median or geometric 

27mean. 

28They, however, can be applied to a stream of data and do not require that all 

29the data be available as a complete chunk at once. 

30Sample statistics require access to the full data, but also offer higher 

31accuracy. 

32 

33There is an absolute order defined upon these records. 

34They are hashable and immutable. 

35We provide methods to store them to CSV format via the class 

36:class:`~pycommons.math.sample_statistics.CsvWriter` 

37and to load them from CSV data via the class 

38:class:`~pycommons.math.sample_statistics.CsvReader`. 

39Functions that access attributes can be obtained via 

40:meth:`~pycommons.math.stream_statistics.StreamStatistics.getter`. 

41 

42>>> ag = SampleStatistics.aggregate() 

43>>> ag.update((1, 2, 3)) 

44>>> ag.add(4) 

45>>> ag.add(5) 

46>>> r1 = ag.result() 

47>>> repr(r1) 

48'SampleStatistics(n=5, minimum=1, mean_arith=3, maximum=5, \ 

49stddev=1.5811388300841898, median=3, mean_geom=2.6051710846973517)' 

50>>> str(r1) 

51'5;1;3;3;2.6051710846973517;5;1.5811388300841898' 

52 

53>>> r2 = SampleStatistics.from_samples((1, 2, 3, 4, 5)) 

54>>> r1 == r2 

55True 

56 

57>>> ag.reset() 

58>>> try: 

59... ag.result() 

60... except ValueError as ve: 

61... print(ve) 

62Data source cannot be empty. 

63 

64>>> print(ag.result_or_none()) 

65None 

66""" 

67 

68from contextlib import suppress 

69from dataclasses import dataclass 

70from fractions import Fraction 

71from math import ceil, inf, isfinite, nan, nextafter 

72from statistics import geometric_mean as stat_geomean 

73from statistics import mean as stat_mean 

74from typing import Final, Iterable, Union 

75 

76from pycommons.io.csv import ( 

77 CSV_SEPARATOR, 

78 csv_column, 

79 csv_column_or_none, 

80 csv_val_or_none, 

81) 

82from pycommons.io.csv import CsvReader as CsvReaderBase 

83from pycommons.math.int_math import __DBL_INT_LIMIT_P_I as _DBL_INT_LIMIT_P_I 

84from pycommons.math.int_math import ( 

85 ceil_div, 

86 float_to_frac, 

87 try_int, 

88 try_int_div, 

89) 

90from pycommons.math.stream_statistics import ( 

91 KEY_MAXIMUM, 

92 KEY_MEAN_ARITH, 

93 KEY_MEAN_GEOM, 

94 KEY_MEDIAN, 

95 KEY_MINIMUM, 

96 KEY_N, 

97 KEY_STDDEV, 

98 StreamStatistics, 

99 StreamStatisticsAggregate, 

100) 

101from pycommons.math.stream_statistics import CsvWriter as CsvWriterBase 

102from pycommons.strings.string_conv import ( 

103 str_to_num, 

104) 

105from pycommons.types import check_int_range, type_error 

106 

107 

def _mean_of_two(a: int | float, b: int | float) -> int | float:
    """
    Compute the arithmetic mean of exactly two numbers.

    Both arguments are first passed through `try_int`. If they are then
    equal, that value is returned directly. Two integers are averaged via
    `try_int_div`. In all other cases, floating point arithmetic is used and
    a sum that is not finite is avoided by halving both values before
    adding them.

    :param a: the first number
    :param b: the second number
    :returns: the mean of `a` and `b`

    >>> _mean_of_two(1, 1)
    1
    >>> _mean_of_two(1.0, 1.0)
    1
    >>> _mean_of_two(1, 2)
    1.5
    >>> _mean_of_two(1, 3)
    2
    >>> _mean_of_two(1.5, 1.7)
    1.6

    >>> _mean_of_two(-1, -1)
    -1
    >>> _mean_of_two(-1.0, -1.0)
    -1
    >>> _mean_of_two(-1, -2)
    -1.5
    >>> _mean_of_two(-1, -3)
    -2
    >>> _mean_of_two(-1.5, -1.7)
    -1.6

    >>> _mean_of_two(1, -1)
    0
    >>> _mean_of_two(-1.0, 1.0)
    0
    >>> _mean_of_two(1, -2)
    -0.5
    >>> _mean_of_two(1, -3)
    -1
    >>> _mean_of_two(1.5, -1.7)
    -0.09999999999999998
    >>> _mean_of_two(-1.5, 1.7)
    0.09999999999999998

    >>> _mean_of_two(1.7976931348623157e+308, 1.7976931348623157e+308)
    1.7976931348623157e+308
    >>> _mean_of_two(1.7976931348623155e+308, 1.7976931348623157e+308)
    1.7976931348623155e+308
    """
    first = try_int(a)
    second = try_int(b)

    # Identical values are their own mean, no arithmetic needed.
    if first == second:
        return first

    # Two integers: delegate the division so that the result can stay exact.
    if isinstance(first, int) and isinstance(second, int):
        return try_int_div(first + second, 2)

    total: float = first + second
    if isfinite(total):
        return 0.5 * total
    # The sum left the finite range: halve first, add afterwards.
    return (0.5 * first) + (0.5 * second)

165 

166 

167def _almost_le(a: int | float, b: int | float) -> bool: 

168 """ 

169 Check if `a <= b` holds approximately. 

170 

171 `a <= b` holds if, well, `a` is less than or equal to `b`. It holds almost 

172 if `a` is just a tiny bit larger than `b`. 

173 

174 :param a: the first value 

175 :param b: the second value 

176 :returns: `True` if we can say: `a` is approximately less or equal than `b` 

177 and any deviation from this probably results from numerical issues. 

178 

179 >>> _almost_le(1, 0) 

180 False 

181 >>> _almost_le(0, 0) 

182 True 

183 >>> _almost_le(1.1, 1.09) 

184 False 

185 >>> _almost_le(1.1, 1.099999) 

186 False 

187 >>> _almost_le(1.1, 1.09999999) 

188 False 

189 >>> _almost_le(1.1, 1.0999999999) 

190 False 

191 >>> _almost_le(1.1, 1.099999999999) 

192 False 

193 >>> _almost_le(1.099999999999, 1.1) 

194 True 

195 >>> _almost_le(1.1, 1.0999999999999) 

196 True 

197 >>> _almost_le(1.0999999999999, 1.1) 

198 True 

199 

200 >>> _almost_le(0, -1) 

201 False 

202 >>> _almost_le(-1.09, -1.1) 

203 False 

204 >>> _almost_le(-1.099999, -1.1) 

205 False 

206 >>> _almost_le(-1.09999999, -1.1) 

207 False 

208 >>> _almost_le(-1.0999999999, -1.1) 

209 False 

210 >>> _almost_le(-1.099999999999, -1.1) 

211 False 

212 >>> _almost_le(-1.1, -1.099999999999) 

213 True 

214 >>> _almost_le(-1.0999999999999, -1.1) 

215 True 

216 >>> _almost_le(-1.1, -1.0999999999999) 

217 True 

218 

219 >>> _almost_le(23384026197294446691258957323460528314494920687616, 

220 ... 2.3384026197294286e+49) 

221 True 

222 >>> _almost_le(nextafter(5, inf), nextafter(5, -inf)) 

223 True 

224 >>> _almost_le(nextafter(nextafter(5, inf), inf), 

225 ... nextafter(nextafter(5, -inf), -inf)) 

226 True 

227 >>> _almost_le(nextafter(nextafter(nextafter(5, inf), inf), inf), 

228 ... nextafter(nextafter(nextafter(5, -inf), -inf), -inf)) 

229 True 

230 >>> _almost_le(nextafter(nextafter(nextafter(nextafter(5, inf), inf), 

231 ... inf), inf), nextafter(nextafter(nextafter(5, -inf), 

232 ... -inf), -inf)) 

233 True 

234 >>> _almost_le(5.114672824837722e+148, 5.1146728248374894e+148) 

235 True 

236 

237 >>> _almost_le(-1.7976931348623157e+308, 

238 ... -int(1.7976931348623157e+308) * 10) 

239 False 

240 >>> _almost_le(-int(1.7976931348623157e+308) * 10, 

241 ... -1.7976931348623157e+308) 

242 True 

243 >>> _almost_le(1e-302, 0) 

244 True 

245 >>> _almost_le(1e-200, 0) 

246 False 

247 """ 

248 if a <= b: 

249 return True 

250 

251 if a < 0: 

252 a, b = -b, -a # maybe: a = -19, b = -20 -> maybe: a = 20, b = 19 

253 elif b <= 0: 

254 return (b >= 0) and (a <= 1e-300) 

255 

256 with suppress(OverflowError): 

257 use_a: int | float = a 

258 use_b: int | float = b 

259 for _ in range(3): 

260 use_a = nextafter(use_a, -inf) 

261 use_b = nextafter(use_b, inf) 

262 if use_a <= use_b: 

263 return True 

264 try: 

265 return (b / a) > 0.9999999999999 

266 except OverflowError: 

267 a_int: Final[int] = int(a) 

268 b_int: Final[int] = int(b) 

269 return (9999999999999 * a_int) <= (b_int * 10000000000000) 

270 

271 

272def _to_frac(a: int | float) -> Fraction: 

273 """ 

274 Convert a number to a fraction. 

275 

276 :param a: the number 

277 :returns: the fraction 

278 

279 >>> _to_frac(23) 

280 Fraction(23, 1) 

281 >>> _to_frac(2.34) 

282 Fraction(117, 50) 

283 """ 

284 return Fraction(a) if isinstance(a, int) else Fraction(*float_to_frac(a)) 

285 

286 

287def _from_frac(a: int | float | Fraction) -> int | float: 

288 """ 

289 Convert a fraction to either an integer or a float. 

290 

291 :param a: the fraction 

292 :returns: the integer or float value 

293 

294 >>> _from_frac(1.6) 

295 1.6 

296 >>> _from_frac(123) 

297 123 

298 >>> _from_frac(Fraction(7, 8)) 

299 0.875 

300 >>> _from_frac(Fraction(1237, 1)) 

301 1237 

302 """ 

303 if isinstance(a, int): 

304 return a 

305 if isinstance(a, float): 

306 return try_int(a) 

307 num: Final[int] = a.numerator 

308 denom: Final[int] = a.denominator 

309 if denom == 1: 

310 return num 

311 return try_int_div(num, denom) 

312 

313 

#: the 0 fraction, i.e., the constant `Fraction(0, 1)`; it serves as default
#: lower limit and as comparison value in the root computations below
_FRAC_0: Final[Fraction] = Fraction(0, 1)
#: the 1 fraction, i.e., the constant `Fraction(1, 1)`; it serves as
#: comparison value in the root bound computations below
_FRAC_1: Final[Fraction] = Fraction(1, 1)

318 

319 

def _int_root_bound_lower(base: int, root: int) -> int:
    """
    Compute an integer lower bound for the `root`-th root of `base`.

    We use that `log(a ** b) = log(a) * b`.
    In binary, this means that `a ** b == 2 ** (log2(a) * b)` or, for roots,
    `a ** (1/b) == 2 ** (log2(a) / b)`.
    In bits, `2 ** x == 1 << x` and `floor(log2(x)) == x.bit_length() - 1`.
    Therefore, we know that `a ** (1/b) >= 1 << ((a.bit_length() // b) - 1)`.
    Similarly, an upper bound is obtained by rounding up at each step, i.e.,
    `a ** (1/b) <= 1 << (1 + ceil((a.bit_length() + 1) / b))`.

    If `a.bit_length() // b` is not positive, the bound is `1` for
    `base >= 1` and `0` otherwise.

    :param base: the base number
    :param root: the root
    :returns: the lower bound

    >>> _int_root_bound_lower(8, 3)
    1

    >>> _int_root_bound_lower(8, 2)
    2

    >>> _int_root_bound_lower(25, 3)
    1
    """
    shift: Final[int] = base.bit_length() // root
    if shift > 0:
        return 1 << (shift - 1)
    # Too few bits for a power of two: fall back to the trivial bounds.
    return 1 if base >= 1 else 0

347 

348 

def _int_root_bound_upper(base: int, root: int) -> int:
    """
    Compute an integer upper bound for the `root`-th root of `base`.

    For `root == 1`, the bound is `base` itself. Otherwise it is the smaller
    of two candidates: a power of two derived from the bit length of
    `base`, and half of `base` (plus 1 if `base < 6`).

    :param base: the base number
    :param root: the root
    :returns: the upper bound

    >>> _int_root_bound_upper(8, 3)
    4

    >>> _int_root_bound_upper(8, 2)
    4

    >>> _int_root_bound_upper(25, 3)
    8
    """
    if root == 1:
        return base

    # candidate 1: a power of two based on the (rounded-up) bit length
    by_bits = 1 << (1 + ceil_div(base.bit_length() + 1, root))
    # candidate 2: half the base, with a correction for very small bases
    by_half = (base // 2) + (1 if base < 6 else 0)
    return min(by_bits, by_half)

368 

369 

def _frac_root_bound_lower(base: Fraction, root: int) -> Fraction:
    """
    Compute a lower bound for the `root`-th root of a fraction.

    Non-positive bases yield 0 and a base of exactly 1 yields 1.
    For `0 < base < 1`, the bound is the reciprocal of the integer upper
    bound computed for the rounded-up reciprocal of `base`. For `base > 1`,
    it is the integer lower bound of `base` truncated to an integer.

    :param base: the base number
    :param root: the root
    :returns: the lower bound

    >>> _frac_root_bound_lower(Fraction(8), 3)
    Fraction(1, 1)

    >>> _frac_root_bound_lower(Fraction(8), 2)
    Fraction(2, 1)

    >>> _frac_root_bound_lower(Fraction(25), 3)
    Fraction(1, 1)

    >>> _frac_root_bound_lower(Fraction(3, 8), 3)
    Fraction(1, 2)

    >>> _frac_root_bound_lower(Fraction(11, 8), 2)
    Fraction(1, 1)

    >>> _frac_root_bound_lower(Fraction(11, 25), 3)
    Fraction(1, 2)
    """
    if base <= _FRAC_0:
        return _FRAC_0
    if base < _FRAC_1:
        # An upper bound for the reciprocal is a lower bound for the value.
        reciprocal: Final[int] = ceil_div(base.denominator, base.numerator)
        return Fraction(1, _int_root_bound_upper(reciprocal, root))
    if base == _FRAC_1:
        return _FRAC_1
    return Fraction(_int_root_bound_lower(int(base), root))

402 

403 

def _frac_root_bound_upper(base: Fraction, root: int) -> Fraction:
    """
    Compute an upper bound for the `root`-th root of a fraction.

    Non-positive bases yield 0 and a base of exactly 1 yields 1.
    For `0 < base < 1`, the bound is the reciprocal of the integer lower
    bound computed for the rounded-down reciprocal of `base`. For
    `base > 1`, it is the integer upper bound of `base` rounded up to an
    integer.

    :param base: the base number
    :param root: the root
    :returns: the upper bound

    >>> _frac_root_bound_upper(Fraction(8), 3)
    Fraction(4, 1)

    >>> _frac_root_bound_upper(Fraction(8), 2)
    Fraction(4, 1)

    >>> _frac_root_bound_upper(Fraction(25), 3)
    Fraction(8, 1)

    >>> _frac_root_bound_upper(Fraction(3, 8), 3)
    Fraction(1, 1)

    >>> _frac_root_bound_upper(Fraction(11, 8), 2)
    Fraction(2, 1)

    >>> _frac_root_bound_upper(Fraction(11, 25), 3)
    Fraction(1, 1)
    """
    if base <= _FRAC_0:
        return _FRAC_0
    if base < _FRAC_1:
        # A lower bound for the reciprocal is an upper bound for the value.
        reciprocal: Final[int] = base.denominator // base.numerator
        return Fraction(1, _int_root_bound_lower(reciprocal, root))
    if base == _FRAC_1:
        return _FRAC_1
    return Fraction(_int_root_bound_upper(ceil(base), root))

436 

437 

def _limited_root(base: Fraction, root: int,
                  mini: Fraction = _FRAC_0,
                  maxi: Fraction | None = None) -> int | float:
    """
    Try to compute a root at a precision so exact that no digits are lost.

    The computation proceeds in up to two stages. If `base` is an integer,
    an integer binary search is performed first, which either finds an exact
    integer root or narrows down the limits. Afterwards, a binary search
    over fractions is carried out until the result, converted via
    `_from_frac`, no longer changes for several consecutive steps.

    :param base: the base, i.e., the number whose root is to be computed
    :param root: the degree of the root, i.e., the result `r` should
        satisfy `r ** root == base` as exactly as possible
    :param mini: a limit for the smallest possible result
    :param maxi: a maximum value, the limit for the largest possible result,
        or `None` if no upper limit is known
    :returns: the `root`-th root of `base`

    >>> from math import sqrt
    >>> sqrt(3)
    1.7320508075688772
    >>> _limited_root(Fraction(3, 1), 2)
    1.7320508075688772
    >>> _limited_root(Fraction(4, 1), 2)
    2

    >>> _limited_root(Fraction(3 ** 3, 1), 3)
    3
    >>> type(_limited_root(Fraction(3 ** 3, 1), 3))
    <class 'int'>

    >>> _limited_root(Fraction(3 ** 333, 1), 333)
    3

    >>> _limited_root(Fraction(9000 ** 1000, 1), 1000)
    9000

    >>> _limited_root(Fraction((10 ** 8) ** 100, 1), 35)
    71968567300115201992879

    >>> 0.456 ** (1 / 25)
    0.9690776862089129
    >>> _limited_root(Fraction(456, 1000), 25)
    0.9690776862089129

    >>> _limited_root(Fraction(2, 1), 2)
    1.4142135623730951
    >>> sqrt(2)
    1.4142135623730951
    """
    lower: Fraction | None = None
    upper: Fraction | None = None
    if base.denominator == 1:
        # Stage 1: the base is an integer, so try an integer binary search.
        ibase = base.numerator
        if ibase <= 1:
            return ibase  # such a base is returned unchanged as its own root

        ilower: int = max(int(mini), _int_root_bound_lower(ibase, root))
        iupper: int = _int_root_bound_upper(ibase, root)
        if maxi is not None:
            iupper = min(int(maxi) + 1, iupper)
        imid: int = ilower
        while ilower <= iupper:
            imid = (ilower + iupper) >> 1  # midpoint of the current interval
            imid_exp = imid ** root
            if imid_exp > ibase:
                iupper = imid - 1
            elif imid_exp < ibase:
                ilower = imid + 1
            else:
                return imid  # We got an exact integer result
        # No exact integer result, but at least new limits
        upper = Fraction(imid + 1)
        lower = Fraction(max(0, imid - 1))

    # Now we do binary search using fractions
    # NOTE(review): the nesting of the following limit refinements was
    # reconstructed from a listing without indentation - confirm.
    if upper is None:
        upper = max(base, _FRAC_1)
    if maxi is not None:
        upper = min(upper, maxi)
    upper = min(upper, _frac_root_bound_upper(base, root))
    if lower is None:
        lower = _FRAC_0
    lower = max(mini, lower)
    lower = max(lower, _frac_root_bound_lower(base, root))

    # Now compute the root using binary search within the limits.
    guess: int | float = nan  # nan never equals the first real guess
    equal_steps: int = 4  # number of unchanged guesses still required
    while equal_steps > 0:
        last_guess: int | float = guess
        mid: Fraction = (lower + upper) / 2
        mid_exp = mid ** root
        if mid_exp > base:
            upper = mid
        elif mid_exp < base:
            lower = mid
        else:
            return _from_frac(mid)  # the midpoint is the exact root

        guess = _from_frac(mid)
        # Only count a step as "unchanged" if both type and value agree.
        if (type(guess) is type(last_guess)) and (guess == last_guess):
            equal_steps -= 1
        else:
            equal_steps = 4
    return guess

539 

540 

541@dataclass(frozen=True, init=False, order=False, eq=False) 

542class SampleStatistics(StreamStatistics): 

543 """An immutable record with sample statistics of one quantity.""" 

544 

545 #: The median, i.e., the value in the middle of the sorted list of 

546 #: :attr:`~pycommons.math.stream_statistics.StreamStatistics.n` data 

547 #: samples. 

548 median: int | float 

549 #: The geometric mean value, if defined. This is the 

550 #: :attr:`~pycommons.math.stream_statistics.StreamStatistics.n`-th root 

551 #: of the product of all data samples. 

552 #: This value will be `None` if there was any sample which is not greater 

553 #: than 0. 

554 mean_geom: int | float | None 

555 

def __init__(self, n: int, minimum: int | float, median: int | float,
             mean_arith: int | float, mean_geom: int | float | None,
             maximum: int | float, stddev: int | float | None):
    """
    Create a sample statistics record.

    The values shared with the base class are validated and stored by its
    constructor. Afterwards, the median and the geometric mean are checked
    for consistency with these values and stored as well.

    :param n: the sample size, must be `n >= 1`
    :param minimum: the minimum
    :param median: the median
    :param mean_arith: the arithmetic mean
    :param mean_geom: the geometric mean, or `None` if it is undefined
    :param maximum: the maximum
    :param stddev: the standard deviation, must be `None` if `n == 1`
    :raises ValueError: if the values are inconsistent with each other

    >>> s1 = SampleStatistics(2, 1, 2, 4.0, 3, 6, 0.2)
    >>> s1.n
    2
    >>> s1.minimum
    1
    >>> s1.median
    2
    >>> s1.mean_arith
    4
    >>> s1.mean_geom
    3
    >>> s1.maximum
    6
    >>> s1.stddev
    0.2
    >>> hash(s1)
    8839096310731950625

    >>> s2 = SampleStatistics(1, 0, 0.0, 0, None, 0.0, None)
    >>> s2.n
    1
    >>> s2.minimum
    0
    >>> s2.median
    0
    >>> s2.mean_arith
    0
    >>> print(s2.mean_geom)
    None
    >>> s2.maximum
    0
    >>> print(s2.stddev)
    None
    >>> hash(s2) == hash((0, 0, 0, inf, 0, inf, 1, 1))
    True

    >>> s3 = SampleStatistics(n=3, minimum=5, median=5, maximum=5,
    ...                       mean_arith=5, mean_geom=5, stddev=0.0)
    >>> s3.stddev
    0
    >>> hash(s3)
    1175763770956004139

    >>> sset = {s1, s1, s2, s1, s3, s3, s2, s1}
    >>> len(sset)
    3
    >>> print(list(sss.n for sss in sorted(sset)))
    [1, 2, 3]
    >>> print(list(sss.minimum for sss in sorted(sset)))
    [0, 1, 5]

    >>> try:
    ...     SampleStatistics(n=1, minimum=5, median=6, maximum=5,
    ...                      mean_arith=5, mean_geom=5, stddev=None)
    ... except ValueError as ve:
    ...     print(ve)
    median (6) must equal minimum (5) if n=1.

    >>> try:
    ...     SampleStatistics(n=2, minimum=5, median=4, maximum=5,
    ...                      mean_arith=5, mean_geom=5, stddev=0)
    ... except ValueError as ve:
    ...     print(ve)
    median (4) must be >= minimum (5) if n>1.

    >>> try:
    ...     SampleStatistics(n=1, minimum=5, median=5, maximum=6,
    ...                      mean_arith=5, mean_geom=5, stddev=None)
    ... except ValueError as ve:
    ...     print(ve)
    maximum (6) must equal minimum (5) if n=1.

    >>> try:
    ...     SampleStatistics(n=2, minimum=5, median=6, maximum=5,
    ...                      mean_arith=5, mean_geom=5, stddev=0)
    ... except ValueError as ve:
    ...     print(ve)
    maximum (5) must be >= med (6) if n>1.

    >>> try:
    ...     SampleStatistics(n=1, minimum=5, median=5, maximum=5,
    ...                      mean_arith=4, mean_geom=5, stddev=None)
    ... except ValueError as ve:
    ...     print(ve)
    mean_arith (4) must equal minimum (5) if n=1.

    >>> try:
    ...     SampleStatistics(n=2, minimum=5, median=6, maximum=6,
    ...                      mean_arith=4, mean_geom=5, stddev=None)
    ... except ValueError as ve:
    ...     print(ve)
    minimum<=mean_arith<=maximum must hold, but got 5, 4, and 6.

    >>> try:
    ...     SampleStatistics(n=1, minimum=5, median=5, maximum=5,
    ...                      mean_arith=5, mean_geom=None, stddev=None)
    ... except ValueError as ve:
    ...     print(ve)
    If minimum (5) > 0, then mean_geom must be defined, but it is None.

    >>> try:
    ...     SampleStatistics(n=1, minimum=0, median=0, maximum=0,
    ...                      mean_arith=0, mean_geom=0, stddev=None)
    ... except ValueError as ve:
    ...     print(ve)
    If minimum (0) <= 0, then mean_geom is undefined, but it is 0.

    >>> try:
    ...     SampleStatistics(n=1, minimum=5, median=5, maximum=5,
    ...                      mean_arith=5, mean_geom=6, stddev=None)
    ... except ValueError as ve:
    ...     print(ve)
    mean_geom (6) must equal minimum (5) if n=1.

    >>> try:
    ...     SampleStatistics(n=3, minimum=5, median=6, maximum=7,
    ...                      mean_arith=6, mean_geom=6.1, stddev=1)
    ... except ValueError as ve:
    ...     print(ve)
    mean_geom (6.1) must be <= mean_arith (6).

    >>> try:
    ...     SampleStatistics(n=3, minimum=5, median=6, maximum=7,
    ...                      mean_arith=6, mean_geom=6, stddev=-1)
    ... except ValueError as ve:
    ...     print(ve)
    stddev must be >= 0, but is -1.

    >>> try:
    ...     SampleStatistics(n=3, minimum=5, median=6, maximum=7,
    ...                      mean_arith=6, mean_geom=6, stddev=0)
    ... except ValueError as ve:
    ...     print(str(ve)[:59])
    If stddev (0) is 0, then minimum (5) must equal maximum (7)

    >>> try:
    ...     SampleStatistics(n=3, minimum=5, median=5, maximum=5,
    ...                      mean_arith=5, mean_geom=5, stddev=1)
    ... except ValueError as ve:
    ...     print(str(ve)[:59])
    If stddev (1) is 0, then minimum (5) must equal maximum (5)

    >>> try:
    ...     SampleStatistics(n=3, minimum=5, median=5, maximum=5,
    ...                      mean_arith=5, mean_geom=5, stddev=None)
    ... except ValueError as ve:
    ...     print(ve)
    If n=1, stddev=None and vice versa, but got n=3 and stddev=None.

    >>> try:
    ...     SampleStatistics(n=1, minimum=5, median=5, maximum=5,
    ...                      mean_arith=5, mean_geom=5, stddev=1)
    ... except ValueError as ve:
    ...     print(ve)
    If n=1, stddev=None and vice versa, but got n=1 and stddev=1.

    >>> try:
    ...     SampleStatistics(n=2, minimum=5, median=5, maximum=6,
    ...                      mean_arith=6, mean_geom=7, stddev=1)
    ... except ValueError as ve:
    ...     print(ve)
    minimum<=mean_geom<=maximum must hold, but got 5, 7, and 6.
    """
    super().__init__(n, minimum, mean_arith, maximum, stddev)

    # The base class has stored the (normalized) limits, reuse them below.
    lowest: Final[int | float] = self.minimum
    highest: Final[int | float] = self.maximum
    single: Final[bool] = n == 1

    # The median must not be below the minimum ...
    median = try_int(median)
    if single:
        if median != lowest:
            raise ValueError(
                f"median ({median}) must equal minimum ({lowest}) if n=1.")
    elif median < lowest:
        raise ValueError(
            f"median ({median}) must be >= minimum ({lowest}) if n>1.")

    # ... and not above the maximum.
    if highest < median:
        raise ValueError(
            f"maximum ({highest}) must be >= med ({median}) if n>1.")

    # The geometric mean is defined if and only if the minimum is positive.
    if mean_geom is None:
        if lowest > 0:
            raise ValueError(
                f"If minimum ({lowest}) > 0, then mean_geom must be"
                f" defined, but it is {mean_geom}.")
    elif lowest <= 0:
        raise ValueError(
            f"If minimum ({lowest}) <= 0, then mean_geom is "
            f"undefined, but it is {mean_geom}.")
    else:
        mean_geom = try_int(mean_geom)
        if single:
            if mean_geom != lowest:
                raise ValueError(
                    f"mean_geom ({mean_geom}) must equal "
                    f"minimum ({lowest}) if n=1.")
        elif not (lowest <= mean_geom <= highest):
            raise ValueError(
                "minimum<=mean_geom<=maximum must hold, but got "
                f"{lowest}, {mean_geom}, and {highest}.")
        elif mean_geom > self.mean_arith:
            raise ValueError(
                f"mean_geom ({mean_geom}) must be <= "
                f"mean_arith ({self.mean_arith}).")

    # The dataclass is frozen, so the fields are set via `object`.
    object.__setattr__(self, "median", median)
    object.__setattr__(self, "mean_geom", mean_geom)

778 

def __str__(self) -> str:
    """
    Get a string representation of this object.

    The values `n`, `minimum`, `median`, `mean_arith`, `mean_geom`,
    `maximum`, and `stddev` are converted to strings and joined, in this
    order, with the CSV separator.

    :returns: the string
    """
    values = (self.n, self.minimum, self.median, self.mean_arith,
              self.mean_geom, self.maximum, self.stddev)
    return CSV_SEPARATOR.join(str(value) for value in values)

788 

789 def min_mean(self) -> int | float: 

790 """ 

791 Obtain the smallest of the three mean values. 

792 

793 :returns: the smallest of `mean_arith`, `mean_geom`, and `median` 

794 

795 >>> SampleStatistics(1, 0, 0.0, 0, None, 0.0, None).min_mean() 

796 0 

797 >>> SampleStatistics(2, 1, 2, 4.0, 3, 6, 0.2).min_mean() 

798 2 

799 >>> SampleStatistics(2, 1, 3.2, 4.0, 3, 6, 0.2).min_mean() 

800 3 

801 >>> SampleStatistics(2, 1, 5.2, 4.0, 3, 6, 0.2).min_mean() 

802 3 

803 """ 

804 if self.mean_geom is None: # geometric mean is always <= arithmean 

805 return min(self.mean_arith, self.median) 

806 return min(self.mean_geom, self.median) 

807 

808 def max_mean(self) -> int | float: 

809 """ 

810 Obtain the largest of the three mean values. 

811 

812 :returns: the largest of `mean_arith`, `mean_geom`, and `median` 

813 

814 >>> SampleStatistics(1, 0, 0.0, 0, None, 0.0, None).max_mean() 

815 0 

816 >>> SampleStatistics(2, 1, 2, 4.0, 3, 6, 0.2).max_mean() 

817 4 

818 >>> SampleStatistics(2, 1, 3.2, 4.0, 3, 6, 0.2).max_mean() 

819 4 

820 >>> SampleStatistics(2, 1, 5.2, 4.0, 3, 6, 0.2).max_mean() 

821 5.2 

822 """ 

823 return max(self.mean_arith, self.median) 

824 

def compact(self, needs_n: bool = True) \
        -> "int | float | SampleStatistics":
    """
    Try to represent this object as single number, if possible.

    :param needs_n: if this is `True`, the default, then the object is
        only turned into a single number if also `n==1`. Otherwise, `n`
        is ignored
    :returns: an integer or float if this object's minimum equals its
        maximum, the object itself otherwise
    :raises TypeError: if `needs_n` is not a `bool`

    >>> s = SampleStatistics.from_single_value(10, 1)
    >>> s.compact() == 10
    True
    >>> s.compact() == s.compact(True)
    True

    >>> s = SampleStatistics.from_single_value(10, 2)
    >>> s.compact() is s
    True
    >>> s.compact() == s.compact(True)
    True

    >>> s = SampleStatistics.from_single_value(10, 2)
    >>> s.compact(False) == 10
    True

    >>> s = SampleStatistics(2, 1, 2, 4, 3, 5, 3)
    >>> s.compact() is s
    True

    >>> s = SampleStatistics(2, 1, 2, 4, 3, 5, 3)
    >>> s.compact(False) is s
    True

    >>> try:
    ...     s.compact(1)
    ... except TypeError as te:
    ...     print(te)
    needs_n should be an instance of bool but is int, namely 1.

    >>> try:
    ...     s.compact(None)
    ... except TypeError as te:
    ...     print(te)
    needs_n should be an instance of bool but is None.
    """
    if not isinstance(needs_n, bool):
        raise type_error(needs_n, "needs_n", bool)

    smallest: Final[int | float] = self.minimum
    if smallest < self.maximum:
        return self  # different values exist, no single number suffices
    if needs_n and (self.n > 1):
        return self  # the sample size would be lost
    return smallest

877 

878 def _key(self) -> tuple[int | float, int | float, int | float, 

879 int | float, int | float, int | float, int, int]: 

880 r""" 

881 Get a comparison and hash key. 

882 

883 :returns: the comparison key 

884 

885 >>> SampleStatistics(2, 1, 2, 4.0, 3, 6, 0.2)._key() 

886 (1, 2, 4, 3, 6, 0.2, 2, 1) 

887 

888 >>> SampleStatistics(1, 0, 0, 0, None, 0, None)._key() 

889 (0, 0, 0, inf, 0, inf, 1, 1) 

890 """ 

891 return (self.minimum, self.median, self.mean_arith, 

892 inf if self.mean_geom is None else self.mean_geom, 

893 self.maximum, inf if self.stddev is None else self.stddev, 

894 self.n, 1) 

895 

def get_mean_geom(self) -> int | float | None:
    """
    Get the geometric mean (:attr:`~SampleStatistics.mean_geom`).

    The type of `self` is checked explicitly, so that this method can also
    be called as plain function via the class.

    :returns: the geometric mean (:attr:`~SampleStatistics.mean_geom`) of
        all the samples, `None` if the geometric mean is not defined.
    :raises TypeError: if an object of the wrong type is passed in as self

    >>> SampleStatistics(5, 3, 5, 6, 4, 7, 2).get_mean_geom()
    4

    >>> try:
    ...     SampleStatistics.get_mean_geom(None)
    ... except TypeError as te:
    ...     print(str(te)[:20])
    self should be an in
    """
    if isinstance(self, SampleStatistics):
        return self.mean_geom
    raise type_error(self, "self", SampleStatistics)

916 

def get_median(self) -> int | float:
    """
    Get the :attr:`~SampleStatistics.median` of all the samples.

    The type of `self` is checked explicitly, so that this method can also
    be called as plain function via the class.

    :returns: the :attr:`~SampleStatistics.median` of all the samples.
    :raises TypeError: if an object of the wrong type is passed in as self

    >>> SampleStatistics(5, 3, 5, 6, 4, 7, 2).get_median()
    5

    >>> try:
    ...     SampleStatistics.get_median(None)
    ... except TypeError as te:
    ...     print(str(te)[:20])
    self should be an in
    """
    if isinstance(self, SampleStatistics):
        return self.median
    raise type_error(self, "self", SampleStatistics)

936 

937 @classmethod 

938 def from_single_value(cls, value: Union[ 

939 int, float, "StreamStatistics"], n: int = 1) -> "SampleStatistics": 

940 r""" 

941 Create a sample statistics from a single number. 

942 

943 :param value: the single value 

944 :param n: the number of samples, i.e., the number of times this value 

945 occurred 

946 :returns: the sample statistics 

947 

948 >>> s = SampleStatistics.from_single_value(10, 2) 

949 >>> print(s.stddev) 

950 0 

951 >>> s.minimum == s.maximum == s.mean_arith == s.mean_geom \ 

952 ... == s.median == 10 

953 True 

954 >>> s is SampleStatistics.from_single_value(s, s.n) 

955 True 

956 

957 >>> s = SampleStatistics.from_single_value(10, 1) 

958 >>> print(s.stddev) 

959 None 

960 >>> s.minimum == s.maximum == s.mean_arith == s.mean_geom \ 

961 ... == s.median == 10 

962 True 

963 >>> s is SampleStatistics.from_single_value(s, s.n) 

964 True 

965 

966 >>> s = SampleStatistics.from_single_value(-10, 2) 

967 >>> print(s.stddev) 

968 0 

969 >>> s.minimum == s.maximum == s.mean_arith == s.median == -10 

970 True 

971 >>> print(s.mean_geom) 

972 None 

973 >>> s is SampleStatistics.from_single_value(s, s.n) 

974 True 

975 

976 >>> s = SampleStatistics.from_single_value(-10, 1) 

977 >>> print(s.stddev) 

978 None 

979 >>> s.minimum == s.maximum == s.mean_arith == s.median == -10 

980 True 

981 >>> print(s.mean_geom) 

982 None 

983 >>> s is SampleStatistics.from_single_value(s, s.n) 

984 True 

985 

986 >>> s = SampleStatistics.from_single_value(10.5, 2) 

987 >>> print(s.stddev) 

988 0 

989 >>> s.minimum == s.maximum == s.mean_arith == s.mean_geom \ 

990 ... == s.median == 10.5 

991 True 

992 >>> s is SampleStatistics.from_single_value(s, s.n) 

993 True 

994 

995 >>> s = SampleStatistics.from_single_value(10.5, 1) 

996 >>> print(s.stddev) 

997 None 

998 >>> s.minimum == s.maximum == s.mean_arith == s.mean_geom \ 

999 ... == s.median == 10.5 

1000 True 

1001 >>> s is SampleStatistics.from_single_value(s, s.n) 

1002 True 

1003 

1004 >>> s = SampleStatistics.from_single_value(-10.5, 2) 

1005 >>> print(s.stddev) 

1006 0 

1007 >>> s.minimum == s.maximum == s.mean_arith == s.median == -10.5 

1008 True 

1009 >>> print(s.mean_geom) 

1010 None 

1011 >>> s is SampleStatistics.from_single_value(s, s.n) 

1012 True 

1013 

1014 >>> s = SampleStatistics.from_single_value(-10.5, 1) 

1015 >>> print(s.stddev) 

1016 None 

1017 >>> s.minimum == s.maximum == s.mean_arith == s.median == -10.5 

1018 True 

1019 >>> print(s.mean_geom) 

1020 None 

1021 >>> s is SampleStatistics.from_single_value(s, s.n) 

1022 True 

1023 

1024 >>> print(SampleStatistics.from_single_value( 

1025 ... StreamStatistics(5, 1, 1, 1, 0), 5)) 

1026 5;1;1;1;1;1;0 

1027 

1028 >>> try: 

1029 ... SampleStatistics.from_single_value(StreamStatistics( 

1030 ... 5, 1, 2, 3, 5), 5) 

1031 ... except ValueError as ve: 

1032 ... print(ve) 

1033 Cannot create SampleStatistics from 5;1;2;3;5. 

1034 

1035 >>> try: 

1036 ... SampleStatistics.from_single_value(None) 

1037 ... except TypeError as te: 

1038 ... print(str(te)[:20]) 

1039 value should be an i 

1040 

1041 >>> try: 

1042 ... SampleStatistics.from_single_value("a") 

1043 ... except TypeError as te: 

1044 ... print(str(te)[:20]) 

1045 value should be an i 

1046 

1047 >>> try: 

1048 ... SampleStatistics.from_single_value(1, None) 

1049 ... except TypeError as te: 

1050 ... print(str(te)[:20]) 

1051 n should be an insta 

1052 

1053 >>> try: 

1054 ... SampleStatistics.from_single_value(1, "a") 

1055 ... except TypeError as te: 

1056 ... print(str(te)[:20]) 

1057 n should be an insta 

1058 

1059 >>> try: 

1060 ... SampleStatistics.from_single_value(s, 12) 

1061 ... except ValueError as ve: 

1062 ... print(str(ve)[:20]) 

1063 Incompatible numbers 

1064 

1065 >>> try: 

1066 ... SampleStatistics.from_single_value(inf) 

1067 ... except ValueError as ve: 

1068 ... print(str(ve)[:20]) 

1069 value=inf is not fin 

1070 """ 

1071 n = check_int_range(n, "n", 1, 1_000_000_000_000_000_000) 

1072 

1073 if isinstance(value, StreamStatistics): 

1074 if value.n != n: 

1075 raise ValueError( # noqa: TRY004 

1076 f"Incompatible numbers of values {n} and {value}.") 

1077 if isinstance(value, SampleStatistics): 

1078 return value 

1079 if value.maximum != value.minimum: 

1080 raise ValueError( 

1081 f"Cannot create SampleStatistics from {value}.") 

1082 value = value.maximum 

1083 if not isinstance(value, int | float): 

1084 raise type_error(value, "value", (int, float, SampleStatistics)) 

1085 if not isfinite(value): 

1086 raise ValueError(f"value={value} is not finite.") 

1087 return SampleStatistics( 

1088 n=n, minimum=value, median=value, mean_arith=value, 

1089 mean_geom=None if value <= 0 else value, maximum=value, 

1090 stddev=None if n <= 1 else 0) 

1091 

@classmethod
def aggregate(cls) -> StreamStatisticsAggregate["SampleStatistics"]:
    """
    Create a new, empty aggregate that yields :class:`SampleStatistics`.

    Values can be fed to the aggregate one by one via `add` or in bulk via
    `update`; `result` then returns the statistics record over everything
    collected since creation or since the last `reset`.

    :return: a fresh aggregate object for this statistics type

    >>> ag = SampleStatistics.aggregate()
    >>> ag.update((1, 2, 3, 4))
    >>> ag.result()
    SampleStatistics(n=4, minimum=1, mean_arith=2.5, maximum=4, \
stddev=1.2909944487358056, median=2.5, mean_geom=2.213363839400643)
    >>> ag.reset()
    >>> ag.add(4)
    >>> ag.add(5)
    >>> ag.add(6)
    >>> ag.add(7)
    >>> ag.result()
    SampleStatistics(n=4, minimum=4, mean_arith=5.5, maximum=7, \
stddev=1.2909944487358056, median=5.5, mean_geom=5.383563270955295)
    """
    # Every call hands out its own, independent collector instance.
    collector: StreamStatisticsAggregate["SampleStatistics"] = _SampleStats()
    return collector

1114 

@classmethod
def from_samples(cls, source: Iterable[
        int | float | None]) -> "SampleStatistics":
    """
    Create a statistics object from an iterable of integers or floats.

    As bottom line, this function will forward computations to the
    :mod:`statistics` routines that ship with Python if nothing else works.
    However, sometimes, something else may work: In particular, if the data
    consists of only integers. In this case, it just might be possible to
    compute the statistics very accurately with integer precision, where
    possible. Also, otherwise, we can often accumulate the data using
    instances of :class:`fractions.Fraction`. Indeed, even the
    :mod:`statistics` routines may do this, but they convert to `float` in
    cases of non-1 denominators, even if the integer representation was
    much more accurate.

    Elements of `source` that are `None` are skipped. If all remaining
    values are identical (or only one value remains), the result is
    created via :meth:`from_single_value`.

    :param source: the source
    :returns: a statistics representing the statistics over `source`
    :raises TypeError: if `source` is not an :class:`Iterable`
    :raises ValueError: if `source` contains no (non-`None`) values, or if
        the computed geometric mean is inconsistent with the minimum or
        the arithmetic mean beyond what numerical imprecision explains

    >>> s = SampleStatistics.from_samples([0.0])
    >>> s.n
    1
    >>> s.minimum
    0
    >>> s.maximum
    0
    >>> print(s.mean_geom)
    None
    >>> s.median
    0
    >>> print(s.stddev)
    None

    >>> s = SampleStatistics.from_samples([1.0])
    >>> s.n
    1
    >>> s.minimum
    1
    >>> s.maximum
    1
    >>> print(s.mean_geom)
    1
    >>> s.median
    1
    >>> print(s.stddev)
    None

    >>> s = SampleStatistics.from_samples([1.0, 1])
    >>> s.n
    2
    >>> s.minimum
    1
    >>> s.maximum
    1
    >>> print(s.mean_geom)
    1
    >>> s.median
    1
    >>> print(s.stddev)
    0

    >>> s = SampleStatistics.from_samples([0, 0.0])
    >>> s.n
    2
    >>> s.minimum
    0
    >>> s.maximum
    0
    >>> print(s.mean_geom)
    None
    >>> s.median
    0
    >>> print(s.stddev)
    0

    >>> from statistics import stdev as stat_stddev
    >>> dd = [1.5, 2.5]
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.n
    2
    >>> s.minimum
    1.5
    >>> s.maximum
    2.5
    >>> print(s.mean_geom)
    1.9364916731037085
    >>> stat_geomean(dd)
    1.9364916731037085
    >>> s.median
    2
    >>> print(s.stddev)
    0.7071067811865476
    >>> stat_stddev(dd)
    0.7071067811865476

    >>> dd = [1.0, 2.0]
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.n
    2
    >>> s.minimum
    1
    >>> s.maximum
    2
    >>> print(s.mean_geom)
    1.4142135623730951
    >>> (1 * 2) ** 0.5
    1.4142135623730951
    >>> stat_geomean(dd)
    1.414213562373095
    >>> s.median
    1.5
    >>> print(s.stddev)
    0.7071067811865476
    >>> stat_stddev(dd)
    0.7071067811865476

    >>> dd = [1.0, 2.0, 3.0]
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.n
    3
    >>> s.minimum
    1
    >>> s.maximum
    3
    >>> print(s.mean_geom)
    1.8171205928321397
    >>> (1 * 2 * 3) ** (1 / 3)
    1.8171205928321397
    >>> stat_geomean(dd)
    1.8171205928321397
    >>> s.median
    2
    >>> print(s.stddev)
    1
    >>> stat_stddev(dd)
    1.0

    >>> dd = [1.0, 0, 3.0]
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.n
    3
    >>> s.minimum
    0
    >>> s.maximum
    3
    >>> print(s.mean_geom)
    None
    >>> s.median
    1
    >>> print(s.stddev)
    1.5275252316519468
    >>> stat_stddev(dd)
    1.5275252316519468

    >>> dd = [1.0, -2, 3.0]
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.n
    3
    >>> s.minimum
    -2
    >>> s.maximum
    3
    >>> print(s.mean_geom)
    None
    >>> s.median
    1
    >>> print(s.stddev)
    2.516611478423583
    >>> stat_stddev(dd)
    2.516611478423583

    >>> dd = [1e5, 2e7, 3e9]
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.n
    3
    >>> s.minimum
    100000
    >>> s.maximum
    3000000000
    >>> print(s.mean_geom)
    18171205.928321395
    >>> (100000 * 20000000 * 3000000000) ** (1 / 3)
    18171205.92832138
    >>> 100000 * (((100000 // 100000) * (20000000 // 100000) * (
    ...     3000000000 // 100000)) ** (1 / 3))
    18171205.92832139
    >>> print(s.mean_geom ** 3)
    5.999999999999999e+21
    >>> print(18171205.92832139 ** 3)
    5.999999999999995e+21
    >>> s.median
    20000000
    >>> print(s.stddev)
    1726277112.7487035
    >>> stat_stddev(dd)
    1726277112.7487035

    >>> dd = [3.3, 2.5, 3.7, 4.9]
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.n
    4
    >>> s.minimum
    2.5
    >>> s.maximum
    4.9
    >>> print(s.mean_geom)
    3.4971393519216964
    >>> 3.4971393519216964 ** 4
    149.5725
    >>> (3.3 * 2.5 * 3.7 * 4.9) ** 0.25
    3.497139351921697
    >>> s.median
    3.5
    >>> s.stddev
    1.0000000000000002
    >>> stat_stddev(dd)
    1.0000000000000002

    >>> dd = [3, 1, 2, 5]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.minimum)
    1
    >>> print(s.maximum)
    5
    >>> print(s.mean_arith)
    2.75
    >>> print(s.median)
    2.5
    >>> print(f"{s.mean_geom:.4f}")
    2.3403
    >>> print(f"{s.min_mean():.4f}")
    2.3403
    >>> print(f"{s.max_mean()}")
    2.75

    >>> dd = [8, 8, 8, 8, 9, 10, 10, 11, 11, 12, 12, 12, 12, 13,
    ...       13, 13, 14, 14, 14, 15, 15, 15, 15, 15, 15, 16, 16, 16]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.minimum)
    8
    >>> print(s.maximum)
    16
    >>> print(s.mean_arith)
    12.5
    >>> print(s.median)
    13
    >>> print(s.mean_geom)
    12.197150265022891
    >>> stat_geomean(dd)
    12.19715026502289
    >>> print(s.stddev)
    2.673602092336881
    >>> stat_stddev(dd)
    2.673602092336881

    >>> dd = [3, 4, 7, 14, 15, 16, 26, 28, 29, 30, 31, 31]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.minimum)
    3
    >>> print(s.maximum)
    31
    >>> print(s.mean_arith)
    19.5
    >>> print(s.median)
    21

    >>> print(s.mean_geom)
    15.354984483655892
    >>> stat_geomean(dd)
    15.354984483655894
    >>> k = 1
    >>> for i in dd:
    ...     k *= i
    >>> k
    171787904870400
    >>> len(dd)
    12
    >>> k ** (1 / 12)
    15.354984483655889
    >>> 15.354984483655889 ** 12
    171787904870399.62
    >>> 15.354984483655894 ** 12
    171787904870400.34
    >>> 15.354984483655892 ** 12
    171787904870400.1

    >>> print(s.stddev)
    10.917042556563484
    >>> print(str(stat_stddev(dd))[:-1])
    10.91704255656348

    >>> dd = [375977836981734264856247621159545315,
    ...       1041417453269301410322718941408784761,
    ...       2109650311556162106262064987699051941]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.minimum)
    375977836981734264856247621159545315
    >>> print(s.maximum)
    2109650311556162106262064987699051941
    >>> print(s.mean_arith)
    1175681867269065927147010516755794006
    >>> stat_mean(dd)
    1.1756818672690659e+36
    >>> print(s.median)
    1041417453269301410322718941408784761

    >>> print(s.mean_geom)
    938280139276529201997232316081385153
    >>> stat_geomean(dd)
    9.38280139276522e+35

    >>> str(dd[0] * dd[1] * dd[2])[:60]
    '826033329443972563356247815302467930409182372405786485790679'
    >>> str(938280139276529201997232316081385153 ** 3)[:60]
    '826033329443972563356247815302467929164458081790138679285598'
    >>> str(int(9.38280139276522e+35) ** 3)[:60]
    '826033329443953666416831847378532327244986484162191539691938'

    >>> print(s.stddev)
    874600058269081159245960567663054887
    >>> stat_stddev(dd)
    8.746000582690812e+35

    >>> dd = [104275295274308290135253194482044160663473778025704,
    ...       436826861307375084714000787588311944456580437896461,
    ...       482178404791292289021955619498303854464057392180997,
    ...       521745351662201002493923306143082542601267608373030,
    ...       676289718505789968602970820038005797309334755525626]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.minimum)
    104275295274308290135253194482044160663473778025704
    >>> print(s.maximum)
    676289718505789968602970820038005797309334755525626
    >>> print(s.mean_arith)
    444263126308193326993620745549949659898942794400364
    >>> stat_mean(dd)
    4.442631263081933e+50
    >>> print(s.median)
    482178404791292289021955619498303854464057392180997

    >>> print(s.mean_geom)
    378318848166864995660791573439112525534046591591759
    >>> stat_geomean(dd)
    3.78318848166862e+50

    >>> print(s.stddev)
    210311926886813737006941586539087921260462032505870
    >>> stat_stddev(dd)
    2.1031192688681374e+50

    >>> dd = [4, 5, 5, 6, 6, 6, 6, 6, 8, 8]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    5.884283961687533
    >>> print(stat_geomean(dd))
    5.884283961687533

    >>> dd = [4, 4, 4, 5, 5, 8]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    4.836542350243914
    >>> print(stat_geomean(dd))
    4.8365423502439135

    >>> dd = [2, 8, 11, 17, 26, 30, 32]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    13.327348017053906
    >>> print(stat_geomean(dd))
    13.327348017053906

    >>> dd = [2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    3.4710522375429465
    >>> print(stat_geomean(dd))
    3.471052237542947

    >>> dd = [3, 4, 4, 5, 6, 8, 8, 8, 8]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    5.653305998922543
    >>> print(stat_geomean(dd))
    5.653305998922543

    >>> dd = [16, 17, 19, 20, 20, 21, 22, 23, 24, 24, 25, 26, 29, 31,
    ...       31, 31, 32, 32, 32]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    24.419566831650357
    >>> print(stat_geomean(dd))
    24.41956683165036

    >>> dd = [66, 68, 69, 70, 72, 73, 73, 79, 81, 87, 94, 99, 100,
    ...       102, 103, 112, 118, 119, 123, 123]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    89.45680043258344
    >>> print(stat_geomean(dd))
    89.45680043258346

    >>> dd = [44, 63, 63, 68, 68, 68, 70, 74, 74, 80, 95, 108, 110, 128]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    76.68646417360762
    >>> print(stat_geomean(dd))
    76.68646417360763

    >>> try:
    ...     SampleStatistics.from_samples(None)
    ... except TypeError as te:
    ...     print(te)
    source should be an instance of typing.Iterable but is None.

    >>> SampleStatistics.from_samples((int("3432135447287235494201\
93506618248802478442\
545733127827402743350092428341563721880022852900744775368104117201410\
41"), int("4543178800835483269512609282884075126142677531600199807725\
0558561959304806690567285991174956892786401583087254156"), int("35473\
203294104466229269097724582630304968924904656920211268628173495602053\
843032960943121516556362641127137000879"))).mean_arith
    38408781925110551288804847071749420604746651597990567009597840581\
565913672301929416406528849308895284373981465359

    Corner cases where the standard deviation resulting from compact
    fractions deviates from the standard deviation resulting from
    normalized fractions:

    >>> dd = [-7.737125245533627e+25] * 28
    >>> dd[2] = -7.737125245533626e+25
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.stddev
    1623345050.6245058
    >>> stat_stddev(dd)
    1623345050.6245058
    >>> ddx = tuple(map(_to_frac, dd))
    >>> ds = sum(ddx)
    >>> dss = sum(ddy * ddy for ddy in ddx)
    >>> from math import sqrt
    >>> sqrt((dss - (ds * ds / 28)) / 27)
    1623345050.6245055

    Here the standard deviation becomes meaningless.
    If you compute it based on converting all values to floats, you get
    something like 0.435.
    You get the same result if you represent all values directly as
    Fractions.
    However, if you represent the float values as more compact Fractions,
    i.e., as Fractions that map to the exactly same floats but have smaller
    denominators, you get a standard deviation of 9.32e+64.
    Basically, the difference is 65 orders of magnitude.
    But the source numbers would be exactly the same...
    The reason is the limited range of floats.

    >>> dd = (7.588550360256754e+81, int("75885503602567541832791480735\
293707\
29071901715047420004889892225542594864082845697"), int("758855036025675418327\
9148073529370729071901715047420004889892225542594864082845697"), \
7.588550360256754e+81, 7.588550360256754e+81, 7.588550360256754e+81, \
int("7588550360256754183279148073529370729071901715047420004889892225\
542594864082845696"), 7.588550360256754e+81, 7.588550360256754e+81, \
7.588550360256754e+81, 7.588550360256754e+81, int("758855036025675418\
3279148073529370729071901715047420004889892225542594864082845696"), int("7588\
55036025675418327914807352937072907190171504742000488989222554259486408284569\
7"), int("7588550360256754183279148073529370729071901715047420004889892225542\
594864082845696"), int("75885503602567541832791480735293707290719017150474200\
04889892225542594864082845696"), int("758855036025675418327914807352937072907\
1901715047420004889892225542594864082845697"), 7.588550360256754e+81,\
int("7588550360256754183279148073529370729071901715047420004889892225\
542594864082845697"), int("75885503602567541832791480735293707290719017150474\
20004889892225542594864082845697"), int("758855036025675418327914807352937072\
9071901715047420004889892225542594864082845697"), 7.588550360256754e+81, \
int("7588550360256754183279148073529370729071901715047420004889892225\
542594864082845696"), int("75885503602567541832791480735293707290719017150474\
20004889892225542594864082845696"), 7.588550360256754e+81, \
7.588550360256754e+81, int("75885503602567541832791480735293707290719\
01715047420004889892225542594864082845696"), 7.588550360256754e+81, \
7.588550360256754e+81, 7.588550360256754e+81)
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.stddev
    0.4354941703556927
    >>> stat_stddev(dd)
    0.4354941703556927
    >>> ddx = tuple(map(_to_frac, dd))
    >>> ds = sum(ddx)
    >>> dss = sum(ddy * ddy for ddy in ddx)
    >>> _limited_root((dss - (ds * ds / len(dd))) / (len(dd) - 1), 2)
    93206175962530968626911348905791729797971161757128018983942059951
    >>> ddx = tuple(map(Fraction, dd))
    >>> ds = sum(ddx)
    >>> dss = sum(ddy * ddy for ddy in ddx)
    >>> _limited_root((dss - (ds * ds / len(dd))) / (len(dd) - 1), 2)
    0.4354941703556927

    >>> try:
    ...     SampleStatistics.from_samples(1)
    ... except TypeError as te:
    ...     print(te)
    source should be an instance of typing.Iterable but is int, namely 1.

    >>> try:
    ...     SampleStatistics.from_samples([])
    ... except ValueError as ve:
    ...     print(ve)
    Data source cannot be empty.
    """
    if not isinstance(source, Iterable):
        raise type_error(source, "source", Iterable)

    # The median function of statistics would do this anyway, so we may as
    # well do it now.
    # `None` entries are dropped and every remaining value is passed
    # through `try_int` before sorting (the doctests above show that, e.g.,
    # 0.0 is then reported as 0).
    data: Final[list[int | float]] = sorted(map(try_int, (
        xs for xs in source if xs is not None)))
    n: Final[int] = list.__len__(data)
    if n <= 0:
        raise ValueError("Data source cannot be empty.")

    minimum: int | float = data[0]  # because data is now sorted
    maximum: int | float = data[-1]  # because data is now sorted
    if (minimum >= maximum) or (n <= 1):  # all data is the same
        return SampleStatistics.from_single_value(minimum, n)

    # Compute the median.
    # For odd n, it is the middle element of the sorted data; for even n,
    # it is the mean of the two elements around the middle.
    middle: Final[int] = n >> 1
    median: Final[int | float] = data[middle] if (n & 1) == 1 else (
        _mean_of_two(data[middle - 1], data[middle]))

    # Is it possible, at this stage, that all data are integers?
    # This only inspects the two extremes; the loop below clears the flag
    # as soon as it meets a non-integer element in between.
    can_int: bool = isinstance(minimum, int) and isinstance(maximum, int)

    # If we have only two numbers, we also already have the mean.
    # Otherwise, if we have only integer data so far and we know that
    # regardless how we dice it, the sum of the data will never exceed
    # the range in which floats can accurately represent integers, then
    # we also know that we can compute the arithmetic mean exactly.
    mean_arith: int | float | None = median if n <= 2 else (
        try_int(stat_mean(data)) if can_int and (
            (n * (1 + max(maximum, 0) - min(minimum, 0)))
            < _DBL_INT_LIMIT_P_I) else None)
    # The exact arithmetic mean as fraction; only computed for n > 2.
    mean_arith_frac: Fraction | None = None
    mean_geom: int | float | None = None  # don't know the geometric mean
    # Go over the data once and see if we can treat it as all-integer.
    # If yes, then we can compute some statistics very precisely.
    # are all values integers?
    int_sum: int = 0  # the integer sum (for mean, stddev)
    # the sum of squares of the *shifted* values (for stddev)
    int_sum_sqr: int = 0  # the sum of squares (for stddev)
    # the sum of squares of the *unshifted* values: it seeds
    # `frac_sum_sqr` if we must fall back to fractions mid-way
    int_sum_sqr_2: int = 0  # the sum of squares (for stddev)
    int_prod: int = 1  # the integer product (for geom_mean)
    # The fraction-based accumulators used once a non-integer is seen.
    frac_sum: Fraction = _FRAC_0
    frac_sum_sqr: Fraction = frac_sum
    frac_prod: Fraction = _FRAC_1

    # The following is *only* used if we have *only* integer data.
    # stddev((a, b, ...)) = stddev((a-x, b-x, ...))
    # If we can shift the whole data such that its center is around 0,
    # then the differences that we have to add up become smaller, and thus
    # the floating point arithmetic that we may need to use becomes more
    # accurate. If we know the mean, then shifting the data by the mean
    # will lead to the smallest sum of deviations. If we know only the
    # median, then this is better than nothing.
    shift: Final[int] = int(median) if mean_arith is None \
        else (mean_arith if isinstance(mean_arith, int)
              else round(mean_arith))

    for ii, ee in enumerate(data):  # iterate over all data
        if can_int and (not isinstance(ee, int)):
            # First non-integer element: migrate the integer accumulators
            # to fractions. `int_sum` holds the sum of the `ii` shifted
            # values seen so far, so adding `ii * shift` restores the true
            # sum of the original values.
            frac_sum = Fraction(int_sum + ii * shift)
            frac_sum_sqr = Fraction(int_sum_sqr_2)
            frac_prod = Fraction(int_prod)
            can_int = False
        if can_int:  # == ee must be int
            int_sum_sqr_2 += ee * ee  # type: ignore
            int_prod *= ee  # type: ignore
            e: int = ee - shift  # type: ignore
            int_sum += e  # so we can sum exactly
            int_sum_sqr += e * e  # and compute the sum of squares
        else:
            # Fraction arithmetic on the original, unshifted value.
            eef = Fraction(ee)
            frac_sum += eef
            frac_sum_sqr += eef * eef
            frac_prod *= eef

    if n > 2:  # mean_arith is None or an approximation
        # In the all-integer case, the shift must be added back to the
        # mean of the shifted values.
        mean_arith_frac = (Fraction(int_sum, n) + shift) \
            if can_int else (frac_sum / n)
        mean_arith = _from_frac(mean_arith_frac)
    # Sample variance = (sum of squares - (sum)^2 / n) / (n - 1).
    # In the integer case this uses the shifted sums, which leaves the
    # variance unchanged (see the comment on `shift` above).
    # `_limited_root(..., 2)` is used as the square root here, as in the
    # doctests above; the helper itself is defined elsewhere in this file.
    stddev: Final[int | float] = _limited_root(((int_sum_sqr - Fraction(
        int_sum * int_sum, n)) if can_int else (frac_sum_sqr - (
            frac_sum * frac_sum / n))) / (n - 1), 2)

    if minimum > 0:  # geometric mean only defined for all-positive
        if can_int:
            frac_prod = Fraction(int_prod)
        # mean_geom always <= mean_arith
        # NOTE(review): the third and fourth arguments look like lower and
        # upper bounds for the root (minimum, and the smaller of maximum
        # and the arithmetic mean) - confirm against `_limited_root`.
        mean_geom = _limited_root(
            frac_prod, n, _to_frac(minimum), min(
                _to_frac(maximum), (Fraction(mean_arith) if isinstance(
                    mean_arith, int) else Fraction(nextafter(
                        mean_arith, inf))) if (mean_arith_frac is None)
                else mean_arith_frac))

    # Fall back to the standard library if no geometric mean was obtained
    # above although the data is all-positive.
    if (mean_geom is None) and (minimum > 0):
        mean_geom = stat_geomean(data)

    if mean_geom is not None:
        # Deal with errors that may have arisen due to
        # numerical imprecision.
        # The geometric mean must lie within [minimum, mean_arith]: tiny
        # violations (as judged by `_almost_le`) are clamped to the
        # violated bound, anything else is reported as an error.
        if mean_geom < minimum:
            if _almost_le(minimum, mean_geom):
                mean_geom = minimum
            else:
                raise ValueError(
                    f"mean_geom={mean_geom} but min={minimum}")
        if mean_arith < mean_geom:
            if _almost_le(mean_geom, mean_arith):
                mean_geom = mean_arith
            else:
                raise ValueError(
                    f"mean_geom={mean_geom} but mean_arith={mean_arith}")

    return SampleStatistics(minimum=minimum, median=median,
                            mean_arith=mean_arith, mean_geom=mean_geom,
                            maximum=maximum, stddev=stddev, n=n)

1739 

1740 

class CsvReader(CsvReaderBase[SampleStatistics]):
    """
    A csv parser for sample statistics.

    The parser accepts any subset of the statistics columns. Values that
    are missing in a row are filled in from the values that are present,
    see :meth:`parse_row`. If only a single value column (and no standard
    deviation column) exists, each row is treated as a statistics record
    over `n` identical values.

    >>> from pycommons.io.csv import csv_read
    >>> csv = ["n;min;mean;med;geom;max;sd",
    ...        "3;2;3;4;3;10;5", "6;2;;;;;0", "1;;;2;;;", "3;;;;;0;",
    ...        "4;5;12;32;11;33;7"]
    >>> for p in csv_read(csv, CsvReader, CsvReader.parse_row):
    ...     print(p)
    3;2;4;3;3;10;5
    6;2;2;2;2;2;0
    1;2;2;2;2;2;None
    3;0;0;0;None;0;0
    4;5;32;12;11;33;7

    >>> csv = ["value", "1", "3", "0", "-5", "7"]
    >>> for p in csv_read(csv, CsvReader, CsvReader.parse_row):
    ...     print(p)
    1;1;1;1;1;1;None
    1;3;3;3;3;3;None
    1;0;0;0;None;0;None
    1;-5;-5;-5;None;-5;None
    1;7;7;7;7;7;None

    >>> csv = ["n;m;sd", "1;3;", "3;5;0"]
    >>> for p in csv_read(csv, CsvReader, CsvReader.parse_row):
    ...     print(p)
    1;3;3;3;3;3;None
    3;5;5;5;5;5;0

    >>> csv = ["n;m", "1;3", "3;5"]
    >>> for p in csv_read(csv, CsvReader, CsvReader.parse_row):
    ...     print(p)
    1;3;3;3;3;3;None
    3;5;5;5;5;5;0
    """

    def __init__(self, columns: dict[str, int]) -> None:
        """
        Create a CSV parser for :class:`SampleStatistics`.

        :param columns: the columns, i.e., a mapping of column names to
            column indices
        :raises TypeError: if `columns` is not a `dict`
        :raises ValueError: if no usable statistics column can be found
            or if unknown columns remain

        >>> try:
        ...     CsvReader(None)
        ... except TypeError as te:
        ...     print(te)
        columns should be an instance of dict but is None.

        >>> try:
        ...     CsvReader(1)
        ... except TypeError as te:
        ...     print(te)
        columns should be an instance of dict but is int, namely 1.

        >>> try:
        ...     CsvReader(dict())
        ... except ValueError as ve:
        ...     print(ve)
        No useful keys remain in {}.

        >>> try:
        ...     CsvReader({"a": 1, "b": 2})
        ... except ValueError as ve:
        ...     print(ve)
        No useful keys remain in {'a': 1, 'b': 2}.

        >>> try:
        ...     CsvReader({KEY_N: 1, "b": 2, "c": 3})
        ... except ValueError as ve:
        ...     print(ve)
        No useful keys remain in {'b': 2, 'c': 3}.

        >>> try:
        ...     CsvReader({KEY_MINIMUM: 1, "b": 2, "c": 3})
        ... except ValueError as ve:
        ...     print(ve)
        Found strange keys in {'b': 2, 'c': 3}.
        """
        super().__init__(columns)

        # Judging from the doctests above, looking up a known key removes
        # it from `columns`, so that only unrecognized keys remain at the
        # end.

        #: the index of the number of elements
        self.idx_n: Final[int | None] = csv_column_or_none(
            columns, KEY_N)

        # `has` counts the value columns (min, mean, median, geometric
        # mean, max) that are present; `has_idx` is the index of the last
        # one found.
        has: int = 0
        has_idx: int = -1

        #: the index of the minimum
        self.__idx_min: int | None = csv_column_or_none(
            columns, KEY_MINIMUM)
        if self.__idx_min is not None:
            has += 1
            has_idx = self.__idx_min

        #: the index for the arithmetic mean
        self.__idx_mean_arith: int | None = csv_column_or_none(
            columns, KEY_MEAN_ARITH)
        if self.__idx_mean_arith is not None:
            has += 1
            has_idx = self.__idx_mean_arith

        #: the index for the median
        self.__idx_median: int | None = csv_column_or_none(
            columns, KEY_MEDIAN)
        if self.__idx_median is not None:
            has += 1
            has_idx = self.__idx_median

        #: the index for the geometric mean
        self.__idx_mean_geom: int | None = csv_column_or_none(
            columns, KEY_MEAN_GEOM)
        if self.__idx_mean_geom is not None:
            has += 1
            has_idx = self.__idx_mean_geom

        #: the index for the maximum
        self.__idx_max: int | None = csv_column_or_none(
            columns, KEY_MAXIMUM)
        if self.__idx_max is not None:
            has += 1
            has_idx = self.__idx_max

        #: the index for the standard deviation
        self.__idx_sd: Final[int | None] = csv_column_or_none(
            columns, KEY_STDDEV)

        if has <= 0:
            # No known value column: if exactly one other column is left,
            # use it as the single value column, regardless of its name.
            if dict.__len__(columns) == 1:
                self.__idx_min = has_idx = csv_column(
                    columns, next(iter(columns.keys())), True)
                has = 1
            else:
                raise ValueError(f"No useful keys remain in {columns!r}.")
        # NOTE(review): a single left-over unknown key is tolerated here;
        # only two or more are rejected - confirm that this is intended.
        if dict.__len__(columns) > 1:
            raise ValueError(f"Found strange keys in {columns!r}.")

        #: is this a parser for single number statistics?
        self.__is_single: Final[bool] = (self.__idx_sd is None) and (has == 1)

        if self.__is_single:
            # All of these statistics are read from the one value column.
            self.__idx_min = self.__idx_max = self.__idx_median \
                = self.__idx_mean_arith = has_idx

    def parse_row(self, data: list[str]) -> SampleStatistics:
        """
        Parse a row of data.

        If no `n` column exists, `n=1` is assumed. Statistics that are
        missing in the row are substituted: a missing minimum is replaced
        by the first available of arithmetic mean, median, geometric
        mean, and maximum; all other missing values are replaced by the
        minimum. A missing geometric mean is only filled in if the minimum
        is positive, a missing standard deviation becomes `0` if `n > 1`
        and `None` otherwise.

        :param data: the data row
        :returns: the sample statistics
        :raises ValueError: if the row does not contain any value from
            which the statistics could be constructed

        >>> cc = CsvReader({KEY_MINIMUM: 0, KEY_MEAN_ARITH: 1, KEY_MAXIMUM: 2,
        ...                 KEY_STDDEV: 3, KEY_MEDIAN: 4, KEY_MEAN_GEOM: 5,
        ...                 KEY_N: 6})
        >>> try:
        ...     cc.parse_row([None, None, None, None, None, None, "5"])
        ... except ValueError as ve:
        ...     print(str(ve)[:20])
        No value defined for

        >>> cc = CsvReader({"v": 0})
        >>> try:
        ...     cc.parse_row([None])
        ... except ValueError as ve:
        ...     print(str(ve)[:20])
        No value defined for
        """
        n: Final[int] = 1 if self.idx_n is None else int(data[self.idx_n])
        mi: int | float | None = csv_val_or_none(
            data, self.__idx_min, str_to_num)

        if self.__is_single:
            if mi is None:
                # Without this guard, the comparison `mi > 0` below would
                # fail with an unhelpful TypeError for an empty cell.
                raise ValueError(
                    f"No value defined for min@{self.__idx_min}={mi} "
                    f"in {data!r}.")
            # The single value stands for all statistics. It is also used
            # as geometric mean if it is positive or if the single column
            # is the geometric mean column itself.
            return SampleStatistics(
                n=n, minimum=mi, median=mi, mean_arith=mi,
                mean_geom=mi if (mi > 0) or (self.__idx_mean_geom is not None)
                else None, maximum=mi, stddev=None if n <= 1 else 0)

        ar: int | float | None = csv_val_or_none(
            data, self.__idx_mean_arith, str_to_num)
        me: int | float | None = csv_val_or_none(
            data, self.__idx_median, str_to_num)
        ge: int | float | None = csv_val_or_none(
            data, self.__idx_mean_geom, str_to_num)
        ma: int | float | None = csv_val_or_none(
            data, self.__idx_max, str_to_num)
        sd: int | float | None = csv_val_or_none(
            data, self.__idx_sd, str_to_num)

        if mi is None:
            # Use the first available other value as stand-in for the
            # minimum.
            if ar is not None:
                mi = ar
            elif me is not None:
                mi = me
            elif ge is not None:
                mi = ge
            elif ma is not None:
                mi = ma
            else:
                raise ValueError(
                    f"No value defined for min@{self.__idx_min}={mi}, mean@"
                    f"{self.__idx_mean_arith}={ar}, med@{self.__idx_median}="
                    f"{me}, gmean@{self.__idx_mean_geom}={ge}, max@"
                    f"{self.__idx_max}={ma} defined in {data!r}.")
        return SampleStatistics(
            n=n, minimum=mi, mean_arith=mi if ar is None else ar,
            median=mi if me is None else me, mean_geom=(
                mi if mi > 0 else None) if (ge is None) else ge,
            maximum=mi if ma is None else ma,
            stddev=(0 if (n > 1) else None) if sd is None else sd)

    def parse_optional_row(self, data: list[str] | None) \
            -> SampleStatistics | None:
        """
        Parse a row of data that may be empty.

        The row is considered to contain data if at least one of the value
        columns (minimum, arithmetic mean, median, geometric mean,
        maximum) known to this reader holds a non-empty string.

        :param data: the row of data that may be empty
        :returns: the sample statistic, if the row contains data, else `None`

        >>> print(CsvReader.parse_optional_row(None, ["1"]))
        None
        >>> print(CsvReader.parse_optional_row(CsvReader({"v": 0}), ["1"]))
        1;1;1;1;1;1;None
        >>> print(CsvReader.parse_optional_row(CsvReader({"v": 0}), [""]))
        None
        """
        if (self is None) or (data is None):
            return None  # trick to make this method usable pseudo-static
        # pylint: disable=R0916
        if (((self.__idx_min is not None) and (
                str.__len__(data[self.__idx_min]) > 0)) or (
                (self.__idx_mean_arith is not None) and (
                    str.__len__(data[self.__idx_mean_arith]) > 0)) or (
                (self.__idx_median is not None) and (
                    str.__len__(data[self.__idx_median]) > 0)) or (
                (self.__idx_mean_geom is not None) and (
                    str.__len__(data[self.__idx_mean_geom]) > 0)) or (
                (self.__idx_max is not None) and (
                    str.__len__(data[self.__idx_max]) > 0))):
            return self.parse_row(data)
        return None

1975 

1976 

class CsvWriter(CsvWriterBase[SampleStatistics]):
    """A class for CSV writing of :class:`SampleStatistics`."""

    def __init__(self,
                 data: Iterable[SampleStatistics],
                 scope: str | None = None,
                 n_not_needed: bool = False,
                 what_short: str | None = None,
                 what_long: str | None = None) -> None:
        """
        Initialize the csv writer.

        All arguments are forwarded unchanged to the constructor of the base
        class, followed by :class:`SampleStatistics` as the last argument.

        :param data: the data to use
        :param scope: the prefix to be pre-pended to all columns
        :param n_not_needed: should we omit the `n` column?
        :param what_short: the short description of what the statistics is
            about
        :param what_long: the long description of what the statistics is
            about
        :raises TypeError: if `n_not_needed` is not a `bool` or if an element
            of `data` has the wrong type (see the examples below)
        :raises ValueError: if `data` is empty (see the examples below)

        >>> try:
        ...     CsvWriter([], None, n_not_needed=None)
        ... except TypeError as te:
        ...     print(te)
        n_not_needed should be an instance of bool but is None.

        >>> try:
        ...     CsvWriter([])
        ... except ValueError as ve:
        ...     s = str(ve)
        ...     print(s[s.index(' ') + 1:])
        CsvWriter did not see any data.

        >>> try:
        ...     CsvWriter([1])
        ... except TypeError as te:
        ...     print(str(te)[:32])
        data[0] should be an instance of
        """
        super().__init__(data, scope, n_not_needed, what_short, what_long,
                         SampleStatistics)

2017 

2018 

class _SampleStats(StreamStatisticsAggregate[SampleStatistics]):
    """
    The internal sample statistics aggregate.

    All values are collected in a list; the statistics record is only
    computed from that list when a result is requested.
    """

    def __init__(self) -> None:
        """Initialize the aggregate with an empty list of samples."""
        #: the internal data list
        self.__lst: Final[list[int | float]] = []

    def reset(self) -> None:
        """Discard all samples collected so far."""
        del self.__lst[:]

    def add(self, value: int | float) -> None:
        """
        Add a single value to the collected samples.

        The value is passed through `try_int` before it is stored.

        :param value: the value
        """
        sample = try_int(value)
        self.__lst.append(sample)

    def update(self, data: Iterable[int | float | None]) -> None:
        """
        Add a stream of data to the collected samples.

        Elements that are `None` are skipped, all other elements are stored
        as they are.

        :param data: the data stream
        """
        store = self.__lst.append
        for sample in data:
            if sample is not None:
                store(sample)

    def result(self) -> SampleStatistics:
        """
        Compute the sample statistics over all values collected so far.

        :returns: the record created by
            :meth:`SampleStatistics.from_samples` from the collected values
        """
        samples = self.__lst
        return SampleStatistics.from_samples(samples)

    def result_or_none(self) -> SampleStatistics | None:
        """
        Get the result if any data was collected, otherwise `None`.

        :returns: The return value of :meth:`result` if any data was
            collected, otherwise `None`
        """
        if list.__len__(self.__lst) > 0:
            return self.result()
        return None