Coverage for pycommons/math/sample

1"""

2A simple and immutable basic statistics record computed over a data sample.

4Here we provide records of statistics that are computed over a fully available

5sample of data.

6Such records are instances of class

7:class:`~pycommons.math.sample_statistics.SampleStatistics`.

8They offer the

9:attr:`~pycommons.math.stream_statistics.StreamStatistics.minimum` and

10:attr:`~pycommons.math.stream_statistics.StreamStatistics.maximum` of the data

11as well as the number

12:attr:`~pycommons.math.stream_statistics.StreamStatistics.n` of observed

13samples.

14They also offer approximations of the arithmetic mean as attribute

15:attr:`~pycommons.math.stream_statistics.StreamStatistics.mean_arith` and

16the approximation of the standard deviation as attribute

17:attr:`~pycommons.math.stream_statistics.StreamStatistics.stddev`.

18Additionally, they provide the sample

19:attr:`~pycommons.math.sample_statistics.SampleStatistics.median`

20and an approximation

21:attr:`~pycommons.math.sample_statistics.SampleStatistics.mean_geom` of the

22geometric mean.

24This class is an extension of class

25:class:`~pycommons.math.stream_statistics.StreamStatistics`.

26Stream statistics are less accurate and do not provide the median or geometric

27mean.

28They, however, can be applied to a stream of data and do not require that all

29the data be available as a complete chunk at once.

30Sample statistics require access to the full data, but also offer higher

31accuracy.

33There is an absolute order defined upon these records.

34They are hashable and immutable.

35We provide methods to store them to CSV format via the class

36:class:`~pycommons.math.sample_statistics.CsvWriter`

37and to load them from CSV data via the class

38:class:`~pycommons.math.sample_statistics.CsvReader`.

39Functions that access attributes can be obtained via

40:meth:`~pycommons.math.stream_statistics.StreamStatistics.getter`.

42>>> ag = SampleStatistics.aggregate()

43>>> ag.update((1, 2, 3))

44>>> ag.add(4)

45>>> ag.add(5)

46>>> r1 = ag.result()

47>>> repr(r1)

48'SampleStatistics(n=5, minimum=1, mean_arith=3, maximum=5, \

49stddev=1.5811388300841898, median=3, mean_geom=2.6051710846973517)'

50>>> str(r1)

51'5;1;3;3;2.6051710846973517;5;1.5811388300841898'

53>>> r2 = SampleStatistics.from_samples((1, 2, 3, 4, 5))

54>>> r1 == r2

55True

57>>> ag.reset()

58>>> try:

59... ag.result()

60... except ValueError as ve:

61... print(ve)

62Data source cannot be empty.

64>>> print(ag.result_or_none())

65None

66"""

68from contextlib import suppress

69from dataclasses import dataclass

70from fractions import Fraction

71from math import ceil, inf, isfinite, nan, nextafter

72from statistics import geometric_mean as stat_geomean

73from statistics import mean as stat_mean

74from typing import Final, Iterable, Union

76from pycommons.io.csv import (

77 CSV_SEPARATOR,

78 csv_column,

79 csv_column_or_none,

80 csv_val_or_none,

81)

82from pycommons.io.csv import CsvReader as CsvReaderBase

83from pycommons.math.int_math import __DBL_INT_LIMIT_P_I as _DBL_INT_LIMIT_P_I

84from pycommons.math.int_math import (

85 ceil_div,

86 float_to_frac,

87 try_int,

88 try_int_div,

89)

90from pycommons.math.stream_statistics import (

91 KEY_MAXIMUM,

92 KEY_MEAN_ARITH,

93 KEY_MEAN_GEOM,

94 KEY_MEDIAN,

95 KEY_MINIMUM,

96 KEY_N,

97 KEY_STDDEV,

98 StreamStatistics,

99 StreamStatisticsAggregate,

100)

101from pycommons.math.stream_statistics import CsvWriter as CsvWriterBase

102from pycommons.strings.string_conv import (

103 str_to_num,

104)

105from pycommons.types import check_int_range, type_error

106

107

def _mean_of_two(a: int | float, b: int | float) -> int | float:
    """
    Return the arithmetic mean of two numbers.

    Both arguments are first passed through `try_int` (so that, e.g., `1.0`
    is treated like `1`, see the examples). Two integers are averaged with
    `try_int_div`. Otherwise, the sum is halved; if that sum is not finite,
    each operand is halved separately before adding.

    :param a: the first number
    :param b: the second number
    :returns: the mean of `a` and `b`

    >>> _mean_of_two(1, 1)
    1
    >>> _mean_of_two(1.0, 1.0)
    1
    >>> _mean_of_two(1, 2)
    1.5
    >>> _mean_of_two(1, 3)
    2
    >>> _mean_of_two(1.5, 1.7)
    1.6

    >>> _mean_of_two(-1, -1)
    -1
    >>> _mean_of_two(-1.0, -1.0)
    -1
    >>> _mean_of_two(-1, -2)
    -1.5
    >>> _mean_of_two(-1, -3)
    -2
    >>> _mean_of_two(-1.5, -1.7)
    -1.6

    >>> _mean_of_two(1, -1)
    0
    >>> _mean_of_two(-1.0, 1.0)
    0
    >>> _mean_of_two(1, -2)
    -0.5
    >>> _mean_of_two(1, -3)
    -1
    >>> _mean_of_two(1.5, -1.7)
    -0.09999999999999998
    >>> _mean_of_two(-1.5, 1.7)
    0.09999999999999998

    >>> _mean_of_two(1.7976931348623157e+308, 1.7976931348623157e+308)
    1.7976931348623157e+308
    >>> _mean_of_two(1.7976931348623155e+308, 1.7976931348623157e+308)
    1.7976931348623155e+308
    """
    first: Final[int | float] = try_int(a)
    second: Final[int | float] = try_int(b)
    if first == second:
        return first  # equal values are their own mean
    if isinstance(first, int) and isinstance(second, int):
        return try_int_div(first + second, 2)

    total: Final[float] = first + second
    if isfinite(total):
        return 0.5 * total
    # The sum is not finite (e.g., it overflowed): halve before adding.
    return (0.5 * first) + (0.5 * second)

165

166

167def _almost_le(a: int | float, b: int | float) -> bool:

168 """

169 Check if `a <= b` holds approximately.

170

171 `a <= b` holds if, well, `a` is less than or equal to `b`. It holds almost

172 if `a` is just a tiny bit larger than `b`.

173

174 :param a: the first value

175 :param b: the second value

176 :returns: `True` if we can say: `a` is approximately less or equal than `b`

177 and any deviation from this probably results from numerical issues.

178

179 >>> _almost_le(1, 0)

180 False

181 >>> _almost_le(0, 0)

182 True

183 >>> _almost_le(1.1, 1.09)

184 False

185 >>> _almost_le(1.1, 1.099999)

186 False

187 >>> _almost_le(1.1, 1.09999999)

188 False

189 >>> _almost_le(1.1, 1.0999999999)

190 False

191 >>> _almost_le(1.1, 1.099999999999)

192 False

193 >>> _almost_le(1.099999999999, 1.1)

194 True

195 >>> _almost_le(1.1, 1.0999999999999)

196 True

197 >>> _almost_le(1.0999999999999, 1.1)

198 True

199

200 >>> _almost_le(0, -1)

201 False

202 >>> _almost_le(-1.09, -1.1)

203 False

204 >>> _almost_le(-1.099999, -1.1)

205 False

206 >>> _almost_le(-1.09999999, -1.1)

207 False

208 >>> _almost_le(-1.0999999999, -1.1)

209 False

210 >>> _almost_le(-1.099999999999, -1.1)

211 False

212 >>> _almost_le(-1.1, -1.099999999999)

213 True

214 >>> _almost_le(-1.0999999999999, -1.1)

215 True

216 >>> _almost_le(-1.1, -1.0999999999999)

217 True

218

219 >>> _almost_le(23384026197294446691258957323460528314494920687616,

220 ... 2.3384026197294286e+49)

221 True

222 >>> _almost_le(nextafter(5, inf), nextafter(5, -inf))

223 True

224 >>> _almost_le(nextafter(nextafter(5, inf), inf),

225 ... nextafter(nextafter(5, -inf), -inf))

226 True

227 >>> _almost_le(nextafter(nextafter(nextafter(5, inf), inf), inf),

228 ... nextafter(nextafter(nextafter(5, -inf), -inf), -inf))

229 True

230 >>> _almost_le(nextafter(nextafter(nextafter(nextafter(5, inf), inf),

231 ... inf), inf), nextafter(nextafter(nextafter(5, -inf),

232 ... -inf), -inf))

233 True

234 >>> _almost_le(5.114672824837722e+148, 5.1146728248374894e+148)

235 True

236

237 >>> _almost_le(-1.7976931348623157e+308,

238 ... -int(1.7976931348623157e+308) * 10)

239 False

240 >>> _almost_le(-int(1.7976931348623157e+308) * 10,

241 ... -1.7976931348623157e+308)

242 True

243 >>> _almost_le(1e-302, 0)

244 True

245 >>> _almost_le(1e-200, 0)

246 False

247 """

248 if a <= b:

249 return True

250

251 if a < 0:

252 a, b = -b, -a # maybe: a = -19, b = -20 -> maybe: a = 20, b = 19

253 elif b <= 0:

254 return (b >= 0) and (a <= 1e-300)

255

256 with suppress(OverflowError):

257 use_a: int | float = a

258 use_b: int | float = b

259 for _ in range(3):

260 use_a = nextafter(use_a, -inf)

261 use_b = nextafter(use_b, inf)

262 if use_a <= use_b:

263 return True

264 try:

265 return (b / a) > 0.9999999999999

266 except OverflowError:

267 a_int: Final[int] = int(a)

268 b_int: Final[int] = int(b)

269 return (9999999999999 * a_int) <= (b_int * 10000000000000)

270

271

272def _to_frac(a: int | float) -> Fraction:

273 """

274 Convert a number to a fraction.

275

276 :param a: the number

277 :returns: the fraction

278

279 >>> _to_frac(23)

280 Fraction(23, 1)

281 >>> _to_frac(2.34)

282 Fraction(117, 50)

283 """

284 return Fraction(a) if isinstance(a, int) else Fraction(*float_to_frac(a))

285

286

287def _from_frac(a: int | float | Fraction) -> int | float:

288 """

289 Convert a fraction to either an integer or a float.

290

291 :param a: the fraction

292 :returns: the integer or float value

293

294 >>> _from_frac(1.6)

295 1.6

296 >>> _from_frac(123)

297 123

298 >>> _from_frac(Fraction(7, 8))

299 0.875

300 >>> _from_frac(Fraction(1237, 1))

301 1237

302 """

303 if isinstance(a, int):

304 return a

305 if isinstance(a, float):

306 return try_int(a)

307 num: Final[int] = a.numerator

308 denom: Final[int] = a.denominator

309 if denom == 1:

310 return num

311 return try_int_div(num, denom)

312

313

314#: the 0 fraction

315_FRAC_0: Final[Fraction] = Fraction(0, 1)

316#: the 1 fraction

317_FRAC_1: Final[Fraction] = Fraction(1, 1)

318

319

320def _int_root_bound_lower(base: int, root: int) -> int:

321 """

322 Compute a lower bound for a root.

323

324 We use that `log(a ** b) = log(a) * b`.

325 In binary, this means that: `a ** b == 2 ** (log2(a) * b)`, or, for roots

326 `a ** (1/b) == 2 ** (log2(a) / b`.

327 In bits, `2 ** x == 1 << x` and `floor(log2(x)) == x.bit_length() - 1`.

328 Therefore, we know that `a ** (1/b) >= 1 << ((a.bit_length() // b) - 1)`.

329 Similarly, we can have an upper bound by rounding up at each step

330 `a ** (1/b) <= 1 << (1 + ((b.bit_length() + 1) // root)

331

332 :param base: the base number

333 :param root: the root

334 :returns: the lower bound

335

336 >>> _int_root_bound_lower(8, 3)

337 1

338

339 >>> _int_root_bound_lower(8, 2)

340 2

341

342 >>> _int_root_bound_lower(25, 3)

343 1

344 """

345 logdiv: Final[int] = base.bit_length() // root

346 return (1 << (logdiv - 1)) if logdiv > 0 else (0 if base < 1 else 1)

347

348

349def _int_root_bound_upper(base: int, root: int) -> int:

350 """

351 Compute an upper bound for a root.

352

353 :param base: the base number

354 :param root: the root

355 :returns: the upper bound

356

357 >>> _int_root_bound_upper(8, 3)

358 4

359

360 >>> _int_root_bound_upper(8, 2)

361 4

362

363 >>> _int_root_bound_upper(25, 3)

364 8

365 """

366 return base if root == 1 else min(1 << (1 + ceil_div(

367 base.bit_length() + 1, root)), (base // 2) + (1 if base < 6 else 0))

368

369

def _frac_root_bound_lower(base: Fraction, root: int) -> Fraction:
    """
    Compute a lower bound for the `root`-th root of a fraction.

    Non-positive bases yield `0` and a base of exactly `1` yields `1`.
    For a base between `0` and `1`, the bound is the reciprocal of
    `_int_root_bound_upper` applied to
    `ceil_div(base.denominator, base.numerator)`. For a base above `1`, it
    is `_int_root_bound_lower` applied to `int(base)`.

    :param base: the base number
    :param root: the root
    :returns: the lower bound

    >>> _frac_root_bound_lower(Fraction(8), 3)
    Fraction(1, 1)

    >>> _frac_root_bound_lower(Fraction(8), 2)
    Fraction(2, 1)

    >>> _frac_root_bound_lower(Fraction(25), 3)
    Fraction(1, 1)

    >>> _frac_root_bound_lower(Fraction(3, 8), 3)
    Fraction(1, 2)

    >>> _frac_root_bound_lower(Fraction(11, 8), 2)
    Fraction(1, 1)

    >>> _frac_root_bound_lower(Fraction(11, 25), 3)
    Fraction(1, 2)
    """
    if base <= _FRAC_0:
        return _FRAC_0
    if base < _FRAC_1:
        inverse: Final[int] = ceil_div(base.denominator, base.numerator)
        return Fraction(1, _int_root_bound_upper(inverse, root))
    if base == _FRAC_1:
        return _FRAC_1
    return Fraction(_int_root_bound_lower(int(base), root))

402

403

def _frac_root_bound_upper(base: Fraction, root: int) -> Fraction:
    """
    Compute an upper bound for the `root`-th root of a fraction.

    Non-positive bases yield `0` and a base of exactly `1` yields `1`.
    For a base between `0` and `1`, the bound is the reciprocal of
    `_int_root_bound_lower` applied to
    `base.denominator // base.numerator`. For a base above `1`, it is
    `_int_root_bound_upper` applied to `ceil(base)`.

    :param base: the base number
    :param root: the root
    :returns: the upper bound

    >>> _frac_root_bound_upper(Fraction(8), 3)
    Fraction(4, 1)

    >>> _frac_root_bound_upper(Fraction(8), 2)
    Fraction(4, 1)

    >>> _frac_root_bound_upper(Fraction(25), 3)
    Fraction(8, 1)

    >>> _frac_root_bound_upper(Fraction(3, 8), 3)
    Fraction(1, 1)

    >>> _frac_root_bound_upper(Fraction(11, 8), 2)
    Fraction(2, 1)

    >>> _frac_root_bound_upper(Fraction(11, 25), 3)
    Fraction(1, 1)
    """
    if base <= _FRAC_0:
        return _FRAC_0
    if base < _FRAC_1:
        inverse: Final[int] = base.denominator // base.numerator
        return Fraction(1, _int_root_bound_lower(inverse, root))
    if base == _FRAC_1:
        return _FRAC_1
    return Fraction(_int_root_bound_upper(ceil(base), root))

436

437

def _limited_root(base: Fraction, root: int,
                  mini: Fraction = _FRAC_0,
                  maxi: Fraction | None = None) -> int | float:
    """
    Try to compute a root at a precision so exact that no digits are lost.

    If `base` is an integer, an exact integer root is searched first via
    binary search. If none exists, or if `base` is not an integer, the
    interval known to contain the root is repeatedly halved using exact
    :class:`~fractions.Fraction` arithmetic. This stops once the midpoint,
    converted via `_from_frac`, did not change for several consecutive
    steps, or once the midpoint raised to `root` equals `base` exactly.

    :param base: the number whose root is to be computed
    :param root: the degree of the root, e.g., `2` for the square root
    :param mini: a limit for the smallest possible result
    :param maxi: a maximum value, the limit for the largest possible result,
        or `None` if no upper limit is known
    :returns: the `root`-th root of `base`

    >>> from math import sqrt
    >>> sqrt(3)
    1.7320508075688772
    >>> _limited_root(Fraction(3, 1), 2)
    1.7320508075688772
    >>> _limited_root(Fraction(4, 1), 2)
    2

    >>> _limited_root(Fraction(3 ** 3, 1), 3)
    3
    >>> type(_limited_root(Fraction(3 ** 3, 1), 3))
    <class 'int'>

    >>> _limited_root(Fraction(3 ** 333, 1), 333)
    3

    >>> _limited_root(Fraction(9000 ** 1000, 1), 1000)
    9000

    >>> _limited_root(Fraction((10 ** 8) ** 100, 1), 35)
    71968567300115201992879

    >>> 0.456 ** (1 / 25)
    0.9690776862089129
    >>> _limited_root(Fraction(456, 1000), 25)
    0.9690776862089129

    >>> _limited_root(Fraction(2, 1), 2)
    1.4142135623730951
    >>> sqrt(2)
    1.4142135623730951
    """
    lower: Fraction | None = None
    upper: Fraction | None = None
    if base.denominator == 1:
        # The base is an integer: first look for an exact integer root.
        ibase = base.numerator
        if ibase <= 1:
            return ibase  # integer bases <= 1 are returned as they are

        ilower: int = max(int(mini), _int_root_bound_lower(ibase, root))
        iupper: int = _int_root_bound_upper(ibase, root)
        if maxi is not None:
            iupper = min(int(maxi) + 1, iupper)
        imid: int = ilower
        # integer binary search over [ilower, iupper]
        while ilower <= iupper:
            imid = (ilower + iupper) >> 1
            imid_exp = imid ** root
            if imid_exp > ibase:
                iupper = imid - 1
            elif imid_exp < ibase:
                ilower = imid + 1
            else:
                return imid  # We got an exact integer result
        # No exact integer result, but at least new limits: the fractional
        # search below is confined to the neighborhood of the last midpoint.
        upper = Fraction(imid + 1)
        lower = Fraction(max(0, imid - 1))

    # Now we do binary search using fractions
    # NOTE(review): the nesting of the following bound adjustments was
    # reconstructed from a listing without indentation; confirm against the
    # original file that they are applied unconditionally.
    if upper is None:
        upper = max(base, _FRAC_1)
    if maxi is not None:
        upper = min(upper, maxi)
    upper = min(upper, _frac_root_bound_upper(base, root))
    if lower is None:
        lower = _FRAC_0
    lower = max(mini, lower)
    lower = max(lower, _frac_root_bound_lower(base, root))

    # Now compute the root using binary search within the limits.
    guess: int | float = nan  # nan never equals the first real guess
    # Countdown that is decremented whenever the guess did not change and
    # restored to 4 as soon as it changes again.
    equal_steps: int = 4
    while equal_steps > 0:
        last_guess: int | float = guess
        mid: Fraction = (lower + upper) / 2
        mid_exp = mid ** root
        if mid_exp > base:
            upper = mid
        elif mid_exp < base:
            lower = mid
        else:
            return _from_frac(mid)  # the midpoint is the exact root

        guess = _from_frac(mid)
        # Guesses only count as equal if type and value both match.
        if (type(guess) is type(last_guess)) and (guess == last_guess):
            equal_steps -= 1
        else:
            equal_steps = 4
    return guess

539

540

541@dataclass(frozen=True, init=False, order=False, eq=False)

542class SampleStatistics(StreamStatistics):

543 """An immutable record with sample statistics of one quantity."""

544

545 #: The median, i.e., the value in the middle of the sorted list of

546 #: :attr:`~pycommons.math.stream_statistics.StreamStatistics.n` data

547 #: samples.

548 median: int | float

549 #: The geometric mean value, if defined. This is the

550 #: :attr:`~pycommons.math.stream_statistics.StreamStatistics.n`-th root

551 #: of the product of all data samples.

552 #: This value will be `None` if there was any sample which is not greater

553 #: than 0.

554 mean_geom: int | float | None

555

def __init__(self, n: int, minimum: int | float, median: int | float,
             mean_arith: int | float, mean_geom: int | float | None,
             maximum: int | float, stddev: int | float | None):
    """
    Create a sample statistics record.

    The values `n`, `minimum`, `mean_arith`, `maximum`, and `stddev` are
    handed to the constructor of the base class. Afterwards, `median` and
    `mean_geom` are checked for consistency with the stored minimum,
    maximum, and arithmetic mean, and are stored in this record.

    :param n: the sample size, must be `n >= 1`
    :param minimum: the minimum
    :param median: the median
    :param mean_arith: the arithmetic mean
    :param mean_geom: the geometric mean, or `None` if it is undefined
    :param maximum: the maximum
    :param stddev: the standard deviation, must be `None` if `n == 1`
    :raises ValueError: if the values are inconsistent with each other,
        see the examples below

    >>> s1 = SampleStatistics(2, 1, 2, 4.0, 3, 6, 0.2)
    >>> s1.n
    2
    >>> s1.minimum
    1
    >>> s1.median
    2
    >>> s1.mean_arith
    4
    >>> s1.mean_geom
    3
    >>> s1.maximum
    6
    >>> s1.stddev
    0.2
    >>> hash(s1)
    8839096310731950625

    >>> s2 = SampleStatistics(1, 0, 0.0, 0, None, 0.0, None)
    >>> s2.n
    1
    >>> s2.minimum
    0
    >>> s2.median
    0
    >>> s2.mean_arith
    0
    >>> print(s2.mean_geom)
    None
    >>> s2.maximum
    0
    >>> print(s2.stddev)
    None
    >>> hash(s2) == hash((0, 0, 0, inf, 0, inf, 1, 1))
    True

    >>> s3 = SampleStatistics(n=3, minimum=5, median=5, maximum=5,
    ...                       mean_arith=5, mean_geom=5, stddev=0.0)
    >>> s3.stddev
    0
    >>> hash(s3)
    1175763770956004139

    >>> sset = {s1, s1, s2, s1, s3, s3, s2, s1}
    >>> len(sset)
    3
    >>> print(list(sss.n for sss in sorted(sset)))
    [1, 2, 3]
    >>> print(list(sss.minimum for sss in sorted(sset)))
    [0, 1, 5]

    >>> try:
    ...     SampleStatistics(n=1, minimum=5, median=6, maximum=5,
    ...                      mean_arith=5, mean_geom=5, stddev=None)
    ... except ValueError as ve:
    ...     print(ve)
    median (6) must equal minimum (5) if n=1.

    >>> try:
    ...     SampleStatistics(n=2, minimum=5, median=4, maximum=5,
    ...                      mean_arith=5, mean_geom=5, stddev=0)
    ... except ValueError as ve:
    ...     print(ve)
    median (4) must be >= minimum (5) if n>1.

    >>> try:
    ...     SampleStatistics(n=1, minimum=5, median=5, maximum=6,
    ...                      mean_arith=5, mean_geom=5, stddev=None)
    ... except ValueError as ve:
    ...     print(ve)
    maximum (6) must equal minimum (5) if n=1.

    >>> try:
    ...     SampleStatistics(n=2, minimum=5, median=6, maximum=5,
    ...                      mean_arith=5, mean_geom=5, stddev=0)
    ... except ValueError as ve:
    ...     print(ve)
    maximum (5) must be >= med (6) if n>1.

    >>> try:
    ...     SampleStatistics(n=1, minimum=5, median=5, maximum=5,
    ...                      mean_arith=4, mean_geom=5, stddev=None)
    ... except ValueError as ve:
    ...     print(ve)
    mean_arith (4) must equal minimum (5) if n=1.

    >>> try:
    ...     SampleStatistics(n=2, minimum=5, median=6, maximum=6,
    ...                      mean_arith=4, mean_geom=5, stddev=None)
    ... except ValueError as ve:
    ...     print(ve)
    minimum<=mean_arith<=maximum must hold, but got 5, 4, and 6.

    >>> try:
    ...     SampleStatistics(n=1, minimum=5, median=5, maximum=5,
    ...                      mean_arith=5, mean_geom=None, stddev=None)
    ... except ValueError as ve:
    ...     print(ve)
    If minimum (5) > 0, then mean_geom must be defined, but it is None.

    >>> try:
    ...     SampleStatistics(n=1, minimum=0, median=0, maximum=0,
    ...                      mean_arith=0, mean_geom=0, stddev=None)
    ... except ValueError as ve:
    ...     print(ve)
    If minimum (0) <= 0, then mean_geom is undefined, but it is 0.

    >>> try:
    ...     SampleStatistics(n=1, minimum=5, median=5, maximum=5,
    ...                      mean_arith=5, mean_geom=6, stddev=None)
    ... except ValueError as ve:
    ...     print(ve)
    mean_geom (6) must equal minimum (5) if n=1.

    >>> try:
    ...     SampleStatistics(n=3, minimum=5, median=6, maximum=7,
    ...                      mean_arith=6, mean_geom=6.1, stddev=1)
    ... except ValueError as ve:
    ...     print(ve)
    mean_geom (6.1) must be <= mean_arith (6).

    >>> try:
    ...     SampleStatistics(n=3, minimum=5, median=6, maximum=7,
    ...                      mean_arith=6, mean_geom=6, stddev=-1)
    ... except ValueError as ve:
    ...     print(ve)
    stddev must be >= 0, but is -1.

    >>> try:
    ...     SampleStatistics(n=3, minimum=5, median=6, maximum=7,
    ...                      mean_arith=6, mean_geom=6, stddev=0)
    ... except ValueError as ve:
    ...     print(str(ve)[:59])
    If stddev (0) is 0, then minimum (5) must equal maximum (7)

    >>> try:
    ...     SampleStatistics(n=3, minimum=5, median=5, maximum=5,
    ...                      mean_arith=5, mean_geom=5, stddev=1)
    ... except ValueError as ve:
    ...     print(str(ve)[:59])
    If stddev (1) is 0, then minimum (5) must equal maximum (5)

    >>> try:
    ...     SampleStatistics(n=3, minimum=5, median=5, maximum=5,
    ...                      mean_arith=5, mean_geom=5, stddev=None)
    ... except ValueError as ve:
    ...     print(ve)
    If n=1, stddev=None and vice versa, but got n=3 and stddev=None.

    >>> try:
    ...     SampleStatistics(n=1, minimum=5, median=5, maximum=5,
    ...                      mean_arith=5, mean_geom=5, stddev=1)
    ... except ValueError as ve:
    ...     print(ve)
    If n=1, stddev=None and vice versa, but got n=1 and stddev=1.

    >>> try:
    ...     SampleStatistics(n=2, minimum=5, median=5, maximum=6,
    ...                      mean_arith=6, mean_geom=7, stddev=1)
    ... except ValueError as ve:
    ...     print(ve)
    minimum<=mean_geom<=maximum must hold, but got 5, 7, and 6.
    """
    super().__init__(n, minimum, mean_arith, maximum, stddev)

    # Work with the values as stored by the base class constructor.
    lowest: Final[int | float] = self.minimum
    highest: Final[int | float] = self.maximum
    single: Final[bool] = n == 1

    # check the median against the minimum
    median = try_int(median)
    if single:
        if median != lowest:
            raise ValueError(
                f"median ({median}) must equal minimum ({lowest}) if n=1.")
    elif median < lowest:
        raise ValueError(
            f"median ({median}) must be >= minimum ({lowest}) if n>1.")

    # check the median against the maximum
    if highest < median:
        raise ValueError(
            f"maximum ({highest}) must be >= med ({median}) if n>1.")

    # check the geometric mean: it must be given if and only if minimum > 0
    if mean_geom is None:
        if lowest > 0:
            raise ValueError(
                f"If minimum ({lowest}) > 0, then mean_geom must be"
                f" defined, but it is {mean_geom}.")
    elif lowest <= 0:
        raise ValueError(
            f"If minimum ({lowest}) <= 0, then mean_geom is "
            f"undefined, but it is {mean_geom}.")
    else:
        mean_geom = try_int(mean_geom)
        if single:
            if mean_geom != lowest:
                raise ValueError(
                    f"mean_geom ({mean_geom}) must equal "
                    f"minimum ({lowest}) if n=1.")
        elif not lowest <= mean_geom <= highest:
            raise ValueError(
                "minimum<=mean_geom<=maximum must hold, but got "
                f"{lowest}, {mean_geom}, and {highest}.")
        elif mean_geom > self.mean_arith:
            raise ValueError(
                f"mean_geom ({mean_geom}) must be <= "
                f"mean_arith ({self.mean_arith}).")

    # The dataclass is frozen, so the attributes must be set this way.
    object.__setattr__(self, "median", median)
    object.__setattr__(self, "mean_geom", mean_geom)

778

def __str__(self) -> str:
    """
    Get a string representation of this object.

    The string consists of `n`, `minimum`, `median`, `mean_arith`,
    `mean_geom`, `maximum`, and `stddev`, in this order, each converted
    with `str` and joined by `CSV_SEPARATOR`.

    :returns: the string
    """
    values: Final[tuple] = (
        self.n, self.minimum, self.median, self.mean_arith,
        self.mean_geom, self.maximum, self.stddev)
    return CSV_SEPARATOR.join(str(value) for value in values)

788

789 def min_mean(self) -> int | float:

790 """

791 Obtain the smallest of the three mean values.

792

793 :returns: the smallest of `mean_arith`, `mean_geom`, and `median`

794

795 >>> SampleStatistics(1, 0, 0.0, 0, None, 0.0, None).min_mean()

796 0

797 >>> SampleStatistics(2, 1, 2, 4.0, 3, 6, 0.2).min_mean()

798 2

799 >>> SampleStatistics(2, 1, 3.2, 4.0, 3, 6, 0.2).min_mean()

800 3

801 >>> SampleStatistics(2, 1, 5.2, 4.0, 3, 6, 0.2).min_mean()

802 3

803 """

804 if self.mean_geom is None: # geometric mean is always <= arithmean

805 return min(self.mean_arith, self.median)

806 return min(self.mean_geom, self.median)

807

808 def max_mean(self) -> int | float:

809 """

810 Obtain the largest of the three mean values.

811

812 :returns: the largest of `mean_arith`, `mean_geom`, and `median`

813

814 >>> SampleStatistics(1, 0, 0.0, 0, None, 0.0, None).max_mean()

815 0

816 >>> SampleStatistics(2, 1, 2, 4.0, 3, 6, 0.2).max_mean()

817 4

818 >>> SampleStatistics(2, 1, 3.2, 4.0, 3, 6, 0.2).max_mean()

819 4

820 >>> SampleStatistics(2, 1, 5.2, 4.0, 3, 6, 0.2).max_mean()

821 5.2

822 """

823 return max(self.mean_arith, self.median)

824

def compact(self, needs_n: bool = True) \
        -> "int | float | SampleStatistics":
    """
    Try to represent this object as single number, if possible.

    :param needs_n: if this is `True`, the default, then the object is
        only turned into a single number if also `n==1`. Otherwise, `n`
        is ignored
    :returns: an integer or float if this objects minimum equals its
        maximum, the object itself otherwise
    :raises TypeError: if `needs_n` is not a `bool`

    >>> s = SampleStatistics.from_single_value(10, 1)
    >>> s.compact() == 10
    True
    >>> s.compact() == s.compact(True)
    True

    >>> s = SampleStatistics.from_single_value(10, 2)
    >>> s.compact() is s
    True
    >>> s.compact() == s.compact(True)
    True

    >>> s = SampleStatistics.from_single_value(10, 2)
    >>> s.compact(False) == 10
    True

    >>> s = SampleStatistics(2, 1, 2, 4, 3, 5, 3)
    >>> s.compact() is s
    True

    >>> s = SampleStatistics(2, 1, 2, 4, 3, 5, 3)
    >>> s.compact(False) is s
    True

    >>> try:
    ...     s.compact(1)
    ... except TypeError as te:
    ...     print(te)
    needs_n should be an instance of bool but is int, namely 1.

    >>> try:
    ...     s.compact(None)
    ... except TypeError as te:
    ...     print(te)
    needs_n should be an instance of bool but is None.
    """
    if not isinstance(needs_n, bool):
        raise type_error(needs_n, "needs_n", bool)
    smallest: Final[int | float] = self.minimum
    if smallest < self.maximum:
        return self  # several distinct values: cannot be one number
    if needs_n and (self.n > 1):
        return self  # one value, but the sample count must be preserved
    return smallest

877

878 def _key(self) -> tuple[int | float, int | float, int | float,

879 int | float, int | float, int | float, int, int]:

880 r"""

881 Get a comparison and hash key.

882

883 :returns: the comparison key

884

885 >>> SampleStatistics(2, 1, 2, 4.0, 3, 6, 0.2)._key()

886 (1, 2, 4, 3, 6, 0.2, 2, 1)

887

888 >>> SampleStatistics(1, 0, 0, 0, None, 0, None)._key()

889 (0, 0, 0, inf, 0, inf, 1, 1)

890 """

891 return (self.minimum, self.median, self.mean_arith,

892 inf if self.mean_geom is None else self.mean_geom,

893 self.maximum, inf if self.stddev is None else self.stddev,

894 self.n, 1)

895

def get_mean_geom(self) -> int | float | None:
    """
    Get the geometric mean (:attr:`~SampleStatistics.mean_geom`).

    This method can also be invoked via the class, i.e., as
    `SampleStatistics.get_mean_geom(obj)`, see the second example. This is
    why the type of `self` is checked explicitly.

    :returns: the geometric mean (:attr:`~SampleStatistics.mean_geom`) of
        all the samples, `None` if the geometric mean is not defined.
    :raises TypeError: if an object of the wrong type is passed in as self

    >>> SampleStatistics(5, 3, 5, 6, 4, 7, 2).get_mean_geom()
    4

    >>> try:
    ...     SampleStatistics.get_mean_geom(None)
    ... except TypeError as te:
    ...     print(str(te)[:20])
    self should be an in
    """
    # `self` may be arbitrary if the method is called via the class
    if not isinstance(self, SampleStatistics):
        raise type_error(self, "self", SampleStatistics)
    return self.mean_geom

916

def get_median(self) -> int | float:
    """
    Get the :attr:`~SampleStatistics.median` of all the samples.

    This method can also be invoked via the class, i.e., as
    `SampleStatistics.get_median(obj)`, see the second example. This is
    why the type of `self` is checked explicitly.

    :returns: the :attr:`~SampleStatistics.median` of all the samples.
    :raises TypeError: if an object of the wrong type is passed in as self

    >>> SampleStatistics(5, 3, 5, 6, 4, 7, 2).get_median()
    5

    >>> try:
    ...     SampleStatistics.get_median(None)
    ... except TypeError as te:
    ...     print(str(te)[:20])
    self should be an in
    """
    # `self` may be arbitrary if the method is called via the class
    if not isinstance(self, SampleStatistics):
        raise type_error(self, "self", SampleStatistics)
    return self.median

936

@classmethod
def from_single_value(cls, value: Union[
        int, float, "StreamStatistics"], n: int = 1) -> "SampleStatistics":
    r"""
    Create a sample statistics from a single number.

    If `value` already is a :class:`SampleStatistics` record with a matching
    `n`, it is returned as is. Any other
    :class:`~pycommons.math.stream_statistics.StreamStatistics` record is
    only accepted if its minimum equals its maximum, in which case that
    value is used. For a single number, minimum, median, arithmetic mean,
    and maximum all equal `value`. The geometric mean equals `value` if
    `value > 0` and is `None` otherwise. The standard deviation is `None`
    for `n == 1` and `0` otherwise.

    :param value: the single value, or a statistics record representing
        a single value
    :param n: the number of samples, i.e., the number of times this value
        occurred
    :returns: the sample statistics
    :raises TypeError: if `value` is neither an `int`, a `float`, nor a
        statistics record, or if `n` is not an integer
    :raises ValueError: if `value` is a statistics record whose `n` differs
        from the parameter `n` or whose minimum and maximum differ, or if
        `value` is not finite

    >>> s = SampleStatistics.from_single_value(10, 2)
    >>> print(s.stddev)
    0
    >>> s.minimum == s.maximum == s.mean_arith == s.mean_geom \
    ...     == s.median == 10
    True
    >>> s is SampleStatistics.from_single_value(s, s.n)
    True

    >>> s = SampleStatistics.from_single_value(10, 1)
    >>> print(s.stddev)
    None
    >>> s.minimum == s.maximum == s.mean_arith == s.mean_geom \
    ...     == s.median == 10
    True
    >>> s is SampleStatistics.from_single_value(s, s.n)
    True

    >>> s = SampleStatistics.from_single_value(-10, 2)
    >>> print(s.stddev)
    0
    >>> s.minimum == s.maximum == s.mean_arith == s.median == -10
    True
    >>> print(s.mean_geom)
    None
    >>> s is SampleStatistics.from_single_value(s, s.n)
    True

    >>> s = SampleStatistics.from_single_value(-10, 1)
    >>> print(s.stddev)
    None
    >>> s.minimum == s.maximum == s.mean_arith == s.median == -10
    True
    >>> print(s.mean_geom)
    None
    >>> s is SampleStatistics.from_single_value(s, s.n)
    True

    >>> s = SampleStatistics.from_single_value(10.5, 2)
    >>> print(s.stddev)
    0
    >>> s.minimum == s.maximum == s.mean_arith == s.mean_geom \
    ...     == s.median == 10.5
    True
    >>> s is SampleStatistics.from_single_value(s, s.n)
    True

    >>> s = SampleStatistics.from_single_value(10.5, 1)
    >>> print(s.stddev)
    None
    >>> s.minimum == s.maximum == s.mean_arith == s.mean_geom \
    ...     == s.median == 10.5
    True
    >>> s is SampleStatistics.from_single_value(s, s.n)
    True

    >>> s = SampleStatistics.from_single_value(-10.5, 2)
    >>> print(s.stddev)
    0
    >>> s.minimum == s.maximum == s.mean_arith == s.median == -10.5
    True
    >>> print(s.mean_geom)
    None
    >>> s is SampleStatistics.from_single_value(s, s.n)
    True

    >>> s = SampleStatistics.from_single_value(-10.5, 1)
    >>> print(s.stddev)
    None
    >>> s.minimum == s.maximum == s.mean_arith == s.median == -10.5
    True
    >>> print(s.mean_geom)
    None
    >>> s is SampleStatistics.from_single_value(s, s.n)
    True

    >>> print(SampleStatistics.from_single_value(
    ...     StreamStatistics(5, 1, 1, 1, 0), 5))
    5;1;1;1;1;1;0

    >>> try:
    ...     SampleStatistics.from_single_value(StreamStatistics(
    ...         5, 1, 2, 3, 5), 5)
    ... except ValueError as ve:
    ...     print(ve)
    Cannot create SampleStatistics from 5;1;2;3;5.

    >>> try:
    ...     SampleStatistics.from_single_value(None)
    ... except TypeError as te:
    ...     print(str(te)[:20])
    value should be an i

    >>> try:
    ...     SampleStatistics.from_single_value("a")
    ... except TypeError as te:
    ...     print(str(te)[:20])
    value should be an i

    >>> try:
    ...     SampleStatistics.from_single_value(1, None)
    ... except TypeError as te:
    ...     print(str(te)[:20])
    n should be an insta

    >>> try:
    ...     SampleStatistics.from_single_value(1, "a")
    ... except TypeError as te:
    ...     print(str(te)[:20])
    n should be an insta

    >>> try:
    ...     SampleStatistics.from_single_value(s, 12)
    ... except ValueError as ve:
    ...     print(str(ve)[:20])
    Incompatible numbers

    >>> try:
    ...     SampleStatistics.from_single_value(inf)
    ... except ValueError as ve:
    ...     print(str(ve)[:20])
    value=inf is not fin
    """
    n = check_int_range(n, "n", 1, 1_000_000_000_000_000_000)

    if isinstance(value, StreamStatistics):
        if value.n != n:
            raise ValueError(  # noqa: TRY004
                f"Incompatible numbers of values {n} and {value}.")
        if isinstance(value, SampleStatistics):
            return value  # already the right kind of record: reuse it
        if value.maximum != value.minimum:
            # a record covering different values is not a single value
            raise ValueError(
                f"Cannot create SampleStatistics from {value}.")
        value = value.maximum
    if not isinstance(value, int | float):
        # Any StreamStatistics record is accepted above, so report that
        # type (and not only SampleStatistics) as permitted.
        raise type_error(value, "value", (int, float, StreamStatistics))
    if not isfinite(value):
        raise ValueError(f"value={value} is not finite.")
    return SampleStatistics(
        n=n, minimum=value, median=value, mean_arith=value,
        mean_geom=None if value <= 0 else value, maximum=value,
        stddev=None if n <= 1 else 0)

1091

@classmethod
def aggregate(cls) -> StreamStatisticsAggregate["SampleStatistics"]:
    """
    Get an aggregate suitable for this statistics type.

    Each call creates a new, empty aggregate. Values can be fed into it
    and a :class:`SampleStatistics` record is obtained from its
    `result` method, as shown in the examples below.

    :return: the aggregate

    >>> ag = SampleStatistics.aggregate()
    >>> ag.update((1, 2, 3, 4))
    >>> ag.result()
    SampleStatistics(n=4, minimum=1, mean_arith=2.5, maximum=4, \
stddev=1.2909944487358056, median=2.5, mean_geom=2.213363839400643)
    >>> ag.reset()
    >>> ag.add(4)
    >>> ag.add(5)
    >>> ag.add(6)
    >>> ag.add(7)
    >>> ag.result()
    SampleStatistics(n=4, minimum=4, mean_arith=5.5, maximum=7, \
stddev=1.2909944487358056, median=5.5, mean_geom=5.383563270955295)
    """
    # A fresh aggregate instance is returned on every invocation.
    return _SampleStats()

1114

@classmethod
def from_samples(cls, source: Iterable[
        int | float | None]) -> "SampleStatistics":
    """
    Create a statistics object from an iterable of integers or floats.

    As bottom line, this function will forward computations to the
    :mod:`statistics` routines that ship with Python if nothing else works.
    However, sometimes, something else may work: In particular, if the data
    consists of only integers. In this case, it just might be possible to
    compute the statistics very accurately with integer precision, where
    possible. Also, otherwise, we can often accumulate the data using
    instances of :class:`fractions.Fraction`. Indeed, even the
    :mod:`statistics` routines may do this, but they convert to `float` in
    cases of non-1 denominators, even if the integer presentation was much
    more accurate.

    Elements of `source` that are `None` are skipped.

    :param source: the source
    :returns: a statistics representing the statistics over `source`
    :raises TypeError: if `source` is not an :class:`Iterable`
    :raises ValueError: if `source` contains no (non-`None`) element or
        if the computed geometric mean is inconsistent with the minimum
        or the arithmetic mean

    >>> s = SampleStatistics.from_samples([0.0])
    >>> s.n
    1
    >>> s.minimum
    0
    >>> s.maximum
    0
    >>> print(s.mean_geom)
    None
    >>> s.median
    0
    >>> print(s.stddev)
    None

    >>> s = SampleStatistics.from_samples([1.0])
    >>> s.n
    1
    >>> s.minimum
    1
    >>> s.maximum
    1
    >>> print(s.mean_geom)
    1
    >>> s.median
    1
    >>> print(s.stddev)
    None

    >>> s = SampleStatistics.from_samples([1.0, 1])
    >>> s.n
    2
    >>> s.minimum
    1
    >>> s.maximum
    1
    >>> print(s.mean_geom)
    1
    >>> s.median
    1
    >>> print(s.stddev)
    0

    >>> s = SampleStatistics.from_samples([0, 0.0])
    >>> s.n
    2
    >>> s.minimum
    0
    >>> s.maximum
    0
    >>> print(s.mean_geom)
    None
    >>> s.median
    0
    >>> print(s.stddev)
    0

    >>> from statistics import stdev as stat_stddev
    >>> dd = [1.5, 2.5]
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.n
    2
    >>> s.minimum
    1.5
    >>> s.maximum
    2.5
    >>> print(s.mean_geom)
    1.9364916731037085
    >>> stat_geomean(dd)
    1.9364916731037085
    >>> s.median
    2
    >>> print(s.stddev)
    0.7071067811865476
    >>> stat_stddev(dd)
    0.7071067811865476

    >>> dd = [1.0, 2.0]
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.n
    2
    >>> s.minimum
    1
    >>> s.maximum
    2
    >>> print(s.mean_geom)
    1.4142135623730951
    >>> (1 * 2) ** 0.5
    1.4142135623730951
    >>> stat_geomean(dd)
    1.414213562373095
    >>> s.median
    1.5
    >>> print(s.stddev)
    0.7071067811865476
    >>> stat_stddev(dd)
    0.7071067811865476

    >>> dd = [1.0, 2.0, 3.0]
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.n
    3
    >>> s.minimum
    1
    >>> s.maximum
    3
    >>> print(s.mean_geom)
    1.8171205928321397
    >>> (1 * 2 * 3) ** (1 / 3)
    1.8171205928321397
    >>> stat_geomean(dd)
    1.8171205928321397
    >>> s.median
    2
    >>> print(s.stddev)
    1
    >>> stat_stddev(dd)
    1.0

    >>> dd = [1.0, 0, 3.0]
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.n
    3
    >>> s.minimum
    0
    >>> s.maximum
    3
    >>> print(s.mean_geom)
    None
    >>> s.median
    1
    >>> print(s.stddev)
    1.5275252316519468
    >>> stat_stddev(dd)
    1.5275252316519468

    >>> dd = [1.0, -2, 3.0]
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.n
    3
    >>> s.minimum
    -2
    >>> s.maximum
    3
    >>> print(s.mean_geom)
    None
    >>> s.median
    1
    >>> print(s.stddev)
    2.516611478423583
    >>> stat_stddev(dd)
    2.516611478423583

    >>> dd = [1e5, 2e7, 3e9]
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.n
    3
    >>> s.minimum
    100000
    >>> s.maximum
    3000000000
    >>> print(s.mean_geom)
    18171205.928321395
    >>> (100000 * 20000000 * 3000000000) ** (1 / 3)
    18171205.92832138
    >>> 100000 * (((100000 // 100000) * (20000000 // 100000) * (
    ...     3000000000 // 100000)) ** (1 / 3))
    18171205.92832139
    >>> print(s.mean_geom ** 3)
    5.999999999999999e+21
    >>> print(18171205.92832139 ** 3)
    5.999999999999995e+21
    >>> s.median
    20000000
    >>> print(s.stddev)
    1726277112.7487035
    >>> stat_stddev(dd)
    1726277112.7487035

    >>> dd = [3.3, 2.5, 3.7, 4.9]
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.n
    4
    >>> s.minimum
    2.5
    >>> s.maximum
    4.9
    >>> print(s.mean_geom)
    3.4971393519216964
    >>> 3.4971393519216964 ** 4
    149.5725
    >>> (3.3 * 2.5 * 3.7 * 4.9) ** 0.25
    3.497139351921697
    >>> s.median
    3.5
    >>> s.stddev
    1.0000000000000002
    >>> stat_stddev(dd)
    1.0000000000000002

    >>> dd = [3, 1, 2, 5]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.minimum)
    1
    >>> print(s.maximum)
    5
    >>> print(s.mean_arith)
    2.75
    >>> print(s.median)
    2.5
    >>> print(f"{s.mean_geom:.4f}")
    2.3403
    >>> print(f"{s.min_mean():.4f}")
    2.3403
    >>> print(f"{s.max_mean()}")
    2.75

    >>> dd = [8, 8, 8, 8, 9, 10, 10, 11, 11, 12, 12, 12, 12, 13,
    ...       13, 13, 14, 14, 14, 15, 15, 15, 15, 15, 15, 16, 16, 16]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.minimum)
    8
    >>> print(s.maximum)
    16
    >>> print(s.mean_arith)
    12.5
    >>> print(s.median)
    13
    >>> print(s.mean_geom)
    12.197150265022891
    >>> stat_geomean(dd)
    12.19715026502289
    >>> print(s.stddev)
    2.673602092336881
    >>> stat_stddev(dd)
    2.673602092336881

    >>> dd = [3, 4, 7, 14, 15, 16, 26, 28, 29, 30, 31, 31]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.minimum)
    3
    >>> print(s.maximum)
    31
    >>> print(s.mean_arith)
    19.5
    >>> print(s.median)
    21

    >>> print(s.mean_geom)
    15.354984483655892
    >>> stat_geomean(dd)
    15.354984483655894
    >>> k = 1
    >>> for i in dd:
    ...     k *= i
    >>> k
    171787904870400
    >>> len(dd)
    12
    >>> k ** (1 / 12)
    15.354984483655889
    >>> 15.354984483655889 ** 12
    171787904870399.62
    >>> 15.354984483655894 ** 12
    171787904870400.34
    >>> 15.354984483655892 ** 12
    171787904870400.1

    >>> print(s.stddev)
    10.917042556563484
    >>> print(str(stat_stddev(dd))[:-1])
    10.91704255656348

    >>> dd = [375977836981734264856247621159545315,
    ...       1041417453269301410322718941408784761,
    ...       2109650311556162106262064987699051941]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.minimum)
    375977836981734264856247621159545315
    >>> print(s.maximum)
    2109650311556162106262064987699051941
    >>> print(s.mean_arith)
    1175681867269065927147010516755794006
    >>> stat_mean(dd)
    1.1756818672690659e+36
    >>> print(s.median)
    1041417453269301410322718941408784761

    >>> print(s.mean_geom)
    938280139276529201997232316081385153
    >>> stat_geomean(dd)
    9.38280139276522e+35

    >>> str(dd[0] * dd[1] * dd[2])[:60]
    '826033329443972563356247815302467930409182372405786485790679'
    >>> str(938280139276529201997232316081385153 ** 3)[:60]
    '826033329443972563356247815302467929164458081790138679285598'
    >>> str(int(9.38280139276522e+35) ** 3)[:60]
    '826033329443953666416831847378532327244986484162191539691938'

    >>> print(s.stddev)
    874600058269081159245960567663054887
    >>> stat_stddev(dd)
    8.746000582690812e+35

    >>> dd = [104275295274308290135253194482044160663473778025704,
    ...       436826861307375084714000787588311944456580437896461,
    ...       482178404791292289021955619498303854464057392180997,
    ...       521745351662201002493923306143082542601267608373030,
    ...       676289718505789968602970820038005797309334755525626]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.minimum)
    104275295274308290135253194482044160663473778025704
    >>> print(s.maximum)
    676289718505789968602970820038005797309334755525626
    >>> print(s.mean_arith)
    444263126308193326993620745549949659898942794400364
    >>> stat_mean(dd)
    4.442631263081933e+50
    >>> print(s.median)
    482178404791292289021955619498303854464057392180997

    >>> print(s.mean_geom)
    378318848166864995660791573439112525534046591591759
    >>> stat_geomean(dd)
    3.78318848166862e+50

    >>> print(s.stddev)
    210311926886813737006941586539087921260462032505870
    >>> stat_stddev(dd)
    2.1031192688681374e+50

    >>> dd = [4, 5, 5, 6, 6, 6, 6, 6, 8, 8]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    5.884283961687533
    >>> print(stat_geomean(dd))
    5.884283961687533

    >>> dd = [4, 4, 4, 5, 5, 8]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    4.836542350243914
    >>> print(stat_geomean(dd))
    4.8365423502439135

    >>> dd = [2, 8, 11, 17, 26, 30, 32]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    13.327348017053906
    >>> print(stat_geomean(dd))
    13.327348017053906

    >>> dd = [2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    3.4710522375429465
    >>> print(stat_geomean(dd))
    3.471052237542947

    >>> dd = [3, 4, 4, 5, 6, 8, 8, 8, 8]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    5.653305998922543
    >>> print(stat_geomean(dd))
    5.653305998922543

    >>> dd = [16, 17, 19, 20, 20, 21, 22, 23, 24, 24, 25, 26, 29, 31,
    ...       31, 31, 32, 32, 32]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    24.419566831650357
    >>> print(stat_geomean(dd))
    24.41956683165036

    >>> dd = [66, 68, 69, 70, 72, 73, 73, 79, 81, 87, 94, 99, 100,
    ...       102, 103, 112, 118, 119, 123, 123]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    89.45680043258344
    >>> print(stat_geomean(dd))
    89.45680043258346

    >>> dd = [44, 63, 63, 68, 68, 68, 70, 74, 74, 80, 95, 108, 110, 128]
    >>> s = SampleStatistics.from_samples(dd)
    >>> print(s.mean_geom)
    76.68646417360762
    >>> print(stat_geomean(dd))
    76.68646417360763

    >>> try:
    ...     SampleStatistics.from_samples(None)
    ... except TypeError as te:
    ...     print(te)
    source should be an instance of typing.Iterable but is None.

    >>> SampleStatistics.from_samples((int("3432135447287235494201\
93506618248802478442\
545733127827402743350092428341563721880022852900744775368104117201410\
41"), int("4543178800835483269512609282884075126142677531600199807725\
0558561959304806690567285991174956892786401583087254156"), int("35473\
203294104466229269097724582630304968924904656920211268628173495602053\
843032960943121516556362641127137000879"))).mean_arith
    38408781925110551288804847071749420604746651597990567009597840581\
565913672301929416406528849308895284373981465359

    Corner cases where the standard deviation resulting from compact
    fractions deviates from the standard deviation resulting from
    normalized fractions:

    >>> dd = [-7.737125245533627e+25] * 28
    >>> dd[2] = -7.737125245533626e+25
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.stddev
    1623345050.6245058
    >>> stat_stddev(dd)
    1623345050.6245058
    >>> ddx = tuple(map(_to_frac, dd))
    >>> ds = sum(ddx)
    >>> dss = sum(ddy * ddy for ddy in ddx)
    >>> from math import sqrt
    >>> sqrt((dss - (ds * ds / 28)) / 27)
    1623345050.6245055

    Here the standard deviation becomes meaningless.
    If you compute it based on converting all values to floats, you get
    something like 0.435.
    You get the same result if you represent all values directly as
    Fractions.
    However, if you represent the float values as more compact Fractions,
    i.e., as Fractions that map to the exactly same floats but have smaller
    denominators, you get a standard deviation of 9.32e+64.
    Basically, the difference is 65 orders of magnitude.
    But the source numbers would be exactly the same...
    The reason is the limited range of floats.

    >>> dd = (7.588550360256754e+81, int("75885503602567541832791480735\
293707\
29071901715047420004889892225542594864082845697"), int("758855036025675418327\
9148073529370729071901715047420004889892225542594864082845697"), \
7.588550360256754e+81, 7.588550360256754e+81, 7.588550360256754e+81, \
int("7588550360256754183279148073529370729071901715047420004889892225\
542594864082845696"), 7.588550360256754e+81, 7.588550360256754e+81, \
7.588550360256754e+81, 7.588550360256754e+81, int("758855036025675418\
3279148073529370729071901715047420004889892225542594864082845696"), int("7588\
55036025675418327914807352937072907190171504742000488989222554259486408284569\
7"), int("7588550360256754183279148073529370729071901715047420004889892225542\
594864082845696"), int("75885503602567541832791480735293707290719017150474200\
04889892225542594864082845696"), int("758855036025675418327914807352937072907\
1901715047420004889892225542594864082845697"), 7.588550360256754e+81,\
int("7588550360256754183279148073529370729071901715047420004889892225\
542594864082845697"), int("75885503602567541832791480735293707290719017150474\
20004889892225542594864082845697"), int("758855036025675418327914807352937072\
9071901715047420004889892225542594864082845697"), 7.588550360256754e+81, \
int("7588550360256754183279148073529370729071901715047420004889892225\
542594864082845696"), int("75885503602567541832791480735293707290719017150474\
20004889892225542594864082845696"), 7.588550360256754e+81, \
7.588550360256754e+81, int("75885503602567541832791480735293707290719\
01715047420004889892225542594864082845696"), 7.588550360256754e+81, \
7.588550360256754e+81, 7.588550360256754e+81)
    >>> s = SampleStatistics.from_samples(dd)
    >>> s.stddev
    0.4354941703556927
    >>> stat_stddev(dd)
    0.4354941703556927
    >>> ddx = tuple(map(_to_frac, dd))
    >>> ds = sum(ddx)
    >>> dss = sum(ddy * ddy for ddy in ddx)
    >>> _limited_root((dss - (ds * ds / len(dd))) / (len(dd) - 1), 2)
    93206175962530968626911348905791729797971161757128018983942059951
    >>> ddx = tuple(map(Fraction, dd))
    >>> ds = sum(ddx)
    >>> dss = sum(ddy * ddy for ddy in ddx)
    >>> _limited_root((dss - (ds * ds / len(dd))) / (len(dd) - 1), 2)
    0.4354941703556927

    >>> try:
    ...     SampleStatistics.from_samples(1)
    ... except TypeError as te:
    ...     print(te)
    source should be an instance of typing.Iterable but is int, namely 1.

    >>> try:
    ...     SampleStatistics.from_samples([])
    ... except ValueError as ve:
    ...     print(ve)
    Data source cannot be empty.
    """
    if not isinstance(source, Iterable):
        raise type_error(source, "source", Iterable)

    # The median function of statistics would do this anyway, so we may as
    # well do it now.
    # `None` elements are dropped and every remaining value is passed
    # through `try_int` before sorting.
    data: Final[list[int | float]] = sorted(map(try_int, (
        xs for xs in source if xs is not None)))
    n: Final[int] = list.__len__(data)
    if n <= 0:
        raise ValueError("Data source cannot be empty.")

    minimum: int | float = data[0]  # because data is now sorted
    maximum: int | float = data[-1]  # because data is now sorted
    if (minimum >= maximum) or (n <= 1):  # all data is the same
        # Delegate the degenerate case: one value repeated `n` times.
        return SampleStatistics.from_single_value(minimum, n)

    # Compute the median.
    # For odd `n` it is the middle element, for even `n` it is the mean of
    # the two central elements.
    middle: Final[int] = n >> 1
    median: Final[int | float] = data[middle] if (n & 1) == 1 else (
        _mean_of_two(data[middle - 1], data[middle]))

    # Is it possible, at this stage, that all data are integers?
    # (Only the two extremes are checked here; the loop below revokes
    # this flag as soon as a non-integer element is encountered.)
    can_int: bool = isinstance(minimum, int) and isinstance(maximum, int)

    # If we have only two numbers, we also already have the mean.
    # Otherwise, if we have only integer data so far and we know that
    # regardless how we dice it, the sum of the data will never exceed
    # the range in which floats can accurately represent integers, then
    # we also know that we can compute the arithmetic mean exactly.
    mean_arith: int | float | None = median if n <= 2 else (
        try_int(stat_mean(data)) if can_int and (
            (n * (1 + max(maximum, 0) - min(minimum, 0)))
            < _DBL_INT_LIMIT_P_I) else None)
    mean_arith_frac: Fraction | None = None
    mean_geom: int | float | None = None  # don't know the geometric mean
    # Go over the data once and see if we can treat it as all-integer.
    # If yes, then we can compute some statistics very precisely.
    # are all values integers?
    int_sum: int = 0  # the integer sum (for mean, stddev)
    int_sum_sqr: int = 0  # the sum of squares (for stddev)
    int_sum_sqr_2: int = 0  # the sum of squares (for stddev)
    int_prod: int = 1  # the integer product (for geom_mean)
    frac_sum: Fraction = _FRAC_0
    frac_sum_sqr: Fraction = frac_sum
    frac_prod: Fraction = _FRAC_1

    # The following is *only* used if we have *only* integer data.
    # stddev((a, b, ...)) = stddev((a-x, b-x, ...))
    # If we can shift the whole data such that its center is around 0,
    # then the difference that we have to add up become smaller, and thus
    # the floating point arithmetic that we may need to use becomes more
    # accurate. If we know the mean, then shifting the data by the mean
    # will lead to the smallest sum of deviations. If we know only the
    # median, then this is better than nothing.
    shift: Final[int] = int(median) if mean_arith is None \
        else (mean_arith if isinstance(mean_arith, int)
              else round(mean_arith))

    for ii, ee in enumerate(data):  # iterate over all data
        if can_int and (not isinstance(ee, int)):
            # First non-integer element: switch from the integer
            # accumulators to the Fraction accumulators. `int_sum` holds
            # the sum of the `ii` shifted values seen so far, so the
            # shift is added back; the unshifted sum of squares and the
            # product are taken over as they are.
            frac_sum = Fraction(int_sum + ii * shift)
            frac_sum_sqr = Fraction(int_sum_sqr_2)
            frac_prod = Fraction(int_prod)
            can_int = False
        if can_int:  # == ee must be int
            int_sum_sqr_2 += ee * ee  # type: ignore
            int_prod *= ee  # type: ignore
            e: int = ee - shift  # type: ignore
            int_sum += e  # so we can sum exactly
            int_sum_sqr += e * e  # and compute the sum of squares
        else:
            eef = Fraction(ee)
            frac_sum += eef
            frac_sum_sqr += eef * eef
            frac_prod *= eef

    if n > 2:  # mean_arith is None or an approximation
        # In the all-integer case the shift is undone here.
        mean_arith_frac = (Fraction(int_sum, n) + shift) \
            if can_int else (frac_sum / n)
        mean_arith = _from_frac(mean_arith_frac)
    # Sample standard deviation (denominator `n - 1`) as the root of
    # (sum of squares - (sum squared / n)) / (n - 1), using either the
    # shifted integer sums or the Fraction sums.
    stddev: Final[int | float] = _limited_root(((int_sum_sqr - Fraction(
        int_sum * int_sum, n)) if can_int else (frac_sum_sqr - (
            frac_sum * frac_sum / n))) / (n - 1), 2)

    if minimum > 0:  # geometric mean only defined for all-positive
        if can_int:
            frac_prod = Fraction(int_prod)
        # # mean_geom always <= mean_arith
        # The n-th root of the product is requested together with the
        # minimum and min(maximum, arithmetic mean) as further arguments
        # -- presumably lower/upper bounds for the root; confirm against
        # `_limited_root`.
        mean_geom = _limited_root(
            frac_prod, n, _to_frac(minimum), min(
                _to_frac(maximum), (Fraction(mean_arith) if isinstance(
                    mean_arith, int) else Fraction(nextafter(
                        mean_arith, inf))) if (mean_arith_frac is None)
                else mean_arith_frac))

    # Fallback to the standard library if no geometric mean was obtained.
    if (mean_geom is None) and (minimum > 0):
        mean_geom = stat_geomean(data)

    if mean_geom is not None:
        # Deal with errors that may have arisen due to
        # numerical imprecision.
        # NOTE(review): `_almost_le` presumably tests "less or equal up to
        # rounding error" -- confirm against its definition.
        if mean_geom < minimum:
            if _almost_le(minimum, mean_geom):
                mean_geom = minimum
            else:
                raise ValueError(
                    f"mean_geom={mean_geom} but min={minimum}")
        if mean_arith < mean_geom:
            if _almost_le(mean_geom, mean_arith):
                mean_geom = mean_arith
            else:
                raise ValueError(
                    f"mean_geom={mean_geom} but mean_arith={mean_arith}")

    return SampleStatistics(minimum=minimum, median=median,
                            mean_arith=mean_arith, mean_geom=mean_geom,
                            maximum=maximum, stddev=stddev, n=n)

1739

1740

class CsvReader(CsvReaderBase[SampleStatistics]):
    """
    A csv parser for sample statistics.

    The reader is constructed from a mapping of column titles to column
    indices and then converts rows of strings to
    :class:`SampleStatistics` records. Statistics whose cells are empty
    are filled in from the values that are present.

    >>> from pycommons.io.csv import csv_read
    >>> csv = ["n;min;mean;med;geom;max;sd",
    ...        "3;2;3;4;3;10;5", "6;2;;;;;0", "1;;;2;;;", "3;;;;;0;",
    ...        "4;5;12;32;11;33;7"]
    >>> for p in csv_read(csv, CsvReader, CsvReader.parse_row):
    ...     print(p)
    3;2;4;3;3;10;5
    6;2;2;2;2;2;0
    1;2;2;2;2;2;None
    3;0;0;0;None;0;0
    4;5;32;12;11;33;7

    >>> csv = ["value", "1", "3", "0", "-5", "7"]
    >>> for p in csv_read(csv, CsvReader, CsvReader.parse_row):
    ...     print(p)
    1;1;1;1;1;1;None
    1;3;3;3;3;3;None
    1;0;0;0;None;0;None
    1;-5;-5;-5;None;-5;None
    1;7;7;7;7;7;None

    >>> csv = ["n;m;sd", "1;3;", "3;5;0"]
    >>> for p in csv_read(csv, CsvReader, CsvReader.parse_row):
    ...     print(p)
    1;3;3;3;3;3;None
    3;5;5;5;5;5;0

    >>> csv = ["n;m", "1;3", "3;5"]
    >>> for p in csv_read(csv, CsvReader, CsvReader.parse_row):
    ...     print(p)
    1;3;3;3;3;3;None
    3;5;5;5;5;5;0
    """

    def __init__(self, columns: dict[str, int]) -> None:
        """
        Create a CSV parser for :class:`SampleStatistics`.

        As the examples below show, the keys that are recognized are
        removed from `columns` while the reader is set up. If no
        statistics column is recognized but exactly one column remains,
        that column is used as the single value column.

        :param columns: the columns
        :raises TypeError: if `columns` is not a `dict`
        :raises ValueError: if no usable column is found or if more than
            one unrecognized column remains

        >>> try:
        ...     CsvReader(None)
        ... except TypeError as te:
        ...     print(te)
        columns should be an instance of dict but is None.

        >>> try:
        ...     CsvReader(1)
        ... except TypeError as te:
        ...     print(te)
        columns should be an instance of dict but is int, namely 1.

        >>> try:
        ...     CsvReader(dict())
        ... except ValueError as ve:
        ...     print(ve)
        No useful keys remain in {}.

        >>> try:
        ...     CsvReader({"a": 1, "b": 2})
        ... except ValueError as ve:
        ...     print(ve)
        No useful keys remain in {'a': 1, 'b': 2}.

        >>> try:
        ...     CsvReader({KEY_N: 1, "b": 2, "c": 3})
        ... except ValueError as ve:
        ...     print(ve)
        No useful keys remain in {'b': 2, 'c': 3}.

        >>> try:
        ...     CsvReader({KEY_MINIMUM: 1, "b": 2, "c": 3})
        ... except ValueError as ve:
        ...     print(ve)
        Found strange keys in {'b': 2, 'c': 3}.
        """
        super().__init__(columns)

        #: the index of the number of elements
        self.idx_n: Final[int | None] = csv_column_or_none(
            columns, KEY_N)

        # `has` counts the value columns that were found, `has_idx`
        # remembers the index of the most recently found one.
        has: int = 0
        has_idx: int = -1

        #: the index of the minimum
        self.__idx_min: int | None = csv_column_or_none(
            columns, KEY_MINIMUM)
        if self.__idx_min is not None:
            has += 1
            has_idx = self.__idx_min

        #: the index for the arithmetic mean
        self.__idx_mean_arith: int | None = csv_column_or_none(
            columns, KEY_MEAN_ARITH)
        if self.__idx_mean_arith is not None:
            has += 1
            has_idx = self.__idx_mean_arith

        #: the index for the median
        self.__idx_median: int | None = csv_column_or_none(
            columns, KEY_MEDIAN)
        if self.__idx_median is not None:
            has += 1
            has_idx = self.__idx_median

        #: the index for the geometric mean
        self.__idx_mean_geom: int | None = csv_column_or_none(
            columns, KEY_MEAN_GEOM)
        if self.__idx_mean_geom is not None:
            has += 1
            has_idx = self.__idx_mean_geom

        #: the index for the maximum
        self.__idx_max: int | None = csv_column_or_none(
            columns, KEY_MAXIMUM)
        if self.__idx_max is not None:
            has += 1
            has_idx = self.__idx_max

        #: the index for the standard deviation
        self.__idx_sd: Final[int | None] = csv_column_or_none(
            columns, KEY_STDDEV)

        if has <= 0:
            # No known value column: accept a sole remaining column,
            # whatever its title, as the value column.
            if dict.__len__(columns) == 1:
                self.__idx_min = has_idx = csv_column(
                    columns, next(iter(columns.keys())), True)
                has = 1
            else:
                raise ValueError(f"No useful keys remain in {columns!r}.")
        if dict.__len__(columns) > 1:
            raise ValueError(f"Found strange keys in {columns!r}.")

        #: is this a parser for single number statistics?
        self.__is_single: Final[bool] = (self.__idx_sd is None) and (has == 1)

        if self.__is_single:
            # All per-value statistics are read from the one column.
            self.__idx_min = self.__idx_max = self.__idx_median \
                = self.__idx_mean_arith = has_idx

    def parse_row(self, data: list[str]) -> SampleStatistics:
        """
        Parse a row of data.

        Statistics that are missing in the row are replaced by the first
        available value among minimum, arithmetic mean, median, geometric
        mean, and maximum. A missing standard deviation becomes `0` if
        `n > 1` and `None` otherwise.

        :param data: the data row
        :returns: the sample statistics
        :raises ValueError: if the row does not contain any value from
            which the statistics could be created

        >>> cc = CsvReader({KEY_MINIMUM: 0, KEY_MEAN_ARITH: 1, KEY_MAXIMUM: 2,
        ...                 KEY_STDDEV: 3, KEY_MEDIAN: 4, KEY_MEAN_GEOM: 5,
        ...                 KEY_N: 6})
        >>> try:
        ...     cc.parse_row([None, None, None, None, None, None, "5"])
        ... except ValueError as ve:
        ...     print(str(ve)[:20])
        No value defined for
        """
        # Without an `n` column, each row describes a single sample.
        n: Final[int] = 1 if self.idx_n is None else int(data[self.idx_n])
        mi: int | float | None = csv_val_or_none(
            data, self.__idx_min, str_to_num)

        if self.__is_single:
            if mi is None:
                # An empty cell must yield the same kind of error as in
                # the multi-column case below instead of failing with a
                # `TypeError` in the comparison `mi > 0`.
                raise ValueError(
                    "No value defined for single value column "
                    f"@{self.__idx_min} in {data!r}.")
            return SampleStatistics(
                n=n, minimum=mi, median=mi, mean_arith=mi,
                mean_geom=mi if (mi > 0) or (self.__idx_mean_geom is not None)
                else None, maximum=mi, stddev=None if n <= 1 else 0)

        ar: int | float | None = csv_val_or_none(
            data, self.__idx_mean_arith, str_to_num)
        me: int | float | None = csv_val_or_none(
            data, self.__idx_median, str_to_num)
        ge: int | float | None = csv_val_or_none(
            data, self.__idx_mean_geom, str_to_num)
        ma: int | float | None = csv_val_or_none(
            data, self.__idx_max, str_to_num)
        sd: int | float | None = csv_val_or_none(
            data, self.__idx_sd, str_to_num)

        if mi is None:
            # No minimum given: fall back to the first available value.
            if ar is not None:
                mi = ar
            elif me is not None:
                mi = me
            elif ge is not None:
                mi = ge
            elif ma is not None:
                mi = ma
            else:
                raise ValueError(
                    f"No value defined for min@{self.__idx_min}={mi}, mean@"
                    f"{self.__idx_mean_arith}={ar}, med@{self.__idx_median}="
                    f"{me}, gmean@{self.__idx_mean_geom}={ge}, max@"
                    f"{self.__idx_max}={ma} defined in {data!r}.")
        return SampleStatistics(
            n=n, minimum=mi, mean_arith=mi if ar is None else ar,
            median=mi if me is None else me, mean_geom=(
                mi if mi > 0 else None) if (ge is None) else ge,
            maximum=mi if ma is None else ma,
            stddev=(0 if (n > 1) else None) if sd is None else sd)

    def parse_optional_row(self, data: list[str] | None) \
            -> SampleStatistics | None:
        """
        Parse a row of data that may be empty.

        The row is only parsed if at least one of the value columns
        (minimum, arithmetic mean, median, geometric mean, maximum) has a
        non-empty cell.

        :param data: the row of data that may be empty
        :returns: the sample statistic, if the row contains data, else `None`

        >>> print(CsvReader.parse_optional_row(None, ["1"]))
        None
        >>> print(CsvReader.parse_optional_row(CsvReader({"v": 0}), ["1"]))
        1;1;1;1;1;1;None
        >>> print(CsvReader.parse_optional_row(CsvReader({"v": 0}), [""]))
        None
        """
        if (self is None) or (data is None):
            return None  # trick to make this method usable pseudo-static
        # Check the value columns in the same order as before and parse
        # the row as soon as one non-empty cell is found.
        for idx in (self.__idx_min, self.__idx_mean_arith,
                    self.__idx_median, self.__idx_mean_geom,
                    self.__idx_max):
            if (idx is not None) and (str.__len__(data[idx]) > 0):
                return self.parse_row(data)
        return None

1975

1976

class CsvWriter(CsvWriterBase[SampleStatistics]):
    """A class for CSV writing of :class:`SampleStatistics`."""

    def __init__(self,
                 data: Iterable[SampleStatistics],
                 scope: str | None = None,
                 n_not_needed: bool = False,
                 what_short: str | None = None,
                 what_long: str | None = None) -> None:
        """
        Initialize the csv writer.

        All arguments are forwarded to the base class constructor,
        together with :class:`SampleStatistics` as final argument.

        :param data: the data to use
        :param scope: the prefix to be pre-pended to all columns
        :param n_not_needed: should we omit the `n` column?
        :param what_short: the short description of what the statistics is
            about
        :param what_long: the long description of what the statistics is
            about

        >>> try:
        ...     CsvWriter([], None, n_not_needed=None)
        ... except TypeError as te:
        ...     print(te)
        n_not_needed should be an instance of bool but is None.

        >>> try:
        ...     CsvWriter([])
        ... except ValueError as ve:
        ...     s = str(ve)
        ...     print(s[s.index(' ') + 1:])
        CsvWriter did not see any data.

        >>> try:
        ...     CsvWriter([1])
        ... except TypeError as te:
        ...     print(str(te)[:32])
        data[0] should be an instance of
        """
        super().__init__(data, scope, n_not_needed, what_short, what_long,
                         SampleStatistics)

2017

2018

class _SampleStats(StreamStatisticsAggregate[SampleStatistics]):
    """The internal aggregate that buffers values for sample statistics."""

    def __init__(self) -> None:
        """Create the aggregate with an empty value buffer."""
        #: the internal data list
        self.__lst: Final[list[int | float]] = []

    def reset(self) -> None:
        """Discard all values collected so far."""
        del self.__lst[:]

    def add(self, value: int | float) -> None:
        """
        Append a single value to the collected data.

        The value is passed through `try_int` before it is stored.

        :param value: the value
        """
        converted = try_int(value)
        self.__lst.append(converted)

    def update(self, data: Iterable[int | float | None]) -> None:
        """
        Append all values of a data stream, skipping `None` elements.

        :param data: the data stream
        """
        store = self.__lst.append
        for item in data:
            if item is not None:
                store(item)

    def result(self) -> SampleStatistics:
        """
        Compute the sample statistics over all collected values.

        :return: the statistics record created by
            :meth:`SampleStatistics.from_samples` from the buffered values
        """
        collected = self.__lst
        return SampleStatistics.from_samples(collected)

    def result_or_none(self) -> SampleStatistics | None:
        """
        Get the result if any data was collected, otherwise `None`.

        :return: The return value of :meth:`result` if any data was collected,
            otherwise `None`
        """
        if not self.__lst:
            return None
        return self.result()

Coverage for pycommons / math / sample_statistics.py: 98%

328 statements