A demonstration of frequency distributions and probability distributions. This demonstration creates three frequency distributions, each built by sampling a random process with ``numsamples`` samples. Each frequency distribution is sampled ``numoutcomes`` times. These t
(numsamples=6, numoutcomes=500)
| 2454 | |
| 2455 | |
| 2456 | def demo(numsamples=6, numoutcomes=500): |
| 2457 | """ |
| 2458 | A demonstration of frequency distributions and probability |
| 2459 | distributions. This demonstration creates three frequency |
| 2460 | distributions with, and uses them to sample a random process with |
| 2461 | ``numsamples`` samples. Each frequency distribution is sampled |
| 2462 | ``numoutcomes`` times. These three frequency distributions are |
| 2463 | then used to build six probability distributions. Finally, the |
| 2464 | probability estimates of these distributions are compared to the |
| 2465 | actual probability of each sample. |
| 2466 | |
| 2467 | :type numsamples: int |
| 2468 | :param numsamples: The number of samples to use in each demo |
| 2469 | frequency distributions. |
| 2470 | :type numoutcomes: int |
| 2471 | :param numoutcomes: The total number of outcomes for each |
| 2472 | demo frequency distribution. These outcomes are divided into |
| 2473 | ``numsamples`` bins. |
| 2474 | :rtype: None |
| 2475 | """ |
| 2476 | |
| 2477 | # Randomly sample a stochastic process three times. |
| 2478 | fdist1 = _create_rand_fdist(numsamples, numoutcomes) |
| 2479 | fdist2 = _create_rand_fdist(numsamples, numoutcomes) |
| 2480 | fdist3 = _create_rand_fdist(numsamples, numoutcomes) |
| 2481 | |
| 2482 | # Use our samples to create probability distributions. |
| 2483 | pdists = [ |
| 2484 | MLEProbDist(fdist1), |
| 2485 | LidstoneProbDist(fdist1, 0.5, numsamples), |
| 2486 | HeldoutProbDist(fdist1, fdist2, numsamples), |
| 2487 | HeldoutProbDist(fdist2, fdist1, numsamples), |
| 2488 | CrossValidationProbDist([fdist1, fdist2, fdist3], numsamples), |
| 2489 | SimpleGoodTuringProbDist(fdist1), |
| 2490 | SimpleGoodTuringProbDist(fdist1, 7), |
| 2491 | _create_sum_pdist(numsamples), |
| 2492 | ] |
| 2493 | |
| 2494 | # Find the probability of each sample. |
| 2495 | vals = [] |
| 2496 | for n in range(1, numsamples + 1): |
| 2497 | vals.append(tuple([n, fdist1.freq(n)] + [pdist.prob(n) for pdist in pdists])) |
| 2498 | |
| 2499 | # Print the results in a formatted table. |
| 2500 | print( |
| 2501 | "%d samples (1-%d); %d outcomes were sampled for each FreqDist" |
| 2502 | % (numsamples, numsamples, numoutcomes) |
| 2503 | ) |
| 2504 | print("=" * 9 * (len(pdists) + 2)) |
| 2505 | FORMATSTR = " FreqDist " + "%8s " * (len(pdists) - 1) + "| Actual" |
| 2506 | print(FORMATSTR % tuple(repr(pdist)[1:9] for pdist in pdists[:-1])) |
| 2507 | print("-" * 9 * (len(pdists) + 2)) |
| 2508 | FORMATSTR = "%3d %8.6f " + "%8.6f " * (len(pdists) - 1) + "| %8.6f" |
| 2509 | for val in vals: |
| 2510 | print(FORMATSTR % val) |
| 2511 | |
| 2512 | # Print the totals for each column (should all be 1.0) |
| 2513 | zvals = list(zip(*vals)) |
no test coverage detected
searching dependent graphs…