MCPcopy
hub / github.com/ddbourgin/numpy-ml / compare_probs

Function compare_probs

numpy_ml/plots/ngram_plots.py:31–77  ·  view source on GitHub ↗
(fp, N)

Source from the content-addressed store, hash-verified

29
30
def compare_probs(fp, N):
    """
    Plot additive-smoothed n-gram log probabilities as a function of `K`.

    An unsmoothed ``MLENGram`` is trained once on `fp` and an
    ``AdditiveNGram`` is trained on `fp` for each of 20 evenly spaced values
    of `K` in [0, 10]. For every `K`, the additive model's log probability is
    recorded for two fixed word sequences: ``("<bol>", "the")`` (labelled
    "seen") and ``("<bol>", "asdf")`` (labelled "unseen"). The two additive
    curves and a dashed horizontal MLE reference for the "seen" sequence are
    drawn on the current matplotlib figure and written to
    ``img/add_smooth.png``; all figures are then closed.

    Parameters
    ----------
    fp : str
        Corpus handle forwarded unchanged to each model's ``train`` method
        (presumably a path to a UTF-8 text file -- confirm against ``train``).
    N : int
        Forwarded as the first constructor argument of each model and as the
        second argument of ``log_prob`` (presumably the n-gram order).

    Returns
    -------
    None
        The function is called for its side effect of saving the plot.

    Notes
    -----
    The labels "seen" / "unseen" assume that ``("<bol>", "the")`` occurs in
    the corpus and ``("<bol>", "asdf")`` does not; this is not checked.
    The ``img/`` directory must already exist.
    """
    # Every model in this comparison is built with the same preprocessing
    # switches so that the curves are directly comparable.
    model_kwargs = dict(unk=False, filter_punctuation=False, filter_stopwords=False)

    seen = ("<bol>", "the")
    unseen = ("<bol>", "asdf")

    # The MLE estimate does not depend on K, so train and query it once
    # rather than on every iteration of the sweep.
    mle = MLENGram(N, **model_kwargs)
    mle.train(fp, encoding="utf-8-sig")
    mle_seen = mle.log_prob(seen, N)

    # NOTE: a Good-Turing model used to be trained here as well, but its
    # curves were never plotted, so that dead work has been dropped.

    # Single shared grid of smoothing constants, used for both the sweep and
    # the x-axis of every curve.
    ks = np.linspace(0, 10, 20)

    add_seen, add_unseen = [], []
    for K in ks:
        additive = AdditiveNGram(N, K, **model_kwargs)
        additive.train(fp, encoding="utf-8-sig")
        add_seen.append(additive.log_prob(seen, N))
        add_unseen.append(additive.log_prob(unseen, N))

    plt.plot(ks, add_seen, label="Additive (seen ngram)")
    plt.plot(ks, add_unseen, label="Additive (unseen ngram)")
    # Constant reference line: the unsmoothed estimate for the seen n-gram.
    plt.plot(ks, [mle_seen] * len(ks), "--", label="MLE (seen ngram)")
    plt.xlabel("K")
    plt.ylabel("log P(sequence)")
    plt.legend()
    plt.savefig("img/add_smooth.png")
    plt.close("all")
78
79
80def plot_gt_freqs(fp):

Callers

nothing calls this directly

Calls 8

trainMethod · 0.95
log_probMethod · 0.95
log_probMethod · 0.95
log_probMethod · 0.95
MLENGramClass · 0.90
GoodTuringNGramClass · 0.90
AdditiveNGramClass · 0.90
trainMethod · 0.45

Tested by

no test coverage detected