Function compare_probs

numpy_ml/plots/ngram_plots.py:31–77 · view source on GitHub ↗

(fp, N)

Source from the content-addressed store, hash-verified

29
30
def compare_probs(fp, N):
    """
    Plot N-gram log probabilities as a function of the additive smoothing
    constant K and save the figure to ``img/add_smooth.png``.

    An MLE model and a Good-Turing model are each trained once on `fp`. An
    additive model is then retrained for every K on an evenly spaced grid of
    20 values in [0, 10]. Each model scores two fixed word pairs, labelled
    "seen" and "unseen" in the plot legend.

    Parameters
    ----------
    fp : str
        Path to the training corpus. Passed unchanged to each model's
        ``train`` method together with ``encoding="utf-8-sig"``.
    N : int
        N-gram order. Passed to every model constructor and to every
        ``log_prob`` call.

    Notes
    -----
    Nothing is returned; the only output is the saved image. The ``img/``
    directory is not created here, so it presumably must already exist
    (TODO confirm against the caller / repository layout).
    """
    seen = ("<bol>", "the")
    unseen = ("<bol>", "asdf")

    MLE = MLENGram(N, unk=False, filter_punctuation=False, filter_stopwords=False)
    MLE.train(fp, encoding="utf-8-sig")

    GTT = GoodTuringNGram(
        N, conf=1.96, unk=False, filter_stopwords=False, filter_punctuation=False
    )
    GTT.train(fp, encoding="utf-8-sig")

    # Neither baseline depends on K, so score each one a single time instead
    # of once per grid point.
    mle_prob = MLE.log_prob(seen, N)
    gtt_prob = GTT.log_prob(seen, N)
    gtt_prob_u = GTT.log_prob(unseen, N)

    # Build the K grid once; it drives both the training loop and the x-axis.
    Ks = np.linspace(0, 10, 20)

    add_y, addu_y = [], []
    for K in Ks:
        ADD = AdditiveNGram(
            N, K, unk=False, filter_punctuation=False, filter_stopwords=False
        )
        ADD.train(fp, encoding="utf-8-sig")

        add_y.append(ADD.log_prob(seen, N))
        addu_y.append(ADD.log_prob(unseen, N))

    # Baselines are constant in K, so they are drawn as flat lines.
    mle_y = [mle_prob] * len(Ks)
    gtt_y = [gtt_prob] * len(Ks)
    gttu_y = [gtt_prob_u] * len(Ks)

    plt.plot(Ks, add_y, label="Additive (seen ngram)")
    plt.plot(Ks, addu_y, label="Additive (unseen ngram)")
    # Good-Turing curves are currently disabled in the saved figure:
    # plt.plot(Ks, gtt_y, label="Good-Turing (seen ngram)")
    # plt.plot(Ks, gttu_y, label="Good-Turing (unseen ngram)")
    plt.plot(Ks, mle_y, "--", label="MLE (seen ngram)")
    plt.xlabel("K")
    plt.ylabel("log P(sequence)")
    plt.legend()
    plt.savefig("img/add_smooth.png")
    plt.close("all")
78
79
80	def plot_gt_freqs(fp):

nothing calls this directly

train (Method) · 0.95

log_prob (Method) · 0.95

MLENGram (Class) · 0.90

GoodTuringNGram (Class) · 0.90

AdditiveNGram (Class) · 0.90

train (Method) · 0.45

no test coverage detected