Function classify

torbot/modules/nlp/main.py:13–41 · view source on GitHub ↗

Classify URL specified by user

(data)

Source from the content-addressed store, hash-verified

11
12
def classify(data):
    """
    Classify the text content of an HTML document.

    The markup in ``data`` is parsed with BeautifulSoup and reduced to its
    plain text. A bag-of-words -> TF-IDF -> SGD pipeline is then trained on
    the ``training_data`` directory located next to this module and used to
    predict a category for that text.

    Note: this function changes the process working directory to the
    directory containing this module, and retrains the classifier on a
    fresh random train/test split on every call.

    Args:
        data: HTML markup to classify (anything BeautifulSoup accepts).

    Returns:
        A two-element list ``[category_name, accuracy]`` where
        ``category_name`` is the predicted entry of the dataset's
        ``target_names`` and ``accuracy`` is the fraction of held-out test
        samples the freshly trained classifier labels correctly.
    """
    soup = BeautifulSoup(data, features='html.parser')
    html = soup.get_text()

    # create classifier
    clf = Pipeline([
        ('vect', CountVectorizer()),
        ('tfidf', TfidfTransformer()),
        ('clf', SGDClassifier()),
    ])

    try:
        # 'training_data' is resolved relative to this module's directory.
        os.chdir(Path(__file__).parent)
        dataset = load_files('training_data')
    except FileNotFoundError:
        print("Training data not found. Obtaining training data...")
        print("This may take a while...")
        # Imported lazily so the data-gathering module is only loaded when
        # the training set actually has to be built.
        # NOTE(review): presumably write_data() creates 'training_data'
        # under the current working directory - confirm.
        from .gather_data import write_data
        write_data()
        print("Training data obtained.")
        dataset = load_files('training_data')

    x_train, x_test, y_train, y_test = train_test_split(dataset.data, dataset.target)
    clf.fit(x_train, y_train)

    # returns an array of target indices (one per input document)
    predicted = clf.predict([html])

    # Accuracy must be measured on the held-out split. Comparing the single
    # prediction above against y_test would only report how common the
    # predicted class is among the test labels.
    accuracy = np.mean(clf.predict(x_test) == y_test)

    return [dataset.target_names[predicted[0]], accuracy]

_append_nodeMethod · 0.85

write_dataFunction · 0.85

no test coverage detected