Function get_normalized_data

ann_class2/util.py:117–145 · view source on GitHub ↗

()

Source from the content-addressed store, hash-verified

115
116
def get_normalized_data():
    """Load the digit-recognizer training CSV and return a standardized split.

    Reads ``../large_files/train.csv`` (relative to the current working
    directory), shuffles the rows in place, and uses column 0 as the target
    and the remaining columns as features. The last 1000 shuffled rows form
    the test set; everything before them forms the training set.

    Features are standardized with the per-column mean and standard deviation
    of the training set only, and the same statistics are applied to the test
    set.

    Returns:
        Tuple ``(Xtrain, Xtest, Ytrain, Ytest)`` of float32 NumPy arrays.

    Raises:
        SystemExit: if the CSV file is not found (exit status 1), after
            printing instructions on where to obtain it.
    """
    print("Reading in and transforming data...")

    if not os.path.exists('../large_files/train.csv'):
        print('Looking for ../large_files/train.csv')
        print('You have not downloaded the data and/or not placed the files in the correct location.')
        print('Please get the data from: https://www.kaggle.com/c/digit-recognizer')
        print('Place train.csv in the folder large_files adjacent to the class folder')
        # Raise SystemExit directly rather than calling the site-provided
        # ``exit()`` helper: that helper is intended for interactive use, is
        # absent when the site module is not loaded, and would report a
        # success status (0) for what is a failure.
        raise SystemExit(1)

    df = pd.read_csv('../large_files/train.csv')
    data = df.values.astype(np.float32)
    np.random.shuffle(data)  # shuffles rows in place before splitting
    X = data[:, 1:]
    Y = data[:, 0]

    # Hold out the last 1000 shuffled rows as the test set.
    Xtrain = X[:-1000]
    Ytrain = Y[:-1000]
    Xtest = X[-1000:]
    Ytest = Y[-1000:]

    # Normalize using training-set statistics only.
    mu = Xtrain.mean(axis=0)
    std = Xtrain.std(axis=0)
    # Constant columns have zero std; use 1 so the division leaves them at 0
    # instead of producing NaN/inf.
    np.place(std, std == 0, 1)
    Xtrain = (Xtrain - mu) / std
    Xtest = (Xtest - mu) / std

    return Xtrain, Xtest, Ytrain, Ytest
146
147
148	def plot_cumulative_variance(pca):

mainFunction · 0.90

pytorch_dropout.pyFile · 0.90

mainFunction · 0.90

pytorch_batchnorm.pyFile · 0.90

mainFunction · 0.90

keras_functional.pyFile · 0.90

pytorch_example2.pyFile · 0.90

mainFunction · 0.90

no outgoing calls

no test coverage detected