(key, value)
| 9 | import numpy |
| 10 | |
def map(key, value):
    """Emit the per-example partial sums for one training example.

    Parses the class label(s) and the feature vector from their
    comma-separated string forms, builds E = [A, -e] and the diagonal label
    matrix D, and prints one Hadoop Streaming record to stdout:

        producedkey<TAB><base64 of pickled tuple (E.T*E, E.T*D*e)>

    Args:
        key: comma-separated class value(s) for the example, e.g. "-1.0".
        value: comma-separated feature values, e.g. "3.0, 7.0, 2.0".

    Returns:
        None; the result is written to stdout.

    Raises:
        ValueError: if an item in ``key`` or ``value`` is not a valid float.
    """
    # Class label(s) for this example, e.g. [-1.0], placed on a diagonal.
    labels = [float(item) for item in key.split(",")]
    D = numpy.diag(labels)

    # Feature vector for this example; numpy.matrix turns the flat list into
    # a single-row matrix. numpy.matrix is kept deliberately so that the
    # pickled operands keep the same type and `*` stays a matrix product.
    # NOTE(review): presumably the reducer relies on matrix semantics -- confirm.
    features = [float(item) for item in value.split(",")]
    A = numpy.matrix(features)

    # e is a column of ones with one entry per row of A; E appends -e to A
    # as an extra column.
    n_rows = len(A)
    e = numpy.matrix(numpy.ones(n_rows).reshape(n_rows, 1))
    E = numpy.matrix(numpy.append(A, -e, axis=1))

    # Pickle the tuple consumed by the reducer and base64-encode it to avoid
    # trouble with '\t' and '\n', the default separators in Hadoop Streaming.
    # Decoding to ASCII text keeps the printed record identical on Python 2
    # and Python 3 (where b64encode returns bytes).
    payload = pickle.dumps((E.T * E, E.T * D * e))
    producedvalue = base64.b64encode(payload).decode("ascii")

    # Note: the single constant key "producedkey" sends every record to one
    # reducer -- somewhat atypical, since it gives a low degree of
    # parallelism on the reducer side.
    print("producedkey\t%s" % producedvalue)
| 32 | |
| 33 | def reduce(key, values, mu=0.1): |
| 34 | sumETE = None |
| 35 | sumETDe = None |
| 36 |
no outgoing calls
no test coverage detected