MCPcopy
hub / github.com/shenweichen/DeepCTR-Torch / get_test_data

Function get_test_data

tests/utils.py:26–75  ·  view source on GitHub ↗
(sample_size=1000, embedding_size=4, sparse_feature_num=1, dense_feature_num=1,
                  sequence_feature=['sum', 'mean', 'max'], classification=True, include_length=False,
                  hash_flag=False, prefix='')

Source from the content-addressed store, hash-verified

24
25
def get_test_data(sample_size=1000, embedding_size=4, sparse_feature_num=1, dense_feature_num=1,
                  sequence_feature=('sum', 'mean', 'max'), classification=True, include_length=False,
                  hash_flag=False, prefix=''):
    """Build random feature columns, matching model inputs and labels for tests.

    Args:
        sample_size: Number of samples generated for every input array and for ``y``.
        embedding_size: Passed as the embedding dimension of every sparse /
            sequence feature column.
        sparse_feature_num: Number of ``SparseFeat`` columns to create.
        dense_feature_num: Number of one-dimensional ``DenseFeat`` columns to create.
        sequence_feature: Iterable of combiner modes; one ``VarLenSparseFeat`` is
            created per entry.  The special entry ``'weight'`` (first occurrence
            only) instead adds a weighted sequence column plus its weight input.
            The argument is copied and never modified.
        classification: If true, ``y`` holds random integers in {0, 1}; otherwise
            random floats drawn with ``np.random.random``.
        include_length: If true, every sequence column also gets a length input
            and its ``length_name`` is pointed at that input's key.
        hash_flag: Accepted for interface compatibility; not used by this function.
        prefix: String prepended to every generated feature / input name.

    Returns:
        Tuple ``(model_input, y, feature_columns)`` where ``model_input`` maps
        input names to numpy arrays.
    """
    # Work on a private list: the original implementation popped 'weight' from the
    # caller's list (and could not handle tuples at all).
    sequence_modes = list(sequence_feature)

    feature_columns = []
    model_input = {}
    # Position in `feature_columns` -> key of that sequence column's length input.
    # Recorded at creation time so the fill loop below does not depend on a loop
    # index leaked from an earlier `for` statement.
    length_keys = {}

    if 'weight' in sequence_modes:
        weighted_name = prefix + "weighted_seq"
        weighted_length_key = weighted_name + "_seq_length"
        length_keys[len(feature_columns)] = weighted_length_key
        feature_columns.append(
            VarLenSparseFeat(SparseFeat(weighted_name, vocabulary_size=2, embedding_dim=embedding_size),
                             maxlen=3, length_name=weighted_length_key,
                             weight_name=prefix + "weight"))
        # gen_sequence is defined elsewhere; presumably returns (sequence ids, lengths).
        s_input, s_len_input = gen_sequence(2, 3, sample_size)

        model_input[weighted_name] = s_input
        model_input[prefix + 'weight'] = np.random.randn(sample_size, 3, 1)
        model_input[weighted_length_key] = s_len_input
        # Only the first 'weight' entry is special; drop it from the mode list.
        sequence_modes.remove('weight')

    for i in range(sparse_feature_num):
        dim = np.random.randint(1, 10)
        feature_columns.append(SparseFeat(prefix + 'sparse_feature_' + str(i), dim, embedding_size, dtype=torch.int32))
    for i in range(dense_feature_num):
        feature_columns.append(DenseFeat(prefix + 'dense_feature_' + str(i), 1, dtype=torch.float32))
    for i, mode in enumerate(sequence_modes):
        dim = np.random.randint(1, 10)
        maxlen = np.random.randint(1, 10)
        length_keys[len(feature_columns)] = prefix + "sequence_" + str(i) + '_seq_length'
        feature_columns.append(
            VarLenSparseFeat(SparseFeat(prefix + 'sequence_' + mode, vocabulary_size=dim, embedding_dim=embedding_size),
                             maxlen=maxlen, combiner=mode))

    for position, fc in enumerate(feature_columns):
        if isinstance(fc, SparseFeat):
            model_input[fc.name] = np.random.randint(0, fc.vocabulary_size, sample_size)
        elif isinstance(fc, DenseFeat):
            model_input[fc.name] = np.random.random(sample_size)
        else:
            # Everything else is a sequence column (including the weighted one,
            # whose sequence input is regenerated here as in the original code).
            s_input, s_len_input = gen_sequence(
                fc.vocabulary_size, fc.maxlen, sample_size)
            model_input[fc.name] = s_input
            if include_length:
                # Each sequence column gets its own length key instead of all of
                # them sharing whatever value `i` happened to hold last.
                length_key = length_keys[position]
                fc.length_name = length_key
                model_input[length_key] = s_len_input

    if classification:
        y = np.random.randint(0, 2, sample_size)
    else:
        y = np.random.random(sample_size)

    return model_input, y, feature_columns
76
77
78def layer_test(layer_cls, kwargs={}, input_shape=None,

Callers 15

test_CCPMFunction · 0.90
test_CCPM_without_seqFunction · 0.90
test_AFNFunction · 0.90
test_FiBiNETFunction · 0.85
test_AFMFunction · 0.85
test_DCNFunction · 0.85
test_DCNMixFunction · 0.85
test_WDLFunction · 0.85
test_PNNFunction · 0.85
test_NFMFunction · 0.85
test_xDeepFMFunction · 0.85
test_AutoIntFunction · 0.85

Calls 5

VarLenSparseFeatClass · 0.90
SparseFeatClass · 0.90
DenseFeatClass · 0.90
appendMethod · 0.80
gen_sequenceFunction · 0.70

Tested by 15

test_CCPMFunction · 0.72
test_CCPM_without_seqFunction · 0.72
test_AFNFunction · 0.72
test_FiBiNETFunction · 0.68
test_AFMFunction · 0.68
test_DCNFunction · 0.68
test_DCNMixFunction · 0.68
test_WDLFunction · 0.68
test_PNNFunction · 0.68
test_NFMFunction · 0.68
test_xDeepFMFunction · 0.68
test_AutoIntFunction · 0.68