| 36 | |
| 37 | |
| 38 | def get_test_data(sample_size=1000, embedding_size=4, sparse_feature_num=1, dense_feature_num=1, |
| 39 | sequence_feature=None, classification=True, include_length=False, |
| 40 | hash_flag=False, prefix='', use_group=False): |
| 41 | if sequence_feature is None: |
| 42 | sequence_feature = ['sum', 'mean', 'max', 'weight'] |
| 43 | feature_columns = [] |
| 44 | model_input = {} |
| 45 | |
| 46 | if 'weight' in sequence_feature: |
| 47 | feature_columns.append( |
| 48 | VarLenSparseFeat(SparseFeat(prefix + "weighted_seq", vocabulary_size=2, embedding_dim=embedding_size), |
| 49 | maxlen=3, length_name=prefix + "weighted_seq" + "_seq_length", |
| 50 | weight_name=prefix + "weight")) |
| 51 | s_input, s_len_input = gen_sequence( |
| 52 | 2, 3, sample_size) |
| 53 | |
| 54 | model_input[prefix + "weighted_seq"] = s_input |
| 55 | model_input[prefix + 'weight'] = np.random.randn(sample_size, 3, 1) |
| 56 | model_input[prefix + "weighted_seq" + "_seq_length"] = s_len_input |
| 57 | sequence_feature.pop(sequence_feature.index('weight')) |
| 58 | |
| 59 | for i in range(sparse_feature_num): |
| 60 | if use_group: |
| 61 | group_name = str(i % 3) |
| 62 | else: |
| 63 | group_name = DEFAULT_GROUP_NAME |
| 64 | dim = np.random.randint(1, 10) |
| 65 | feature_columns.append( |
| 66 | SparseFeat(prefix + 'sparse_feature_' + str(i), dim, embedding_size, use_hash=hash_flag, dtype=tf.int32, |
| 67 | group_name=group_name)) |
| 68 | |
| 69 | for i in range(dense_feature_num): |
| 70 | def transform_fn(x): return (x - 0.0) / 1.0 |
| 71 | |
| 72 | feature_columns.append( |
| 73 | DenseFeat( |
| 74 | prefix + 'dense_feature_' + str(i), |
| 75 | 1, |
| 76 | dtype=tf.float32, |
| 77 | transform_fn=transform_fn |
| 78 | ) |
| 79 | ) |
| 80 | for i, mode in enumerate(sequence_feature): |
| 81 | dim = np.random.randint(1, 10) |
| 82 | maxlen = np.random.randint(1, 10) |
| 83 | feature_columns.append( |
| 84 | VarLenSparseFeat(SparseFeat(prefix + 'sequence_' + mode, vocabulary_size=dim, embedding_dim=embedding_size), |
| 85 | maxlen=maxlen, combiner=mode)) |
| 86 | |
| 87 | for fc in feature_columns: |
| 88 | if isinstance(fc, SparseFeat): |
| 89 | model_input[fc.name] = np.random.randint(0, fc.vocabulary_size, sample_size) |
| 90 | elif isinstance(fc, DenseFeat): |
| 91 | model_input[fc.name] = np.random.random(sample_size) |
| 92 | else: |
| 93 | s_input, s_len_input = gen_sequence( |
| 94 | fc.vocabulary_size, fc.maxlen, sample_size) |
| 95 | model_input[fc.name] = s_input |