Creates an `input_fn` closure to be passed to TPUEstimator.
(input_files,
seq_length,
is_training,
num_cpu_threads=8,
evaluate_for_fixed_number_of_steps=True)
| 38 | |
| 39 | |
def input_fn_builder(input_files,
                     seq_length,
                     is_training,
                     num_cpu_threads=8,
                     evaluate_for_fixed_number_of_steps=True):
    """Creates an `input_fn` closure to be passed to TPUEstimator.

    Args:
        input_files: TFRecord file path(s); handed unchanged to
            `tf.data.TFRecordDataset`.
        seq_length: Accepted for interface compatibility. Not read while the
            decode step is disabled (see the NOTE inside `input_fn`).
        is_training: Accepted for interface compatibility. Not read by this
            implementation: training and eval share the same sequential,
            unshuffled read path.
        num_cpu_threads: Accepted for interface compatibility. Not read while
            the batching step is disabled.
        evaluate_for_fixed_number_of_steps: When truthy, the dataset is
            repeated indefinitely so that running a fixed number of steps
            does not hit an out-of-range error at the end of the files.

    Returns:
        A function `input_fn(params)` that builds and returns the dataset.
    """

    def input_fn(params):
        """Builds the record dataset; `params` is accepted but not read."""
        dataset = tf.data.TFRecordDataset(input_files)

        # If we evaluate for a fixed number of steps we don't want to
        # encounter out-of-range exceptions, so loop over the data forever.
        if evaluate_for_fixed_number_of_steps:
            dataset = dataset.repeat()

        # NOTE(review): the decode-and-batch step is disabled. It used to be
        # `tf.data.experimental.map_and_batch` calling
        # `_decode_record(record, {"input_ids":
        # tf.FixedLenFeature([seq_length], tf.int64)})` with
        # `batch_size=params["batch_size"]`,
        # `num_parallel_batches=num_cpu_threads` and `drop_remainder=True`.
        # As a result this function yields raw serialized records, one per
        # element, unbatched. Confirm that is intentional before passing
        # this to an estimator that expects fixed-size batches.

        # Debug output: this prints the dataset object itself, not a record
        # count.
        print("the actual lens of data is>>>>>>>>>>>>>>>>>>>>>>>>>>>> ",
              dataset)
        return dataset

    return input_fn
| 76 | |
| 77 | |
| 78 | # ~~~~~~~~~~~~~~ This is for classification / AF ~~~~~~~~~~~~~~~~~~ |