MCPcopy Index your code
hub / github.com/tensorflow/tfjs / computeDatasetStatistics

Function computeDatasetStatistics

tfjs-data/src/statistics.ts:106–212  ·  view source on GitHub ↗
(
    dataset: Dataset<TabularRecord>, sampleSize?: number,
    shuffleWindowSize?: number)

Source from the content-addressed store, hash-verified

104 * each column.
105 */
106export async function computeDatasetStatistics(
107 dataset: Dataset<TabularRecord>, sampleSize?: number,
108 shuffleWindowSize?: number): Promise<DatasetStatistics> {
109 let sampleDataset = dataset;
110 // TODO(soergel): allow for deep shuffle where possible.
111 if (shuffleWindowSize != null) {
112 sampleDataset = sampleDataset.shuffle(shuffleWindowSize);
113 }
114 if (sampleSize != null) {
115 sampleDataset = sampleDataset.take(sampleSize);
116 }
117
118 // TODO(soergel): prepare the column objects based on a schema.
119 const result: DatasetStatistics = {};
120
121 await sampleDataset.forEachAsync(e => {
122 for (const key of Object.keys(e)) {
123 const value = e[key];
124 if (typeof (value) === 'string') {
125 // No statistics for string element.
126 } else {
127 let previousMean = 0;
128 let previousLength = 0;
129 let previousVariance = 0;
130 let columnStats: NumericColumnStatistics = result[key];
131 if (columnStats == null) {
132 columnStats = {
133 min: Number.POSITIVE_INFINITY,
134 max: Number.NEGATIVE_INFINITY,
135 mean: 0,
136 variance: 0,
137 stddev: 0,
138 length: 0
139 };
140 result[key] = columnStats;
141 } else {
142 previousMean = columnStats.mean;
143 previousLength = columnStats.length;
144 previousVariance = columnStats.variance;
145 }
146 let recordMin: number;
147 let recordMax: number;
148
149 // Calculate accumulated mean and variance following tf.Transform
150 // implementation
151 let valueLength = 0;
152 let valueMean = 0;
153 let valueVariance = 0;
154 let combinedLength = 0;
155 let combinedMean = 0;
156 let combinedVariance = 0;
157
158 if (value instanceof tf.Tensor) {
159 recordMin = min(value).dataSync()[0];
160 recordMax = max(value).dataSync()[0];
161 const valueMoment = tf.moments(value);
162 valueMean = valueMoment.mean.dataSync()[0];
163 valueVariance = valueMoment.variance.dataSync()[0];

Callers 1

statistics_test.ts — File · 0.90

Calls 6

min — Method · 0.80
max — Method · 0.80
sqrt — Method · 0.80
dataSync — Method · 0.65
min — Function · 0.50
max — Function · 0.50

Tested by

no test coverage detected

Used in the wild: real call sites across dependent graphs

searching dependent graphs…