Secondary normalization method, to be used as a fallback when the primary normalization method fails.
(length int)
| 332 | // normalizeCount2 is the secondary normalization method. |
| 333 | // It is to be used when the primary method fails. |
| 334 | func (s *fseEncoder) normalizeCount2(length int) error { |
| 335 | const notYetAssigned = -2 |
| 336 | var ( |
| 337 | distributed uint32 |
| 338 | total = uint32(length) |
| 339 | tableLog = s.actualTableLog |
| 340 | lowThreshold = total >> tableLog |
| 341 | lowOne = (total * 3) >> (tableLog + 1) |
| 342 | ) |
| 343 | for i, cnt := range s.count[:s.symbolLen] { |
| 344 | if cnt == 0 { |
| 345 | s.norm[i] = 0 |
| 346 | continue |
| 347 | } |
| 348 | if cnt <= lowThreshold { |
| 349 | s.norm[i] = -1 |
| 350 | distributed++ |
| 351 | total -= cnt |
| 352 | continue |
| 353 | } |
| 354 | if cnt <= lowOne { |
| 355 | s.norm[i] = 1 |
| 356 | distributed++ |
| 357 | total -= cnt |
| 358 | continue |
| 359 | } |
| 360 | s.norm[i] = notYetAssigned |
| 361 | } |
| 362 | toDistribute := (1 << tableLog) - distributed |
| 363 | |
| 364 | if (total / toDistribute) > lowOne { |
| 365 | // risk of rounding to zero |
| 366 | lowOne = (total * 3) / (toDistribute * 2) |
| 367 | for i, cnt := range s.count[:s.symbolLen] { |
| 368 | if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) { |
| 369 | s.norm[i] = 1 |
| 370 | distributed++ |
| 371 | total -= cnt |
| 372 | continue |
| 373 | } |
| 374 | } |
| 375 | toDistribute = (1 << tableLog) - distributed |
| 376 | } |
| 377 | if distributed == uint32(s.symbolLen)+1 { |
| 378 | // all values are pretty poor; |
| 379 | // probably incompressible data (should have already been detected); |
| 380 | // find max, then give all remaining points to max |
| 381 | var maxV int |
| 382 | var maxC uint32 |
| 383 | for i, cnt := range s.count[:s.symbolLen] { |
| 384 | if cnt > maxC { |
| 385 | maxV = i |
| 386 | maxC = cnt |
| 387 | } |
| 388 | } |
| 389 | s.norm[maxV] += int16(toDistribute) |
| 390 | return nil |
| 391 | } |