NewLineArray returns a new line array built from the data read from an io.Reader
(size uint64, endings FileFormat, reader io.Reader)
| 96 | |
| 97 | // NewLineArray returns a new line array built from the data read from reader |
| 98 | func NewLineArray(size uint64, endings FileFormat, reader io.Reader) *LineArray { |
| 99 | la := new(LineArray) |
| 100 | |
| 101 | la.lines = make([]Line, 0, 1000) |
| 102 | la.initsize = size |
| 103 | |
| 104 | br := bufio.NewReader(reader) |
| 105 | var loaded int |
| 106 | |
| 107 | la.Endings = endings |
| 108 | |
| 109 | n := 0 |
| 110 | for { |
| 111 | data, err := br.ReadBytes('\n') |
| 112 | // Detect the line ending by checking to see if there is a '\r' char |
| 113 | // before the '\n' |
| 114 | // Even if the file format is set to DOS, the '\r' is removed so |
| 115 | // that all lines end with '\n' |
| 116 | dlen := len(data) |
| 117 | if dlen > 1 && data[dlen-2] == '\r' { |
| 118 | data = append(data[:dlen-2], '\n') |
| 119 | if la.Endings == FFAuto { |
| 120 | la.Endings = FFDos |
| 121 | } |
| 122 | dlen = len(data) |
| 123 | } else if dlen > 0 { |
| 124 | if la.Endings == FFAuto { |
| 125 | la.Endings = FFUnix |
| 126 | } |
| 127 | } |
| 128 | |
| 129 | // If we are loading a large file (greater than 1000) we use the file |
| 130 | // size and the length of the first 1000 lines to try to estimate |
| 131 | // how many lines will need to be allocated for the rest of the file |
| 132 | // We add an extra 10000 to the original estimate to be safe and give |
| 133 | // plenty of room for expansion |
| 134 | if n >= 1000 && loaded >= 0 { |
| 135 | totalLinesNum := int(float64(size) * (float64(n) / float64(loaded))) |
| 136 | newSlice := make([]Line, len(la.lines), totalLinesNum+10000) |
| 137 | copy(newSlice, la.lines) |
| 138 | la.lines = newSlice |
| 139 | loaded = -1 |
| 140 | } |
| 141 | |
| 142 | // Counter for the number of bytes in the first 1000 lines |
| 143 | if loaded >= 0 { |
| 144 | loaded += dlen |
| 145 | } |
| 146 | |
| 147 | if err != nil { |
| 148 | if err == io.EOF { |
| 149 | la.lines = Append(la.lines, Line{ |
| 150 | data: data, |
| 151 | state: nil, |
| 152 | match: nil, |
| 153 | }) |
| 154 | } |
| 155 | // Last line was read |