MCPcopy
hub / github.com/feross/buffer / utf8ToBytes

Function utf8ToBytes

index.js:1954–2032  ·  view source on GitHub ↗
(string, units)

Source from the content-addressed store, hash-verified

1952}
1953
1954function utf8ToBytes (string, units) {
1955 units = units || Infinity
1956 let codePoint
1957 const length = string.length
1958 let leadSurrogate = null
1959 const bytes = []
1960
1961 for (let i = 0; i < length; ++i) {
1962 codePoint = string.charCodeAt(i)
1963
1964 // is surrogate component
1965 if (codePoint > 0xD7FF && codePoint < 0xE000) {
1966 // last char was a lead
1967 if (!leadSurrogate) {
1968 // no lead yet
1969 if (codePoint > 0xDBFF) {
1970 // unexpected trail
1971 if ((units -= 3) > -1) bytes.push(0xEF, 0xBF, 0xBD)
1972 continue
1973 } else if (i + 1 === length) {
1974 // unpaired lead
1975 if ((units -= 3) > -1) bytes.push(0xEF, 0xBF, 0xBD)
1976 continue
1977 }
1978
1979 // valid lead
1980 leadSurrogate = codePoint
1981
1982 continue
1983 }
1984
1985 // 2 leads in a row
1986 if (codePoint < 0xDC00) {
1987 if ((units -= 3) > -1) bytes.push(0xEF, 0xBF, 0xBD)
1988 leadSurrogate = codePoint
1989 continue
1990 }
1991
1992 // valid surrogate pair
1993 codePoint = (leadSurrogate - 0xD800 << 10 | codePoint - 0xDC00) + 0x10000
1994 } else if (leadSurrogate) {
1995 // valid bmp char, but last char was a lead
1996 if ((units -= 3) > -1) bytes.push(0xEF, 0xBF, 0xBD)
1997 }
1998
1999 leadSurrogate = null
2000
2001 // encode utf8
2002 if (codePoint < 0x80) {
2003 if ((units -= 1) < 0) break
2004 bytes.push(codePoint)
2005 } else if (codePoint < 0x800) {
2006 if ((units -= 2) < 0) break
2007 bytes.push(
2008 codePoint >> 0x6 | 0xC0,
2009 codePoint & 0x3F | 0x80
2010 )
2011 } else if (codePoint < 0x10000) {

Callers 2

byteLength · Function · 0.85
utf8Write · Function · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected

Used in the wild — real call sites across dependent graphs

searching dependent graphs…