| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104 |
- const b4a = require('b4a')
- /**
- * https://encoding.spec.whatwg.org/#utf-8-decoder
- */
- module.exports = class UTF8Decoder {
- constructor () {
- this.codePoint = 0
- this.bytesSeen = 0
- this.bytesNeeded = 0
- this.lowerBoundary = 0x80
- this.upperBoundary = 0xbf
- }
- get remaining () {
- return this.bytesSeen
- }
- decode (data) {
- // If we have a fast path, just sniff if the last part is a boundary
- if (this.bytesNeeded === 0) {
- let isBoundary = true
- for (let i = Math.max(0, data.byteLength - 4), n = data.byteLength; i < n && isBoundary; i++) {
- isBoundary = data[i] <= 0x7f
- }
- if (isBoundary) return b4a.toString(data, 'utf8')
- }
- let result = ''
- for (let i = 0, n = data.byteLength; i < n; i++) {
- const byte = data[i]
- if (this.bytesNeeded === 0) {
- if (byte <= 0x7f) {
- result += String.fromCharCode(byte)
- } else {
- this.bytesSeen = 1
- if (byte >= 0xc2 && byte <= 0xdf) {
- this.bytesNeeded = 2
- this.codePoint = byte & 0x1f
- } else if (byte >= 0xe0 && byte <= 0xef) {
- if (byte === 0xe0) this.lowerBoundary = 0xa0
- else if (byte === 0xed) this.upperBoundary = 0x9f
- this.bytesNeeded = 3
- this.codePoint = byte & 0xf
- } else if (byte >= 0xf0 && byte <= 0xf4) {
- if (byte === 0xf0) this.lowerBoundary = 0x90
- if (byte === 0xf4) this.upperBoundary = 0x8f
- this.bytesNeeded = 4
- this.codePoint = byte & 0x7
- } else {
- result += '\ufffd'
- }
- }
- continue
- }
- if (byte < this.lowerBoundary || byte > this.upperBoundary) {
- this.codePoint = 0
- this.bytesNeeded = 0
- this.bytesSeen = 0
- this.lowerBoundary = 0x80
- this.upperBoundary = 0xbf
- result += '\ufffd'
- continue
- }
- this.lowerBoundary = 0x80
- this.upperBoundary = 0xbf
- this.codePoint = (this.codePoint << 6) | (byte & 0x3f)
- this.bytesSeen++
- if (this.bytesSeen !== this.bytesNeeded) continue
- result += String.fromCodePoint(this.codePoint)
- this.codePoint = 0
- this.bytesNeeded = 0
- this.bytesSeen = 0
- }
- return result
- }
- flush () {
- const result = this.bytesNeeded > 0 ? '\ufffd' : ''
- this.codePoint = 0
- this.bytesNeeded = 0
- this.bytesSeen = 0
- this.lowerBoundary = 0x80
- this.upperBoundary = 0xbf
- return result
- }
- }
|