You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

295 lines
6.4 KiB
Go

package stt
import (
"fmt"
"io"
)
// ValueType the type for JSON element
type ValueType int
const (
// InvalidValue invalid element
InvalidValue ValueType = iota
// StringValue element "string"
StringValue
// NumberValue element 100 or 0.10
NumberValue
// NilValue element null
NilValue
// BoolValue element true or false
BoolValue
)
var valueTypes []ValueType
func init() {
valueTypes = make([]ValueType, 256)
for i := 0; i < len(valueTypes); i++ {
valueTypes[i] = StringValue
}
valueTypes['-'] = NumberValue
valueTypes['0'] = NumberValue
valueTypes['1'] = NumberValue
valueTypes['2'] = NumberValue
valueTypes['3'] = NumberValue
valueTypes['4'] = NumberValue
valueTypes['5'] = NumberValue
valueTypes['6'] = NumberValue
valueTypes['7'] = NumberValue
valueTypes['8'] = NumberValue
valueTypes['9'] = NumberValue
valueTypes['t'] = BoolValue
valueTypes['f'] = BoolValue
valueTypes['n'] = NilValue
}
// Iterator is an io.Reader like object, with STT specific read functions.
// Error is not returned as return value, but stored as Error member on this iterator instance.
type Iterator struct {
cfg *frozenConfig
reader io.Reader
buf []byte
head int
tail int
depth int
captureStartedAt int
captured []byte
Error error
Attachment interface{} // open for customized decoder
}
// NewIterator creates an empty Iterator instance
func NewIterator(cfg API) *Iterator {
return &Iterator{
cfg: cfg.(*frozenConfig),
reader: nil,
buf: nil,
head: 0,
tail: 0,
depth: 0,
}
}
// Parse creates an Iterator instance from io.Reader
func Parse(cfg API, reader io.Reader, bufSize int) *Iterator {
return &Iterator{
cfg: cfg.(*frozenConfig),
reader: reader,
buf: make([]byte, bufSize),
head: 0,
tail: 0,
depth: 0,
}
}
// ParseBytes creates an Iterator instance from byte array
func ParseBytes(cfg API, input []byte) *Iterator {
return &Iterator{
cfg: cfg.(*frozenConfig),
reader: nil,
buf: input,
head: 0,
tail: len(input),
depth: 0,
}
}
// ParseString creates an Iterator instance from string
func ParseString(cfg API, input string) *Iterator {
return ParseBytes(cfg, []byte(input))
}
// Pool returns a pool can provide more iterator with same configuration
func (iter *Iterator) Pool() IteratorPool {
return iter.cfg
}
// Reset reuse iterator instance by specifying another reader
func (iter *Iterator) Reset(reader io.Reader) *Iterator {
iter.reader = reader
iter.head = 0
iter.tail = 0
iter.depth = 0
return iter
}
// ResetBytes reuse iterator instance by specifying another byte array as input
func (iter *Iterator) ResetBytes(input []byte) *Iterator {
iter.reader = nil
iter.buf = input
iter.head = 0
iter.tail = len(input)
iter.depth = 0
return iter
}
// WhatIsNext gets ValueType of relatively next json element
func (iter *Iterator) WhatIsNext() ValueType {
valueType := valueTypes[iter.nextToken()]
iter.unreadByte()
return valueType
}
func (iter *Iterator) skipWhitespacesWithoutLoadMore() bool {
for i := iter.head; i < iter.tail; i++ {
c := iter.buf[i]
switch c {
case ' ', '\n', '\t', '\r':
continue
}
iter.head = i
return false
}
return true
}
func (iter *Iterator) nextToken() byte {
// a variation of skip whitespaces, returning the next non-whitespace token
for {
for i := iter.head; i < iter.tail; i++ {
c := iter.buf[i]
switch c {
case ' ', '\n', '\t', '\r':
continue
}
iter.head = i + 1
return c
}
if !iter.loadMore() {
return 0
}
}
}
// ReportError record an error in iterator instance with current position.
func (iter *Iterator) ReportError(operation string, msg string) {
if iter.Error != nil {
if iter.Error != io.EOF {
return
}
}
peekStart := iter.head - 10
if peekStart < 0 {
peekStart = 0
}
peekEnd := iter.head + 10
if peekEnd > iter.tail {
peekEnd = iter.tail
}
parsing := string(iter.buf[peekStart:peekEnd])
contextStart := iter.head - 50
if contextStart < 0 {
contextStart = 0
}
contextEnd := iter.head + 50
if contextEnd > iter.tail {
contextEnd = iter.tail
}
context := string(iter.buf[contextStart:contextEnd])
iter.Error = fmt.Errorf("%s: %s, error found in #%v byte of ...|%s|..., bigger context ...|%s|...",
operation, msg, iter.head-peekStart, parsing, context)
}
// CurrentBuffer gets current buffer as string for debugging purpose
func (iter *Iterator) CurrentBuffer() string {
peekStart := iter.head - 10
if peekStart < 0 {
peekStart = 0
}
return fmt.Sprintf("parsing #%v byte, around ...|%s|..., whole buffer ...|%s|...", iter.head,
string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
}
func (iter *Iterator) readByte() (ret byte) {
if iter.head == iter.tail {
if iter.loadMore() {
ret = iter.buf[iter.head]
iter.head++
return ret
}
return 0
}
ret = iter.buf[iter.head]
iter.head++
return ret
}
func (iter *Iterator) loadMore() bool {
if iter.reader == nil {
if iter.Error == nil {
iter.head = iter.tail
iter.Error = io.EOF
}
return false
}
if iter.captured != nil {
iter.captured = append(iter.captured,
iter.buf[iter.captureStartedAt:iter.tail]...)
iter.captureStartedAt = 0
}
for {
n, err := iter.reader.Read(iter.buf)
if n == 0 {
if err != nil {
if iter.Error == nil {
iter.Error = err
}
return false
}
} else {
iter.head = 0
iter.tail = n
return true
}
}
}
func (iter *Iterator) unreadByte() {
if iter.Error != nil {
return
}
iter.head--
return
}
// Read the next STT element as generic interface{}.
func (iter *Iterator) Read() interface{} {
valueType := iter.WhatIsNext()
switch valueType {
case StringValue:
return iter.ReadString()
case NumberValue:
return iter.ReadFloat64()
case NilValue:
iter.skipBytes('n', 'u', 'l', 'l')
return nil
case BoolValue:
return iter.ReadBool()
default:
iter.ReportError("Read", fmt.Sprintf("unexpected value type: %v", valueType))
return nil
}
}
// limit maximum depth of nesting
const maxDepth = 20
func (iter *Iterator) incrementDepth() (success bool) {
iter.depth++
if iter.depth <= maxDepth {
return true
}
iter.ReportError("incrementDepth", "exceeded max depth")
return false
}
func (iter *Iterator) decrementDepth() (success bool) {
iter.depth--
if iter.depth >= 0 {
return true
}
iter.ReportError("decrementDepth", "unexpected negative nesting")
return false
}