123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487 |
- // Copyright 2015 Unknwon
- //
- // Licensed under the Apache License, Version 2.0 (the "License"): you may
- // not use this file except in compliance with the License. You may obtain
- // a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- // License for the specific language governing permissions and limitations
- // under the License.
- package ini
- import (
- "bufio"
- "bytes"
- "fmt"
- "io"
- "regexp"
- "strconv"
- "strings"
- "unicode"
- )
- var pythonMultiline = regexp.MustCompile("^(\\s+)([^\n]+)")
- type parserOptions struct {
- IgnoreContinuation bool
- IgnoreInlineComment bool
- AllowPythonMultilineValues bool
- SpaceBeforeInlineComment bool
- UnescapeValueDoubleQuotes bool
- UnescapeValueCommentSymbols bool
- PreserveSurroundedQuote bool
- }
- type parser struct {
- buf *bufio.Reader
- options parserOptions
- isEOF bool
- count int
- comment *bytes.Buffer
- }
- func newParser(r io.Reader, opts parserOptions) *parser {
- return &parser{
- buf: bufio.NewReader(r),
- options: opts,
- count: 1,
- comment: &bytes.Buffer{},
- }
- }
- // BOM handles header of UTF-8, UTF-16 LE and UTF-16 BE's BOM format.
- // http://en.wikipedia.org/wiki/Byte_order_mark#Representations_of_byte_order_marks_by_encoding
- func (p *parser) BOM() error {
- mask, err := p.buf.Peek(2)
- if err != nil && err != io.EOF {
- return err
- } else if len(mask) < 2 {
- return nil
- }
- switch {
- case mask[0] == 254 && mask[1] == 255:
- fallthrough
- case mask[0] == 255 && mask[1] == 254:
- p.buf.Read(mask)
- case mask[0] == 239 && mask[1] == 187:
- mask, err := p.buf.Peek(3)
- if err != nil && err != io.EOF {
- return err
- } else if len(mask) < 3 {
- return nil
- }
- if mask[2] == 191 {
- p.buf.Read(mask)
- }
- }
- return nil
- }
- func (p *parser) readUntil(delim byte) ([]byte, error) {
- data, err := p.buf.ReadBytes(delim)
- if err != nil {
- if err == io.EOF {
- p.isEOF = true
- } else {
- return nil, err
- }
- }
- return data, nil
- }
- func cleanComment(in []byte) ([]byte, bool) {
- i := bytes.IndexAny(in, "#;")
- if i == -1 {
- return nil, false
- }
- return in[i:], true
- }
- func readKeyName(delimiters string, in []byte) (string, int, error) {
- line := string(in)
- // Check if key name surrounded by quotes.
- var keyQuote string
- if line[0] == '"' {
- if len(line) > 6 && string(line[0:3]) == `"""` {
- keyQuote = `"""`
- } else {
- keyQuote = `"`
- }
- } else if line[0] == '`' {
- keyQuote = "`"
- }
- // Get out key name
- endIdx := -1
- if len(keyQuote) > 0 {
- startIdx := len(keyQuote)
- // FIXME: fail case -> """"""name"""=value
- pos := strings.Index(line[startIdx:], keyQuote)
- if pos == -1 {
- return "", -1, fmt.Errorf("missing closing key quote: %s", line)
- }
- pos += startIdx
- // Find key-value delimiter
- i := strings.IndexAny(line[pos+startIdx:], delimiters)
- if i < 0 {
- return "", -1, ErrDelimiterNotFound{line}
- }
- endIdx = pos + i
- return strings.TrimSpace(line[startIdx:pos]), endIdx + startIdx + 1, nil
- }
- endIdx = strings.IndexAny(line, delimiters)
- if endIdx < 0 {
- return "", -1, ErrDelimiterNotFound{line}
- }
- return strings.TrimSpace(line[0:endIdx]), endIdx + 1, nil
- }
- func (p *parser) readMultilines(line, val, valQuote string) (string, error) {
- for {
- data, err := p.readUntil('\n')
- if err != nil {
- return "", err
- }
- next := string(data)
- pos := strings.LastIndex(next, valQuote)
- if pos > -1 {
- val += next[:pos]
- comment, has := cleanComment([]byte(next[pos:]))
- if has {
- p.comment.Write(bytes.TrimSpace(comment))
- }
- break
- }
- val += next
- if p.isEOF {
- return "", fmt.Errorf("missing closing key quote from '%s' to '%s'", line, next)
- }
- }
- return val, nil
- }
- func (p *parser) readContinuationLines(val string) (string, error) {
- for {
- data, err := p.readUntil('\n')
- if err != nil {
- return "", err
- }
- next := strings.TrimSpace(string(data))
- if len(next) == 0 {
- break
- }
- val += next
- if val[len(val)-1] != '\\' {
- break
- }
- val = val[:len(val)-1]
- }
- return val, nil
- }
- // hasSurroundedQuote check if and only if the first and last characters
- // are quotes \" or \'.
- // It returns false if any other parts also contain same kind of quotes.
- func hasSurroundedQuote(in string, quote byte) bool {
- return len(in) >= 2 && in[0] == quote && in[len(in)-1] == quote &&
- strings.IndexByte(in[1:], quote) == len(in)-2
- }
- func (p *parser) readValue(in []byte, bufferSize int) (string, error) {
- line := strings.TrimLeftFunc(string(in), unicode.IsSpace)
- if len(line) == 0 {
- if p.options.AllowPythonMultilineValues && len(in) > 0 && in[len(in)-1] == '\n' {
- return p.readPythonMultilines(line, bufferSize)
- }
- return "", nil
- }
- var valQuote string
- if len(line) > 3 && string(line[0:3]) == `"""` {
- valQuote = `"""`
- } else if line[0] == '`' {
- valQuote = "`"
- } else if p.options.UnescapeValueDoubleQuotes && line[0] == '"' {
- valQuote = `"`
- }
- if len(valQuote) > 0 {
- startIdx := len(valQuote)
- pos := strings.LastIndex(line[startIdx:], valQuote)
- // Check for multi-line value
- if pos == -1 {
- return p.readMultilines(line, line[startIdx:], valQuote)
- }
- if p.options.UnescapeValueDoubleQuotes && valQuote == `"` {
- return strings.Replace(line[startIdx:pos+startIdx], `\"`, `"`, -1), nil
- }
- return line[startIdx : pos+startIdx], nil
- }
- lastChar := line[len(line)-1]
- // Won't be able to reach here if value only contains whitespace
- line = strings.TrimSpace(line)
- trimmedLastChar := line[len(line)-1]
- // Check continuation lines when desired
- if !p.options.IgnoreContinuation && trimmedLastChar == '\\' {
- return p.readContinuationLines(line[:len(line)-1])
- }
- // Check if ignore inline comment
- if !p.options.IgnoreInlineComment {
- var i int
- if p.options.SpaceBeforeInlineComment {
- i = strings.Index(line, " #")
- if i == -1 {
- i = strings.Index(line, " ;")
- }
- } else {
- i = strings.IndexAny(line, "#;")
- }
- if i > -1 {
- p.comment.WriteString(line[i:])
- line = strings.TrimSpace(line[:i])
- }
- }
- // Trim single and double quotes
- if (hasSurroundedQuote(line, '\'') ||
- hasSurroundedQuote(line, '"')) && !p.options.PreserveSurroundedQuote {
- line = line[1 : len(line)-1]
- } else if len(valQuote) == 0 && p.options.UnescapeValueCommentSymbols {
- if strings.Contains(line, `\;`) {
- line = strings.Replace(line, `\;`, ";", -1)
- }
- if strings.Contains(line, `\#`) {
- line = strings.Replace(line, `\#`, "#", -1)
- }
- } else if p.options.AllowPythonMultilineValues && lastChar == '\n' {
- return p.readPythonMultilines(line, bufferSize)
- }
- return line, nil
- }
- func (p *parser) readPythonMultilines(line string, bufferSize int) (string, error) {
- parserBufferPeekResult, _ := p.buf.Peek(bufferSize)
- peekBuffer := bytes.NewBuffer(parserBufferPeekResult)
- for {
- peekData, peekErr := peekBuffer.ReadBytes('\n')
- if peekErr != nil {
- if peekErr == io.EOF {
- return line, nil
- }
- return "", peekErr
- }
- peekMatches := pythonMultiline.FindStringSubmatch(string(peekData))
- if len(peekMatches) != 3 {
- return line, nil
- }
- // NOTE: Return if not a python-ini multi-line value.
- currentIdentSize := len(peekMatches[1])
- if currentIdentSize <= 0 {
- return line, nil
- }
- // NOTE: Just advance the parser reader (buffer) in-sync with the peek buffer.
- _, err := p.readUntil('\n')
- if err != nil {
- return "", err
- }
- line += fmt.Sprintf("\n%s", peekMatches[2])
- }
- }
- // parse parses data through an io.Reader.
- func (f *File) parse(reader io.Reader) (err error) {
- p := newParser(reader, parserOptions{
- IgnoreContinuation: f.options.IgnoreContinuation,
- IgnoreInlineComment: f.options.IgnoreInlineComment,
- AllowPythonMultilineValues: f.options.AllowPythonMultilineValues,
- SpaceBeforeInlineComment: f.options.SpaceBeforeInlineComment,
- UnescapeValueDoubleQuotes: f.options.UnescapeValueDoubleQuotes,
- UnescapeValueCommentSymbols: f.options.UnescapeValueCommentSymbols,
- PreserveSurroundedQuote: f.options.PreserveSurroundedQuote,
- })
- if err = p.BOM(); err != nil {
- return fmt.Errorf("BOM: %v", err)
- }
- // Ignore error because default section name is never empty string.
- name := DefaultSection
- if f.options.Insensitive {
- name = strings.ToLower(DefaultSection)
- }
- section, _ := f.NewSection(name)
- // This "last" is not strictly equivalent to "previous one" if current key is not the first nested key
- var isLastValueEmpty bool
- var lastRegularKey *Key
- var line []byte
- var inUnparseableSection bool
- // NOTE: Iterate and increase `currentPeekSize` until
- // the size of the parser buffer is found.
- // TODO(unknwon): When Golang 1.10 is the lowest version supported, replace with `parserBufferSize := p.buf.Size()`.
- parserBufferSize := 0
- // NOTE: Peek 1kb at a time.
- currentPeekSize := 1024
- if f.options.AllowPythonMultilineValues {
- for {
- peekBytes, _ := p.buf.Peek(currentPeekSize)
- peekBytesLength := len(peekBytes)
- if parserBufferSize >= peekBytesLength {
- break
- }
- currentPeekSize *= 2
- parserBufferSize = peekBytesLength
- }
- }
- for !p.isEOF {
- line, err = p.readUntil('\n')
- if err != nil {
- return err
- }
- if f.options.AllowNestedValues &&
- isLastValueEmpty && len(line) > 0 {
- if line[0] == ' ' || line[0] == '\t' {
- lastRegularKey.addNestedValue(string(bytes.TrimSpace(line)))
- continue
- }
- }
- line = bytes.TrimLeftFunc(line, unicode.IsSpace)
- if len(line) == 0 {
- continue
- }
- // Comments
- if line[0] == '#' || line[0] == ';' {
- // Note: we do not care ending line break,
- // it is needed for adding second line,
- // so just clean it once at the end when set to value.
- p.comment.Write(line)
- continue
- }
- // Section
- if line[0] == '[' {
- // Read to the next ']' (TODO: support quoted strings)
- closeIdx := bytes.LastIndexByte(line, ']')
- if closeIdx == -1 {
- return fmt.Errorf("unclosed section: %s", line)
- }
- name := string(line[1:closeIdx])
- section, err = f.NewSection(name)
- if err != nil {
- return err
- }
- comment, has := cleanComment(line[closeIdx+1:])
- if has {
- p.comment.Write(comment)
- }
- section.Comment = strings.TrimSpace(p.comment.String())
- // Reset aotu-counter and comments
- p.comment.Reset()
- p.count = 1
- inUnparseableSection = false
- for i := range f.options.UnparseableSections {
- if f.options.UnparseableSections[i] == name ||
- (f.options.Insensitive && strings.ToLower(f.options.UnparseableSections[i]) == strings.ToLower(name)) {
- inUnparseableSection = true
- continue
- }
- }
- continue
- }
- if inUnparseableSection {
- section.isRawSection = true
- section.rawBody += string(line)
- continue
- }
- kname, offset, err := readKeyName(f.options.KeyValueDelimiters, line)
- if err != nil {
- // Treat as boolean key when desired, and whole line is key name.
- if IsErrDelimiterNotFound(err) {
- switch {
- case f.options.AllowBooleanKeys:
- kname, err := p.readValue(line, parserBufferSize)
- if err != nil {
- return err
- }
- key, err := section.NewBooleanKey(kname)
- if err != nil {
- return err
- }
- key.Comment = strings.TrimSpace(p.comment.String())
- p.comment.Reset()
- continue
- case f.options.SkipUnrecognizableLines:
- continue
- }
- }
- return err
- }
- // Auto increment.
- isAutoIncr := false
- if kname == "-" {
- isAutoIncr = true
- kname = "#" + strconv.Itoa(p.count)
- p.count++
- }
- value, err := p.readValue(line[offset:], parserBufferSize)
- if err != nil {
- return err
- }
- isLastValueEmpty = len(value) == 0
- key, err := section.NewKey(kname, value)
- if err != nil {
- return err
- }
- key.isAutoIncrement = isAutoIncr
- key.Comment = strings.TrimSpace(p.comment.String())
- p.comment.Reset()
- lastRegularKey = key
- }
- return nil
- }
|