parser.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488
  1. // Copyright 2015 Unknwon
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License"): you may
  4. // not use this file except in compliance with the License. You may obtain
  5. // a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  11. // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  12. // License for the specific language governing permissions and limitations
  13. // under the License.
  14. package ini
  15. import (
  16. "bufio"
  17. "bytes"
  18. "fmt"
  19. "io"
  20. "regexp"
  21. "strconv"
  22. "strings"
  23. "unicode"
  24. )
  25. var pythonMultiline = regexp.MustCompile("^(\\s+)([^\n]+)")
  26. type tokenType int
  27. const (
  28. _TOKEN_INVALID tokenType = iota
  29. _TOKEN_COMMENT
  30. _TOKEN_SECTION
  31. _TOKEN_KEY
  32. )
  33. type parser struct {
  34. buf *bufio.Reader
  35. isEOF bool
  36. count int
  37. comment *bytes.Buffer
  38. }
  39. func newParser(r io.Reader) *parser {
  40. return &parser{
  41. buf: bufio.NewReader(r),
  42. count: 1,
  43. comment: &bytes.Buffer{},
  44. }
  45. }
  46. // BOM handles header of UTF-8, UTF-16 LE and UTF-16 BE's BOM format.
  47. // http://en.wikipedia.org/wiki/Byte_order_mark#Representations_of_byte_order_marks_by_encoding
  48. func (p *parser) BOM() error {
  49. mask, err := p.buf.Peek(2)
  50. if err != nil && err != io.EOF {
  51. return err
  52. } else if len(mask) < 2 {
  53. return nil
  54. }
  55. switch {
  56. case mask[0] == 254 && mask[1] == 255:
  57. fallthrough
  58. case mask[0] == 255 && mask[1] == 254:
  59. p.buf.Read(mask)
  60. case mask[0] == 239 && mask[1] == 187:
  61. mask, err := p.buf.Peek(3)
  62. if err != nil && err != io.EOF {
  63. return err
  64. } else if len(mask) < 3 {
  65. return nil
  66. }
  67. if mask[2] == 191 {
  68. p.buf.Read(mask)
  69. }
  70. }
  71. return nil
  72. }
  73. func (p *parser) readUntil(delim byte) ([]byte, error) {
  74. data, err := p.buf.ReadBytes(delim)
  75. if err != nil {
  76. if err == io.EOF {
  77. p.isEOF = true
  78. } else {
  79. return nil, err
  80. }
  81. }
  82. return data, nil
  83. }
  84. func cleanComment(in []byte) ([]byte, bool) {
  85. i := bytes.IndexAny(in, "#;")
  86. if i == -1 {
  87. return nil, false
  88. }
  89. return in[i:], true
  90. }
  91. func readKeyName(delimiters string, in []byte) (string, int, error) {
  92. line := string(in)
  93. // Check if key name surrounded by quotes.
  94. var keyQuote string
  95. if line[0] == '"' {
  96. if len(line) > 6 && string(line[0:3]) == `"""` {
  97. keyQuote = `"""`
  98. } else {
  99. keyQuote = `"`
  100. }
  101. } else if line[0] == '`' {
  102. keyQuote = "`"
  103. }
  104. // Get out key name
  105. endIdx := -1
  106. if len(keyQuote) > 0 {
  107. startIdx := len(keyQuote)
  108. // FIXME: fail case -> """"""name"""=value
  109. pos := strings.Index(line[startIdx:], keyQuote)
  110. if pos == -1 {
  111. return "", -1, fmt.Errorf("missing closing key quote: %s", line)
  112. }
  113. pos += startIdx
  114. // Find key-value delimiter
  115. i := strings.IndexAny(line[pos+startIdx:], delimiters)
  116. if i < 0 {
  117. return "", -1, ErrDelimiterNotFound{line}
  118. }
  119. endIdx = pos + i
  120. return strings.TrimSpace(line[startIdx:pos]), endIdx + startIdx + 1, nil
  121. }
  122. endIdx = strings.IndexAny(line, delimiters)
  123. if endIdx < 0 {
  124. return "", -1, ErrDelimiterNotFound{line}
  125. }
  126. return strings.TrimSpace(line[0:endIdx]), endIdx + 1, nil
  127. }
  128. func (p *parser) readMultilines(line, val, valQuote string) (string, error) {
  129. for {
  130. data, err := p.readUntil('\n')
  131. if err != nil {
  132. return "", err
  133. }
  134. next := string(data)
  135. pos := strings.LastIndex(next, valQuote)
  136. if pos > -1 {
  137. val += next[:pos]
  138. comment, has := cleanComment([]byte(next[pos:]))
  139. if has {
  140. p.comment.Write(bytes.TrimSpace(comment))
  141. }
  142. break
  143. }
  144. val += next
  145. if p.isEOF {
  146. return "", fmt.Errorf("missing closing key quote from '%s' to '%s'", line, next)
  147. }
  148. }
  149. return val, nil
  150. }
  151. func (p *parser) readContinuationLines(val string) (string, error) {
  152. for {
  153. data, err := p.readUntil('\n')
  154. if err != nil {
  155. return "", err
  156. }
  157. next := strings.TrimSpace(string(data))
  158. if len(next) == 0 {
  159. break
  160. }
  161. val += next
  162. if val[len(val)-1] != '\\' {
  163. break
  164. }
  165. val = val[:len(val)-1]
  166. }
  167. return val, nil
  168. }
  169. // hasSurroundedQuote check if and only if the first and last characters
  170. // are quotes \" or \'.
  171. // It returns false if any other parts also contain same kind of quotes.
  172. func hasSurroundedQuote(in string, quote byte) bool {
  173. return len(in) >= 2 && in[0] == quote && in[len(in)-1] == quote &&
  174. strings.IndexByte(in[1:], quote) == len(in)-2
  175. }
  176. func (p *parser) readValue(in []byte,
  177. parserBufferSize int,
  178. ignoreContinuation, ignoreInlineComment, unescapeValueDoubleQuotes, unescapeValueCommentSymbols, allowPythonMultilines, spaceBeforeInlineComment, preserveSurroundedQuote bool) (string, error) {
  179. line := strings.TrimLeftFunc(string(in), unicode.IsSpace)
  180. if len(line) == 0 {
  181. return "", nil
  182. }
  183. var valQuote string
  184. if len(line) > 3 && string(line[0:3]) == `"""` {
  185. valQuote = `"""`
  186. } else if line[0] == '`' {
  187. valQuote = "`"
  188. } else if unescapeValueDoubleQuotes && line[0] == '"' {
  189. valQuote = `"`
  190. }
  191. if len(valQuote) > 0 {
  192. startIdx := len(valQuote)
  193. pos := strings.LastIndex(line[startIdx:], valQuote)
  194. // Check for multi-line value
  195. if pos == -1 {
  196. return p.readMultilines(line, line[startIdx:], valQuote)
  197. }
  198. if unescapeValueDoubleQuotes && valQuote == `"` {
  199. return strings.Replace(line[startIdx:pos+startIdx], `\"`, `"`, -1), nil
  200. }
  201. return line[startIdx : pos+startIdx], nil
  202. }
  203. lastChar := line[len(line)-1]
  204. // Won't be able to reach here if value only contains whitespace
  205. line = strings.TrimSpace(line)
  206. trimmedLastChar := line[len(line)-1]
  207. // Check continuation lines when desired
  208. if !ignoreContinuation && trimmedLastChar == '\\' {
  209. return p.readContinuationLines(line[:len(line)-1])
  210. }
  211. // Check if ignore inline comment
  212. if !ignoreInlineComment {
  213. var i int
  214. if spaceBeforeInlineComment {
  215. i = strings.Index(line, " #")
  216. if i == -1 {
  217. i = strings.Index(line, " ;")
  218. }
  219. } else {
  220. i = strings.IndexAny(line, "#;")
  221. }
  222. if i > -1 {
  223. p.comment.WriteString(line[i:])
  224. line = strings.TrimSpace(line[:i])
  225. }
  226. }
  227. // Trim single and double quotes
  228. if (hasSurroundedQuote(line, '\'') ||
  229. hasSurroundedQuote(line, '"')) && !preserveSurroundedQuote {
  230. line = line[1 : len(line)-1]
  231. } else if len(valQuote) == 0 && unescapeValueCommentSymbols {
  232. if strings.Contains(line, `\;`) {
  233. line = strings.Replace(line, `\;`, ";", -1)
  234. }
  235. if strings.Contains(line, `\#`) {
  236. line = strings.Replace(line, `\#`, "#", -1)
  237. }
  238. } else if allowPythonMultilines && lastChar == '\n' {
  239. parserBufferPeekResult, _ := p.buf.Peek(parserBufferSize)
  240. peekBuffer := bytes.NewBuffer(parserBufferPeekResult)
  241. val := line
  242. for {
  243. peekData, peekErr := peekBuffer.ReadBytes('\n')
  244. if peekErr != nil {
  245. if peekErr == io.EOF {
  246. return val, nil
  247. }
  248. return "", peekErr
  249. }
  250. peekMatches := pythonMultiline.FindStringSubmatch(string(peekData))
  251. if len(peekMatches) != 3 {
  252. return val, nil
  253. }
  254. // NOTE: Return if not a python-ini multi-line value.
  255. currentIdentSize := len(peekMatches[1])
  256. if currentIdentSize <= 0 {
  257. return val, nil
  258. }
  259. // NOTE: Just advance the parser reader (buffer) in-sync with the peek buffer.
  260. _, err := p.readUntil('\n')
  261. if err != nil {
  262. return "", err
  263. }
  264. val += fmt.Sprintf("\n%s", peekMatches[2])
  265. }
  266. }
  267. return line, nil
  268. }
  269. // parse parses data through an io.Reader.
  270. func (f *File) parse(reader io.Reader) (err error) {
  271. p := newParser(reader)
  272. if err = p.BOM(); err != nil {
  273. return fmt.Errorf("BOM: %v", err)
  274. }
  275. // Ignore error because default section name is never empty string.
  276. name := DEFAULT_SECTION
  277. if f.options.Insensitive {
  278. name = strings.ToLower(DEFAULT_SECTION)
  279. }
  280. section, _ := f.NewSection(name)
  281. // This "last" is not strictly equivalent to "previous one" if current key is not the first nested key
  282. var isLastValueEmpty bool
  283. var lastRegularKey *Key
  284. var line []byte
  285. var inUnparseableSection bool
  286. // NOTE: Iterate and increase `currentPeekSize` until
  287. // the size of the parser buffer is found.
  288. // TODO(unknwon): When Golang 1.10 is the lowest version supported, replace with `parserBufferSize := p.buf.Size()`.
  289. parserBufferSize := 0
  290. // NOTE: Peek 1kb at a time.
  291. currentPeekSize := 1024
  292. if f.options.AllowPythonMultilineValues {
  293. for {
  294. peekBytes, _ := p.buf.Peek(currentPeekSize)
  295. peekBytesLength := len(peekBytes)
  296. if parserBufferSize >= peekBytesLength {
  297. break
  298. }
  299. currentPeekSize *= 2
  300. parserBufferSize = peekBytesLength
  301. }
  302. }
  303. for !p.isEOF {
  304. line, err = p.readUntil('\n')
  305. if err != nil {
  306. return err
  307. }
  308. if f.options.AllowNestedValues &&
  309. isLastValueEmpty && len(line) > 0 {
  310. if line[0] == ' ' || line[0] == '\t' {
  311. lastRegularKey.addNestedValue(string(bytes.TrimSpace(line)))
  312. continue
  313. }
  314. }
  315. line = bytes.TrimLeftFunc(line, unicode.IsSpace)
  316. if len(line) == 0 {
  317. continue
  318. }
  319. // Comments
  320. if line[0] == '#' || line[0] == ';' {
  321. // Note: we do not care ending line break,
  322. // it is needed for adding second line,
  323. // so just clean it once at the end when set to value.
  324. p.comment.Write(line)
  325. continue
  326. }
  327. // Section
  328. if line[0] == '[' {
  329. // Read to the next ']' (TODO: support quoted strings)
  330. closeIdx := bytes.LastIndexByte(line, ']')
  331. if closeIdx == -1 {
  332. return fmt.Errorf("unclosed section: %s", line)
  333. }
  334. name := string(line[1:closeIdx])
  335. section, err = f.NewSection(name)
  336. if err != nil {
  337. return err
  338. }
  339. comment, has := cleanComment(line[closeIdx+1:])
  340. if has {
  341. p.comment.Write(comment)
  342. }
  343. section.Comment = strings.TrimSpace(p.comment.String())
  344. // Reset aotu-counter and comments
  345. p.comment.Reset()
  346. p.count = 1
  347. inUnparseableSection = false
  348. for i := range f.options.UnparseableSections {
  349. if f.options.UnparseableSections[i] == name ||
  350. (f.options.Insensitive && strings.ToLower(f.options.UnparseableSections[i]) == strings.ToLower(name)) {
  351. inUnparseableSection = true
  352. continue
  353. }
  354. }
  355. continue
  356. }
  357. if inUnparseableSection {
  358. section.isRawSection = true
  359. section.rawBody += string(line)
  360. continue
  361. }
  362. kname, offset, err := readKeyName(f.options.KeyValueDelimiters, line)
  363. if err != nil {
  364. // Treat as boolean key when desired, and whole line is key name.
  365. if IsErrDelimiterNotFound(err) {
  366. switch {
  367. case f.options.AllowBooleanKeys:
  368. kname, err := p.readValue(line,
  369. parserBufferSize,
  370. f.options.IgnoreContinuation,
  371. f.options.IgnoreInlineComment,
  372. f.options.UnescapeValueDoubleQuotes,
  373. f.options.UnescapeValueCommentSymbols,
  374. f.options.AllowPythonMultilineValues,
  375. f.options.SpaceBeforeInlineComment,
  376. f.options.PreserveSurroundedQuote)
  377. if err != nil {
  378. return err
  379. }
  380. key, err := section.NewBooleanKey(kname)
  381. if err != nil {
  382. return err
  383. }
  384. key.Comment = strings.TrimSpace(p.comment.String())
  385. p.comment.Reset()
  386. continue
  387. case f.options.SkipUnrecognizableLines:
  388. continue
  389. }
  390. }
  391. return err
  392. }
  393. // Auto increment.
  394. isAutoIncr := false
  395. if kname == "-" {
  396. isAutoIncr = true
  397. kname = "#" + strconv.Itoa(p.count)
  398. p.count++
  399. }
  400. value, err := p.readValue(line[offset:],
  401. parserBufferSize,
  402. f.options.IgnoreContinuation,
  403. f.options.IgnoreInlineComment,
  404. f.options.UnescapeValueDoubleQuotes,
  405. f.options.UnescapeValueCommentSymbols,
  406. f.options.AllowPythonMultilineValues,
  407. f.options.SpaceBeforeInlineComment,
  408. f.options.PreserveSurroundedQuote)
  409. if err != nil {
  410. return err
  411. }
  412. isLastValueEmpty = len(value) == 0
  413. key, err := section.NewKey(kname, value)
  414. if err != nil {
  415. return err
  416. }
  417. key.isAutoIncrement = isAutoIncr
  418. key.Comment = strings.TrimSpace(p.comment.String())
  419. p.comment.Reset()
  420. lastRegularKey = key
  421. }
  422. return nil
  423. }