split.go 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. // Copyright 2015 go-swagger maintainers
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package swag
  15. import (
  16. "unicode"
  17. )
  18. var nameReplaceTable = map[rune]string{
  19. '@': "At ",
  20. '&': "And ",
  21. '|': "Pipe ",
  22. '$': "Dollar ",
  23. '!': "Bang ",
  24. '-': "",
  25. '_': "",
  26. }
  27. type (
  28. splitter struct {
  29. postSplitInitialismCheck bool
  30. initialisms []string
  31. }
  32. splitterOption func(*splitter) *splitter
  33. )
  34. // split calls the splitter; splitter provides more control and post options
  35. func split(str string) []string {
  36. lexems := newSplitter().split(str)
  37. result := make([]string, 0, len(lexems))
  38. for _, lexem := range lexems {
  39. result = append(result, lexem.GetOriginal())
  40. }
  41. return result
  42. }
  43. func (s *splitter) split(str string) []nameLexem {
  44. return s.toNameLexems(str)
  45. }
  46. func newSplitter(options ...splitterOption) *splitter {
  47. splitter := &splitter{
  48. postSplitInitialismCheck: false,
  49. initialisms: initialisms,
  50. }
  51. for _, option := range options {
  52. splitter = option(splitter)
  53. }
  54. return splitter
  55. }
  56. // withPostSplitInitialismCheck allows to catch initialisms after main split process
  57. func withPostSplitInitialismCheck(s *splitter) *splitter {
  58. s.postSplitInitialismCheck = true
  59. return s
  60. }
  61. type (
  62. initialismMatch struct {
  63. start, end int
  64. body []rune
  65. complete bool
  66. }
  67. initialismMatches []*initialismMatch
  68. )
  69. func (s *splitter) toNameLexems(name string) []nameLexem {
  70. nameRunes := []rune(name)
  71. matches := s.gatherInitialismMatches(nameRunes)
  72. return s.mapMatchesToNameLexems(nameRunes, matches)
  73. }
  74. func (s *splitter) gatherInitialismMatches(nameRunes []rune) initialismMatches {
  75. matches := make(initialismMatches, 0)
  76. for currentRunePosition, currentRune := range nameRunes {
  77. newMatches := make(initialismMatches, 0, len(matches))
  78. // check current initialism matches
  79. for _, match := range matches {
  80. if keepCompleteMatch := match.complete; keepCompleteMatch {
  81. newMatches = append(newMatches, match)
  82. continue
  83. }
  84. // drop failed match
  85. currentMatchRune := match.body[currentRunePosition-match.start]
  86. if !s.initialismRuneEqual(currentMatchRune, currentRune) {
  87. continue
  88. }
  89. // try to complete ongoing match
  90. if currentRunePosition-match.start == len(match.body)-1 {
  91. // we are close; the next step is to check the symbol ahead
  92. // if it is a small letter, then it is not the end of match
  93. // but beginning of the next word
  94. if currentRunePosition < len(nameRunes)-1 {
  95. nextRune := nameRunes[currentRunePosition+1]
  96. if newWord := unicode.IsLower(nextRune); newWord {
  97. // oh ok, it was the start of a new word
  98. continue
  99. }
  100. }
  101. match.complete = true
  102. match.end = currentRunePosition
  103. }
  104. newMatches = append(newMatches, match)
  105. }
  106. // check for new initialism matches
  107. for _, initialism := range s.initialisms {
  108. initialismRunes := []rune(initialism)
  109. if s.initialismRuneEqual(initialismRunes[0], currentRune) {
  110. newMatches = append(newMatches, &initialismMatch{
  111. start: currentRunePosition,
  112. body: initialismRunes,
  113. complete: false,
  114. })
  115. }
  116. }
  117. matches = newMatches
  118. }
  119. return matches
  120. }
  121. func (s *splitter) mapMatchesToNameLexems(nameRunes []rune, matches initialismMatches) []nameLexem {
  122. nameLexems := make([]nameLexem, 0)
  123. var lastAcceptedMatch *initialismMatch
  124. for _, match := range matches {
  125. if !match.complete {
  126. continue
  127. }
  128. if firstMatch := lastAcceptedMatch == nil; firstMatch {
  129. nameLexems = append(nameLexems, s.breakCasualString(nameRunes[:match.start])...)
  130. nameLexems = append(nameLexems, s.breakInitialism(string(match.body)))
  131. lastAcceptedMatch = match
  132. continue
  133. }
  134. if overlappedMatch := match.start <= lastAcceptedMatch.end; overlappedMatch {
  135. continue
  136. }
  137. middle := nameRunes[lastAcceptedMatch.end+1 : match.start]
  138. nameLexems = append(nameLexems, s.breakCasualString(middle)...)
  139. nameLexems = append(nameLexems, s.breakInitialism(string(match.body)))
  140. lastAcceptedMatch = match
  141. }
  142. // we have not found any accepted matches
  143. if lastAcceptedMatch == nil {
  144. return s.breakCasualString(nameRunes)
  145. }
  146. if lastAcceptedMatch.end+1 != len(nameRunes) {
  147. rest := nameRunes[lastAcceptedMatch.end+1:]
  148. nameLexems = append(nameLexems, s.breakCasualString(rest)...)
  149. }
  150. return nameLexems
  151. }
  152. func (s *splitter) initialismRuneEqual(a, b rune) bool {
  153. return a == b
  154. }
  155. func (s *splitter) breakInitialism(original string) nameLexem {
  156. return newInitialismNameLexem(original, original)
  157. }
  158. func (s *splitter) breakCasualString(str []rune) []nameLexem {
  159. segments := make([]nameLexem, 0)
  160. currentSegment := ""
  161. addCasualNameLexem := func(original string) {
  162. segments = append(segments, newCasualNameLexem(original))
  163. }
  164. addInitialismNameLexem := func(original, match string) {
  165. segments = append(segments, newInitialismNameLexem(original, match))
  166. }
  167. addNameLexem := func(original string) {
  168. if s.postSplitInitialismCheck {
  169. for _, initialism := range s.initialisms {
  170. if upper(initialism) == upper(original) {
  171. addInitialismNameLexem(original, initialism)
  172. return
  173. }
  174. }
  175. }
  176. addCasualNameLexem(original)
  177. }
  178. for _, rn := range string(str) {
  179. if replace, found := nameReplaceTable[rn]; found {
  180. if currentSegment != "" {
  181. addNameLexem(currentSegment)
  182. currentSegment = ""
  183. }
  184. if replace != "" {
  185. addNameLexem(replace)
  186. }
  187. continue
  188. }
  189. if !unicode.In(rn, unicode.L, unicode.M, unicode.N, unicode.Pc) {
  190. if currentSegment != "" {
  191. addNameLexem(currentSegment)
  192. currentSegment = ""
  193. }
  194. continue
  195. }
  196. if unicode.IsUpper(rn) {
  197. if currentSegment != "" {
  198. addNameLexem(currentSegment)
  199. }
  200. currentSegment = ""
  201. }
  202. currentSegment += string(rn)
  203. }
  204. if currentSegment != "" {
  205. addNameLexem(currentSegment)
  206. }
  207. return segments
  208. }