warning_check_log.go 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324
  1. // Copyright 2019 getensh.com. All rights reserved.
  2. // Use of this source code is governed by getensh.com.
  3. package warning
  import (
  	"context"
  	"fmt"
  	"math"
  	"strings"
  	"sync"
  	"time"

  	"gd_crontab/apis"
  	"gd_crontab/common.in/config"
  	"gd_crontab/rpc_apis"
  	"gd_crontab/rpc_apis/gd_management"
  )
  // Package-level names of the MongoDB database and collections read by the
  // warning checks, plus the lock associated with log writing.
  var (
  	// accessDB is the MongoDB database that stores the access logs.
  	accessDB = "db_gd_access_log"

  	// accessTabName is the collection holding platform API access logs.
  	accessTabName = "t_gd_access_log"

  	// thirdTabName is the collection holding provider (third-party) access logs.
  	thirdTabName = "t_gd_thirdpart_access_log"

  	// mutex is intended for exclusive writing of the log; it is not
  	// referenced in this part of the file.
  	mutex sync.RWMutex
  )
  // AccessLogCheck contains the measured targets and API information used for
  // platform API warnings.
  //
  // Count, Success, Valid, NoRecord and AvgElapsed are the measured values.
  // Elapsed, FailRate and NorecordRate are the limits the measured values are
  // compared against when the warning table is rendered.
  type AccessLogCheck struct {
  	ApiId      int64   `json:"api_id" bson:"api_id"`
  	ApiName    string  `json:"api_name" bson:"api_name"`
  	ApiRouter  string  `json:"api_router" bson:"api_router"`
  	Count      int     `json:"count" bson:"count"`
  	Success    int     `json:"success" bson:"success"`
  	Valid      int     `json:"valid" bson:"valid"`
  	NoRecord   int     `json:"no_record" bson:"no_record"`
  	AvgElapsed float64 `json:"avg_elapsed" bson:"avg_elapsed"`
  	// Elapsed must not share the "avg_elapsed" JSON name with AvgElapsed:
  	// encoding/json silently drops every field involved in a name clash.
  	Elapsed      float64 `json:"elapsed" bson:"elapsed"`
  	FailRate     float64 `json:"fail_rate" bson:"fail_rate"`
  	NorecordRate float64 `json:"norecord_rate" bson:"norecord_rate"`
  }
  37. func GetWarningMongoDb() string {
  38. return accessDB
  39. }
  // ThirdLogCheck contains the measured targets and provider API information
  // used for provider (third-party) warnings.
  //
  // Count, Success, Valid, NoRecord and AvgElapsed are the measured values.
  // Elapsed, FailRate and NorecordRate are the limits the measured values are
  // compared against when the warning table is rendered.
  type ThirdLogCheck struct {
  	ProviderName string `json:"provider_name" bson:"provider_name"`
  	// NOTE(review): the bson key here is "api_name" while MgoThirdLog uses
  	// "provider_api_name" — confirm this matches the query that fills it.
  	ProviderApiName string  `json:"provider_api_name" bson:"api_name"`
  	ApiRouter       string  `json:"api_router" bson:"api_router"`
  	Count           int     `json:"count" bson:"count"`
  	Success         int     `json:"success" bson:"success"`
  	Valid           int     `json:"valid" bson:"valid"`
  	NoRecord        int     `json:"no_record" bson:"no_record"`
  	AvgElapsed      float64 `json:"avg_elapsed" bson:"avg_elapsed"`
  	ProviderApiId   int64   `json:"provider_api_id" bson:"provider_api_id"`
  	// Elapsed must not share the "avg_elapsed" JSON name with AvgElapsed:
  	// encoding/json silently drops every field involved in a name clash.
  	Elapsed      float64 `json:"elapsed" bson:"elapsed"`
  	FailRate     float64 `json:"fail_rate" bson:"fail_rate"`
  	NorecordRate float64 `json:"norecord_rate" bson:"norecord_rate"`
  }
  // Percent formats a ratio (0.25 means 25%) as a whole-number percentage
  // such as "25%".
  //
  // The value is rounded to the nearest integer rather than truncated, so that
  // ratios like 0.29 (whose product with 100 is 28.999...) yield "29%". A value
  // that rounds to zero is returned as "0" without a percent sign. NaN and
  // ±Inf, which callers produce when they divide by a zero denominator, are
  // also reported as "0" because converting them to int is not portable.
  func Percent(data float64) string {
  	scaled := 100.0 * data
  	if math.IsNaN(scaled) || math.IsInf(scaled, 0) {
  		return "0"
  	}
  	i := int(math.Round(scaled))
  	if i == 0 {
  		return "0"
  	}
  	return fmt.Sprintf("%d%%", i)
  }
  62. // getThreshold get thresholds from mysql,
  63. // if one value is zero, do not warning
  64. func getThreshold(atype int, id int64) (float64, float64, float64, bool) {
  65. req := gd_management.GetApiThresholdReq{
  66. Type: atype,
  67. ApiId: id,
  68. }
  69. reply, _ := rpc_apis.Management.GetApiThreshold(context.Background(), &req)
  70. if reply.ThresholdTimeout == 0 {
  71. reply.ThresholdTimeout = 9999
  72. }
  73. if reply.ThresholdFailRate == 0 {
  74. reply.ThresholdFailRate = 9999
  75. }
  76. if reply.ThresholdNorecordRate == 0 {
  77. reply.ThresholdNorecordRate = 9999
  78. }
  79. return float64(reply.ThresholdTimeout), reply.ThresholdFailRate, reply.ThresholdNorecordRate, reply.WarningEnable
  80. }
  // setCol builds one HTML table cell for the warning mail. When color is
  // non-empty it is emitted as the cell's bgcolor attribute; value is inserted
  // as-is.
  func setCol(value string, color string) string {
  	openTag := "<td>"
  	if color != "" {
  		openTag = "<td bgcolor=\"" + color + "\">"
  	}
  	return openTag + value + "</td>"
  }
  // setRow assembles one HTML table row by concatenating the already rendered
  // cells in cols, in order, inside a <tr> element. An empty or nil cols
  // yields "<tr></tr>".
  func setRow(cols []string) string {
  	// strings.Join builds the result in one pass instead of re-formatting
  	// the growing string for every cell.
  	return "<tr>" + strings.Join(cols, "") + "</tr>"
  }
  // setTable assembles the HTML table by concatenating the already rendered
  // rows, in order, inside a <table border="1"> element. An empty or nil rows
  // yields an empty table element.
  func setTable(rows []string) string {
  	// strings.Join builds the result in one pass instead of re-formatting
  	// the growing string for every row.
  	return "<table border=\"1\">" + strings.Join(rows, "") + "</table>"
  }
  104. // convertThirdLogToTable 将数据源日志预警信息转为html表格字符串
  105. func convertThirdLogToList(datas []ThirdLogCheck) []string {
  106. lines := make([]string,0)
  107. // 设置行数据
  108. for _, v := range datas {
  109. line := ""
  110. line = fmt.Sprintf("数据源api名称:%s\n",v.ProviderApiName)
  111. line = line + fmt.Sprintf("供应商:%s\n",v.ProviderName)
  112. line = line + fmt.Sprintf("统计量:%d\n",v.Count)
  113. line = line + fmt.Sprintf("平均响应时长:%.2f\n",v.AvgElapsed)
  114. line = line + fmt.Sprintf("失败率:%s\n",Percent(float64(v.Valid-v.Success)/float64(v.Valid)))
  115. line = line + fmt.Sprintf("查无率:%s\n",Percent(float64(v.NoRecord)/float64(v.Valid)))
  116. lines = append(lines,line)
  117. }
  118. return lines
  119. }
  120. // convertThirdLogToTable 将数据源日志预警信息转为html表格字符串
  121. func convertThirdLogToTable(datas []ThirdLogCheck) string {
  122. rows := make([]string, len(datas))
  123. cols := make([]string, 6)
  124. // 设置表头
  125. cols[0] = setCol("数据源api名称", "")
  126. cols[1] = setCol("供应商", "")
  127. cols[2] = setCol("统计量", "")
  128. cols[3] = setCol("平均响应时长", "")
  129. cols[4] = setCol("失败率", "")
  130. cols[5] = setCol("查无率", "")
  131. row := setRow(cols)
  132. rows[0] = row
  133. // 设置行数据
  134. for index, v := range datas {
  135. elapsed := v.Elapsed
  136. failRate := v.FailRate
  137. norecordRate := v.NorecordRate
  138. col := setCol(v.ProviderApiName, "")
  139. cols[0] = col
  140. col = setCol(v.ProviderName, "")
  141. cols[1] = col
  142. col = setCol(fmt.Sprintf("%d", v.Count), "")
  143. cols[2] = col
  144. if v.AvgElapsed < elapsed {
  145. col = setCol(fmt.Sprintf("%.2f", v.AvgElapsed), "")
  146. } else {
  147. col = setCol(fmt.Sprintf("%.2f", v.AvgElapsed), "yellow")
  148. }
  149. cols[3] = col
  150. if float64(v.Success)/float64(v.Valid) > (1 - failRate) {
  151. col = setCol(fmt.Sprintf("%s", Percent(float64(v.Valid-v.Success)/float64(v.Valid))), "")
  152. } else {
  153. col = setCol(fmt.Sprintf("%s", Percent(float64(v.Valid-v.Success)/float64(v.Valid))), "yellow")
  154. }
  155. cols[4] = col
  156. if float64(v.NoRecord)/float64(v.Valid) < norecordRate {
  157. col = setCol(fmt.Sprintf("%s", Percent(float64(v.NoRecord)/float64(v.Valid))), "")
  158. } else {
  159. col = setCol(fmt.Sprintf("%s", Percent(float64(v.NoRecord)/float64(v.Valid))), "yellow")
  160. }
  161. cols[5] = col
  162. row = setRow(cols)
  163. rows[index+1] = row
  164. }
  165. // 组装表格
  166. table := setTable(rows)
  167. return table
  168. }
  169. // accessWarningToMail api日志预警信息发送邮件
  170. func accessWarningToMail(datas []AccessLogCheck, start, end int64) {
  171. if len(datas) == 0 {
  172. return
  173. }
  174. // 将预警信息转化为表格
  175. content := convertAccessLogToList(datas)
  176. req := apis.Warning{}
  177. req.WarningText = content
  178. req.To = strings.Split(config.Conf.Warning.DefaultMails, ";")
  179. req.Subject = "平台api日志告警"
  180. if start > 0 && end > 0 {
  181. req.Subject = fmt.Sprintf("%s(%s-%s %d-%d)",
  182. req.Subject,
  183. time.Unix(start, 0).Format("2006-01-02 15:04:05"),
  184. time.Unix(end, 0).Format("2006-01-02 15:04:05"),
  185. start, end)
  186. }
  187. // 发送预警邮件
  188. Warning(&req)
  189. }
  190. func convertAccessLogToList(datas []AccessLogCheck) []string {
  191. lines := make([]string, 0)
  192. for _, v := range datas {
  193. line := ""
  194. line = fmt.Sprintf("平台api名称:%s\n",v.ApiName)
  195. line = line + fmt.Sprintf("统计量:%d\n",v.Count)
  196. line = line + fmt.Sprintf("平均响应时长:%.2f\n",v.AvgElapsed)
  197. line = line + fmt.Sprintf("失败率:%s\n",Percent(float64(v.Valid-v.Success)/float64(v.Valid)))
  198. line = line + fmt.Sprintf("查无率:%s\n",Percent(float64(v.NoRecord)/float64(v.Valid)))
  199. lines = append(lines,line)
  200. }
  201. return lines
  202. }
  203. // convertAccessLogToTable 将api日志预警信息转换为表格
  204. func convertAccessLogToTable(datas []AccessLogCheck) string {
  205. rows := make([]string, len(datas)+1)
  206. cols := make([]string, 5)
  207. cols[0] = setCol("平台api名称", "")
  208. cols[1] = setCol("统计量", "")
  209. cols[2] = setCol("平均响应时长", "")
  210. cols[3] = setCol("失败率", "")
  211. cols[4] = setCol("查无率", "")
  212. row := setRow(cols)
  213. rows[0] = row
  214. for index, v := range datas {
  215. elapsed := v.Elapsed
  216. failRate := v.FailRate
  217. norecordRate := v.NorecordRate
  218. col := setCol(v.ApiName, "")
  219. cols[0] = col
  220. col = setCol(fmt.Sprintf("%d", v.Count), "")
  221. cols[1] = col
  222. if v.AvgElapsed < elapsed {
  223. col = setCol(fmt.Sprintf("%.2f", v.AvgElapsed), "")
  224. } else {
  225. col = setCol(fmt.Sprintf("%.2f", v.AvgElapsed), "yellow")
  226. }
  227. cols[2] = col
  228. if float64(v.Success)/float64(v.Valid) > (1 - failRate) {
  229. col = setCol(fmt.Sprintf("%s", Percent(float64(v.Valid-v.Success)/float64(v.Valid))), "")
  230. } else {
  231. col = setCol(fmt.Sprintf("%s", Percent(float64(v.Valid-v.Success)/float64(v.Valid))), "yellow")
  232. }
  233. cols[3] = col
  234. if float64(v.NoRecord)/float64(v.Valid) < norecordRate {
  235. col = setCol(fmt.Sprintf("%s", Percent(float64(v.NoRecord)/float64(v.Valid))), "")
  236. } else {
  237. col = setCol(fmt.Sprintf("%s", Percent(float64(v.NoRecord)/float64(v.Valid))), "yellow")
  238. }
  239. cols[4] = col
  240. row = setRow(cols)
  241. rows[index+1] = row
  242. }
  243. table := setTable(rows)
  244. return table
  245. }
  246. // thirdWarningToMail 数据源日志预警发送邮件
  247. func thirdWarningToMail(datas []ThirdLogCheck, start, end int64) {
  248. if len(datas) == 0 {
  249. return
  250. }
  251. content := convertThirdLogToList(datas)
  252. req := apis.Warning{}
  253. req.WarningText = content
  254. req.To = strings.Split(config.Conf.Warning.DefaultMails, ";")
  255. req.Subject = "数据源api日志告警"
  256. if start > 0 && end > 0 {
  257. req.Subject = fmt.Sprintf("%s(%s-%s %d-%d)",
  258. req.Subject,
  259. time.Unix(start, 0).Format("2006-01-02:15:04:05"),
  260. time.Unix(end, 0).Format("2006-01-02:15:04:05"),
  261. start, end)
  262. }
  263. Warning(&req)
  264. }
  // MgoAccessLog is the shape of one platform API access-log record, with
  // matching JSON and BSON field names.
  type MgoAccessLog struct {
  	// ApiName is stored under "api_name".
  	ApiName string `json:"api_name" bson:"api_name"`
  	// Elapsed is stored under "elapsed".
  	Elapsed float64 `json:"elapsed" bson:"elapsed"`
  	// State is stored under "state".
  	State int `json:"state" bson:"state"`
  	// Code is stored under "code".
  	Code int `json:"code" bson:"code"`
  	// ApiId is stored under "api_id".
  	ApiId int64 `json:"api_id" bson:"api_id"`
  	// Timestamp is stored under "timestamp".
  	Timestamp int64 `json:"timestamp" bson:"timestamp"`
  }
  // MgoThirdLog is the shape of one provider (third-party) access-log record,
  // with matching JSON and BSON field names.
  type MgoThirdLog struct {
  	// ProviderApiName is stored under "provider_api_name".
  	ProviderApiName string `json:"provider_api_name" bson:"provider_api_name"`
  	// ProviderName is stored under "provider_name".
  	ProviderName string `json:"provider_name" bson:"provider_name"`
  	// Elapsed is stored under "elapsed".
  	Elapsed float64 `json:"elapsed" bson:"elapsed"`
  	// State is stored under "state".
  	State int `json:"state" bson:"state"`
  	// Code is stored under "code".
  	Code int `json:"code" bson:"code"`
  	// ProviderApiId is stored under "provider_api_id".
  	ProviderApiId int64 `json:"provider_api_id" bson:"provider_api_id"`
  	// Timestamp is stored under "timestamp".
  	Timestamp int64 `json:"timestamp" bson:"timestamp"`
  }