warning_log.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514
  1. // Copyright 2019 getensh.com. All rights reserved.
  2. // Use of this source code is governed by getensh.com.
  3. package warning
  4. import (
  5. "context"
  6. "gd_crontab/apis"
  7. "gd_crontab/common.in/config"
  8. "fmt"
  9. "sync"
  10. "time"
  11. )
  12. type WarningCount struct{
  13. ApiName string
  14. ProviderName string
  15. Total map[int64]int64
  16. Valid map[int64]int64
  17. Success map[int64]int64
  18. NoRecord map[int64]int64
  19. Elapsed map[int64]float64
  20. }
  21. type Waring struct{
  22. RwMutex sync.RWMutex
  23. Count map[int64]WarningCount
  24. }
  25. type Rate struct {
  26. Elapsed float64
  27. FailRate float64
  28. NorecordRate float64
  29. Enable bool
  30. }
  31. var ApiMutex sync.Mutex
  32. var ApiRate map[int64]Rate
  33. var ProviderApiRate map[int64]Rate
  34. var AccessLogWaring Waring
  35. var ThirdPartLogWaring Waring
  36. var AccessLogLastTime map[int64]int64
  37. var ThirparthLastTime map[int64]int64
  38. func getAccessLogCode(code int,codeType int ) int64 {
  39. switch codeType {
  40. case 1:// 有效
  41. if code == 0 || code == 20001 || code == 20002 || code == 10001 || code == 10002{
  42. return 1
  43. }
  44. case 2: // 成功
  45. if code == 0 || code == 20001{
  46. return 1
  47. }
  48. case 3: // 查无
  49. if code == 20001{
  50. return 1
  51. }
  52. }
  53. return 0
  54. }
  55. func CountAccessLog(accessLog *apis.ReciveAccessLogReq){
  56. timeNow := accessLog.Timestamp
  57. AccessLogWaring.RwMutex.Lock()
  58. defer AccessLogWaring.RwMutex.Unlock()
  59. if AccessLogWaring.Count == nil {
  60. AccessLogWaring.Count = make(map[int64]WarningCount)
  61. }
  62. if v,ok := AccessLogWaring.Count[accessLog.ApiId];ok{
  63. v.ApiName = accessLog.ApiName
  64. if no ,ok := v.Total[timeNow];ok{
  65. v.Total[timeNow] = no + 1
  66. }else {
  67. v.Total[timeNow] = 1
  68. }
  69. if no ,ok := v.Valid[timeNow];ok{
  70. v.Valid[timeNow] = no + getAccessLogCode(accessLog.Code,1)
  71. }else {
  72. v.Valid[timeNow] = getAccessLogCode(accessLog.Code,1)
  73. }
  74. if no ,ok := v.Success[timeNow];ok{
  75. v.Success[timeNow] = no + getAccessLogCode(accessLog.Code,2)
  76. }else {
  77. v.Success[timeNow] = getAccessLogCode(accessLog.Code,2)
  78. }
  79. if no ,ok := v.NoRecord[timeNow];ok{
  80. v.NoRecord[timeNow] = no + getAccessLogCode(accessLog.Code,3)
  81. }else {
  82. v.NoRecord[timeNow] = getAccessLogCode(accessLog.Code,3)
  83. }
  84. if no ,ok := v.Elapsed[timeNow];ok{
  85. v.Elapsed[timeNow] = no + accessLog.Elapsed
  86. }else {
  87. v.Elapsed[timeNow] = accessLog.Elapsed
  88. }
  89. }else{
  90. tmp := WarningCount{}
  91. tmp.ApiName = accessLog.ApiName
  92. tmp.Total = make(map[int64]int64)
  93. tmp.Total[timeNow] = 1
  94. tmp.Valid = make(map[int64]int64)
  95. count := getAccessLogCode(accessLog.Code,1)
  96. if count == 1 {
  97. tmp.Valid[timeNow] = count
  98. }
  99. tmp.Success = make(map[int64]int64)
  100. count = getAccessLogCode(accessLog.Code,2)
  101. if count == 1 {
  102. tmp.Success[timeNow] =count
  103. }
  104. tmp.NoRecord = make(map[int64]int64)
  105. count = getAccessLogCode(accessLog.Code,3)
  106. if count == 1{
  107. tmp.NoRecord[timeNow] = count
  108. }
  109. tmp.Elapsed = make(map[int64]float64)
  110. tmp.Elapsed[timeNow] = accessLog.Elapsed
  111. AccessLogWaring.Count[accessLog.ApiId] = tmp
  112. }
  113. //fmt.Println(AccessLogWaring)
  114. }
  115. func CountThirdPartLog(accessLog *apis.ReciveThirdPartLogReq){
  116. timeNow := accessLog.Timestamp
  117. ThirdPartLogWaring.RwMutex.Lock()
  118. defer ThirdPartLogWaring.RwMutex.Unlock()
  119. if ThirdPartLogWaring.Count == nil {
  120. ThirdPartLogWaring.Count = make(map[int64]WarningCount)
  121. }
  122. if v,ok := ThirdPartLogWaring.Count[accessLog.ProviderApiId];ok{
  123. v.ApiName = accessLog.ProviderApiName
  124. v.ProviderName = accessLog.ProviderName
  125. if no ,ok := v.Valid[timeNow];ok{
  126. v.Valid[timeNow] = no + 1
  127. }else {
  128. v.Valid[timeNow] = 1
  129. }
  130. if no ,ok := v.Success[timeNow];ok{
  131. if accessLog.State == 1 {
  132. v.Success[timeNow] = no + 1
  133. }
  134. }else {
  135. if accessLog.State == 1 {
  136. v.Success[timeNow] = 1
  137. }
  138. }
  139. if no ,ok := v.NoRecord[timeNow];ok{
  140. v.NoRecord[timeNow] = no + getAccessLogCode(accessLog.Code,3)
  141. }else {
  142. v.NoRecord[timeNow] = getAccessLogCode(accessLog.Code,3)
  143. }
  144. if no ,ok := v.Elapsed[timeNow];ok{
  145. v.Elapsed[timeNow] = no + accessLog.Elapsed
  146. }else {
  147. v.Elapsed[timeNow] = accessLog.Elapsed
  148. }
  149. }else{
  150. tmp := WarningCount{}
  151. tmp.ApiName = accessLog.ProviderApiName
  152. tmp.ProviderName = accessLog.ProviderName
  153. tmp.Valid = make(map[int64]int64)
  154. tmp.Valid[timeNow] = 1
  155. tmp.Success = make(map[int64]int64)
  156. if accessLog.State == 1{
  157. tmp.Success[timeNow] = 1
  158. }
  159. tmp.NoRecord = make(map[int64]int64)
  160. count := getAccessLogCode(accessLog.Code,3)
  161. if count == 1{
  162. tmp.NoRecord[timeNow] = count
  163. }
  164. tmp.Elapsed = make(map[int64]float64)
  165. tmp.Elapsed[timeNow] = accessLog.Elapsed
  166. ThirdPartLogWaring.Count[accessLog.ProviderApiId] = tmp
  167. }
  168. //fmt.Println(ThirdPartLogWaring)
  169. }
  170. func getTotal(m map[int64]int64,now,period int64) int64 {
  171. delta := now-period
  172. count := int64(0)
  173. for timestamp,v := range m{
  174. if timestamp >= delta{
  175. count = count + v
  176. }else if timestamp < delta {
  177. delete(m, timestamp)
  178. }
  179. }
  180. return count
  181. }
  182. func getElaspTotal(m map[int64]float64,now,period int64) float64{
  183. delta := now-period
  184. count := float64(0)
  185. for timestamp,v := range m{
  186. if timestamp >= delta{
  187. count = count + v
  188. }else if timestamp < delta {
  189. delete(m, timestamp)
  190. }
  191. }
  192. return count
  193. }
  194. func CheckThirdPartLastSendTime(old []ThirdLogCheck,now int64,warnSendPeriod int64) []ThirdLogCheck {
  195. if ThirparthLastTime == nil {
  196. ThirparthLastTime = make(map[int64]int64)
  197. }
  198. new := []ThirdLogCheck{}
  199. for _,v := range old {
  200. if timeOld,ok := ThirparthLastTime[v.ProviderApiId];ok{
  201. if timeOld+warnSendPeriod*60 <= now{
  202. new = append(new,v)
  203. ThirparthLastTime[v.ProviderApiId] = now
  204. }
  205. }else{
  206. new = append(new,v)
  207. ThirparthLastTime[v.ProviderApiId] = now
  208. }
  209. }
  210. return new
  211. }
  212. func CheckProviderLog(now int64,period int64,warnMinCount,warnSendPeriod int64){
  213. // 发送预警邮件
  214. warningLogs := CheckProviderLogImpl(now,period,warnMinCount)
  215. //fmt.Println("3333333333333333:",warningLogs)
  216. if warningLogs != nil{
  217. warningLogs = CheckThirdPartLastSendTime(warningLogs,now,warnSendPeriod)
  218. thirdWarningToMail(warningLogs, now-period, now)
  219. }
  220. }
  221. func GetPorviderApiIds() []int64 {
  222. providerApiIds := []int64{}
  223. ThirdPartLogWaring.RwMutex.Lock()
  224. defer ThirdPartLogWaring.RwMutex.Unlock()
  225. for k,_ := range ThirdPartLogWaring.Count{
  226. providerApiIds = append(providerApiIds,k)
  227. }
  228. return providerApiIds
  229. }
  230. func GetApiIds() []int64 {
  231. apiIds := []int64{}
  232. AccessLogWaring.RwMutex.Lock()
  233. defer AccessLogWaring.RwMutex.Unlock()
  234. for k,_ := range AccessLogWaring.Count{
  235. apiIds = append(apiIds,k)
  236. }
  237. return apiIds
  238. }
  239. func GetApiRate(){
  240. apiIds := GetApiIds()
  241. providerApiIds := GetPorviderApiIds()
  242. //fmt.Println("ids ----------------------",apiIds,providerApiIds)
  243. ApiRate = make(map[int64]Rate)
  244. ProviderApiRate = make(map[int64]Rate)
  245. for _,v := range apiIds{
  246. elapsed, failRate, norecordRate, enable := getThreshold(0, v)
  247. tmp := Rate{}
  248. tmp.Elapsed = elapsed
  249. tmp.FailRate = failRate
  250. tmp.NorecordRate = norecordRate
  251. tmp.Enable = enable
  252. ApiRate[v] = tmp
  253. //fmt.Println("API RATE 111111111111111111",ApiRate)
  254. }
  255. for _,v := range providerApiIds{
  256. elapsed, failRate, norecordRate, enable := getThreshold(1, v)
  257. tmp := Rate{}
  258. tmp.Elapsed = elapsed
  259. tmp.FailRate = failRate
  260. tmp.NorecordRate = norecordRate
  261. tmp.Enable = enable
  262. ProviderApiRate[v] = tmp
  263. //fmt.Println("PROVIDER API RATE 111111111111111111",ProviderApiRate)
  264. }
  265. }
  266. func CheckProviderLogImpl(now int64,period int64,warnMinCount int64) []ThirdLogCheck {
  267. if ThirdPartLogWaring.Count == nil {
  268. return nil
  269. }
  270. ThirdPartLogWaring.RwMutex.Lock()
  271. defer ThirdPartLogWaring.RwMutex.Unlock()
  272. warningLogs := []ThirdLogCheck{}
  273. for k,v := range ThirdPartLogWaring.Count{
  274. valid := getTotal(v.Valid,now,period)
  275. if valid == 0 {
  276. continue
  277. }
  278. if warnMinCount > 0 && valid <warnMinCount{
  279. continue
  280. }
  281. success := getTotal(v.Success,now,period)
  282. noRecord := getTotal(v.NoRecord,now,period)
  283. elapsedTotal := getElaspTotal(v.Elapsed,now,period)
  284. avgElapsed := elapsedTotal/float64(valid)
  285. //fmt.Println("----------------------------------:",valid,success,noRecord,elapsedTotal,avgElapsed)
  286. elapsed, failRate, norecordRate, enable := getThresholdFromMap(1, k)
  287. //fmt.Println("1111----------------------------------:",elapsed, failRate, norecordRate, enable)
  288. if enable == false {
  289. continue
  290. }
  291. if valid == 0{
  292. if avgElapsed < elapsed{
  293. continue
  294. }
  295. }else{
  296. if avgElapsed < elapsed &&
  297. float64(noRecord)/float64(valid) < norecordRate &&
  298. float64(success)/float64(valid) > (1-failRate) {
  299. continue
  300. }
  301. }
  302. acs := ThirdLogCheck{}
  303. acs.ProviderApiName = v.ApiName
  304. acs.ProviderName = v.ProviderName
  305. acs.Count = int(valid)
  306. acs.AvgElapsed = avgElapsed
  307. acs.Valid = int(valid)
  308. acs.Success = int(success)
  309. acs.NoRecord = int(noRecord)
  310. warningLogs = append(warningLogs, acs)
  311. }
  312. return warningLogs
  313. }
  314. func CheckAccessLogLastSendTime(old []AccessLogCheck,now int64,warnSendPeriod int64) []AccessLogCheck {
  315. if AccessLogLastTime == nil {
  316. AccessLogLastTime = make(map[int64]int64)
  317. }
  318. new := []AccessLogCheck{}
  319. for _,v := range old {
  320. if timeOld,ok := AccessLogLastTime[v.ApiId];ok{
  321. if timeOld+warnSendPeriod*60 <= now{
  322. new = append(new,v)
  323. AccessLogLastTime[v.ApiId] = now
  324. }
  325. }else{
  326. new = append(new,v)
  327. AccessLogLastTime[v.ApiId] = now
  328. }
  329. }
  330. return new
  331. }
  332. func CheckAccessLog(now int64,period int64,warnMinCount,warnSendPeriod int64){
  333. // 发送预警邮件
  334. warningLogs := CheckAccessLogImpl(now,period,warnMinCount)
  335. //fmt.Println("22222222222222222222:",warningLogs)
  336. if warningLogs != nil{
  337. warningLogs = CheckAccessLogLastSendTime(warningLogs,now,warnSendPeriod)
  338. accessWarningToMail(warningLogs, now-period, now)
  339. }
  340. }
  341. func getThresholdFromMap(atype int,id int64)(float64, float64, float64, bool){
  342. //fmt.Println("000000000000000000000000000",atype,id)
  343. if atype == 0 {
  344. if v,ok:= ApiRate[id];ok{
  345. //fmt.Println("get api rate 1111111111111",v)
  346. return v.Elapsed,v.FailRate,v.NorecordRate,v.Enable
  347. }
  348. }else if atype == 1 {
  349. if v,ok:= ProviderApiRate[id];ok{
  350. //fmt.Println("get provider api rate 1111111111111",v)
  351. return v.Elapsed,v.FailRate,v.NorecordRate,v.Enable
  352. }
  353. }
  354. return 9999,9999,9999,false
  355. }
  356. func CheckAccessLogImpl(now int64,period int64,warnMinCount int64) []AccessLogCheck {
  357. if AccessLogWaring.Count == nil {
  358. return nil
  359. }
  360. AccessLogWaring.RwMutex.Lock()
  361. defer AccessLogWaring.RwMutex.Unlock()
  362. warningLogs := []AccessLogCheck{}
  363. for k,v := range AccessLogWaring.Count{
  364. //fmt.Println("??????????????????????????????",v,now,period)
  365. total := getTotal(v.Total,now,period)
  366. if total == 0 {
  367. continue
  368. }
  369. if warnMinCount > 0 && total <warnMinCount{
  370. continue
  371. }
  372. valid := getTotal(v.Valid,now,period)
  373. success := getTotal(v.Success,now,period)
  374. noRecord := getTotal(v.NoRecord,now,period)
  375. elapsedTotal := getElaspTotal(v.Elapsed,now,period)
  376. avgElapsed := elapsedTotal/float64(total)
  377. //fmt.Println("222222----------------------------------:",valid,success,noRecord,elapsedTotal,avgElapsed)
  378. elapsed, failRate, norecordRate, enable := getThresholdFromMap(0, k)
  379. //fmt.Println("333333----------------------------------:",elapsed, failRate, norecordRate, enable)
  380. if enable == false {
  381. continue
  382. }
  383. if valid == 0{
  384. if avgElapsed < elapsed{
  385. continue
  386. }
  387. }else{
  388. if avgElapsed < elapsed &&
  389. float64(noRecord)/float64(valid) < norecordRate &&
  390. float64(success)/float64(valid) > (1-failRate) {
  391. continue
  392. }
  393. }
  394. acs := AccessLogCheck{}
  395. acs.ApiName = v.ApiName
  396. acs.Count = int(total)
  397. acs.AvgElapsed = avgElapsed
  398. acs.Valid = int(valid)
  399. acs.Success = int(success)
  400. acs.NoRecord = int(noRecord)
  401. warningLogs = append(warningLogs, acs)
  402. }
  403. return warningLogs
  404. }
  405. func CheckLog(){
  406. defer func() {
  407. if r := recover(); r != nil {
  408. err := fmt.Errorf("%+v", r)
  409. fmt.Println("err:",err)
  410. }
  411. }()
  412. now := time.Now().Unix()
  413. //fmt.Println("time 11111111111111111111111111111111111111:",now)
  414. ApiMutex.Lock()
  415. defer ApiMutex.Unlock()
  416. GetApiRate()
  417. warnPeriod,_ := config.Conf.WarnPeriod.Int64()
  418. warnPeriod =warnPeriod*60
  419. warnMinCount,_:= config.Conf.WarnMinCount.Int64()
  420. warnSendPeriod,_:= config.Conf.WarnSendPeriod.Int64()
  421. CheckAccessLog(now,warnPeriod,warnMinCount,warnSendPeriod)
  422. CheckProviderLog(now,warnPeriod,warnMinCount,warnSendPeriod)
  423. }
  424. func ReciveAccessLog(ctx context.Context, req *apis.ReciveAccessLogReq, reply *apis.ReciveAccessLogReply) error{
  425. //fmt.Println("access log req:",req)
  426. defer func() {
  427. if r := recover(); r != nil {
  428. err := fmt.Errorf("%+v", r)
  429. fmt.Println("err:",err)
  430. }
  431. }()
  432. CountAccessLog(req)
  433. return nil
  434. }
  435. func ReciveThirdPartLog(ctx context.Context, req *apis.ReciveThirdPartLogReq, reply *apis.ReciveThirdPartLogReply) error{
  436. //fmt.Println("thirdpart log req:",req)
  437. defer func() {
  438. if r := recover(); r != nil {
  439. err := fmt.Errorf("%+v", r)
  440. fmt.Println("err:",err)
  441. }
  442. }()
  443. CountThirdPartLog(req)
  444. return nil
  445. }