video.go 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. package track
  2. import (
  3. "io"
  4. "time"
  5. "github.com/pion/rtp"
  6. "go.uber.org/zap"
  7. "m7s.live/engine/v4/codec"
  8. "m7s.live/engine/v4/common"
  9. . "m7s.live/engine/v4/common"
  10. "m7s.live/engine/v4/util"
  11. )
  12. type Video struct {
  13. Media
  14. CodecID codec.VideoCodecID
  15. GOP int //关键帧间隔
  16. nalulenSize int //avcc格式中表示nalu长度的字节数,通常为4
  17. dcChanged bool //解码器配置是否改变了,一般由于变码率导致
  18. dtsEst *util.DTSEstimator
  19. lostFlag bool // 是否丢帧
  20. codec.SPSInfo
  21. ParamaterSets `json:"-" yaml:"-"`
  22. SPS []byte `json:"-" yaml:"-"`
  23. PPS []byte `json:"-" yaml:"-"`
  24. }
  25. func (v *Video) Attach() {
  26. v.Info("attach video track", zap.Uint("width", v.Width), zap.Uint("height", v.Height))
  27. if err := v.Publisher.GetStream().AddTrack(v).Await(); err != nil {
  28. v.Error("attach video track failed", zap.Error(err))
  29. } else {
  30. v.Info("video track attached", zap.Uint("width", v.Width), zap.Uint("height", v.Height))
  31. }
  32. }
  33. func (v *Video) Detach() {
  34. v.Publisher.GetStream().RemoveTrack(v)
  35. }
  36. func (vt *Video) GetName() string {
  37. if vt.Name == "" {
  38. return vt.CodecID.String()
  39. }
  40. return vt.Name
  41. }
  42. func (vt *Video) GetCodec() codec.VideoCodecID {
  43. return vt.CodecID
  44. }
// PlayFullAnnexB subscribes to the stream in Annex-B format, prepending the SPS and PPS headers to every I-frame.
  46. // func (vt *Video) PlayFullAnnexB(ctx context.Context, onMedia func(net.Buffers) error) error {
  47. // for vr := vt.ReadRing(); ctx.Err() == nil; vr.MoveNext() {
  48. // vp := vr.Read(ctx)
  49. // var data net.Buffers
  50. // if vp.IFrame {
  51. // data = vt.GetAnnexB()
  52. // }
  53. // data = append(data, codec.NALU_Delimiter2)
  54. // for slice := vp.AUList.Head; slice != nil; slice = slice.Next {
  55. // data = append(data, slice.ToBuffers()...)
  56. // if slice.Next != nil {
  57. // data = append(data, codec.NALU_Delimiter1)
  58. // }
  59. // }
  60. // if err := onMedia(data); err != nil {
  61. // // TODO: log err
  62. // return err
  63. // }
  64. // }
  65. // return ctx.Err()
  66. // }
  67. func (vt *Video) computeGOP() {
  68. if vt.IDRing != nil {
  69. vt.GOP = int(vt.Value.Sequence - vt.IDRing.Value.Sequence)
  70. if vt.HistoryRing == nil {
  71. vt.narrow(vt.GOP)
  72. }
  73. }
  74. vt.AddIDR()
  75. // var n int
  76. // for i := 0; i < len(vt.BytesPool); i++ {
  77. // n += vt.BytesPool[i].Length
  78. // }
  79. // println(n)
  80. }
  81. func (vt *Video) writeAnnexBSlice(nalu []byte) {
  82. common.SplitAnnexB(nalu, vt.WriteSliceBytes, codec.NALU_Delimiter1)
  83. }
  84. func (vt *Video) WriteNalu(pts uint32, dts uint32, nalu []byte) {
  85. if dts == 0 {
  86. vt.generateTimestamp(pts)
  87. } else {
  88. vt.Value.PTS = time.Duration(pts)
  89. vt.Value.DTS = time.Duration(dts)
  90. }
  91. vt.Value.BytesIn += len(nalu)
  92. vt.WriteSliceBytes(nalu)
  93. vt.Flush()
  94. }
  95. func (vt *Video) WriteAnnexB(pts uint32, dts uint32, frame []byte) {
  96. if dts == 0 {
  97. vt.generateTimestamp(pts)
  98. } else {
  99. vt.Value.PTS = time.Duration(pts)
  100. vt.Value.DTS = time.Duration(dts)
  101. }
  102. vt.Value.BytesIn += len(frame)
  103. common.SplitAnnexB(frame, vt.writeAnnexBSlice, codec.NALU_Delimiter2)
  104. if vt.Value.AUList.ByteLength > 0 {
  105. vt.Flush()
  106. }
  107. }
  108. func (vt *Video) writeAVCCFrame(ts uint32, r *util.BLLReader, frame *util.BLL) (err error) {
  109. var cts uint32
  110. cts, err = r.ReadBE(3)
  111. if err != nil {
  112. return err
  113. }
  114. vt.Value.PTS = time.Duration(ts+cts) * 90
  115. vt.Value.DTS = time.Duration(ts) * 90
  116. var nalulen uint32
  117. for nalulen, err = r.ReadBE(vt.nalulenSize); err == nil; nalulen, err = r.ReadBE(vt.nalulenSize) {
  118. if remain := frame.ByteLength - r.GetOffset(); remain < int(nalulen) {
  119. vt.Error("read nalu length error", zap.Int("nalulen", int(nalulen)), zap.Int("remain", remain))
  120. frame.Recycle()
  121. vt.Value.Reset()
  122. return
  123. }
  124. vt.AppendAuBytes(r.ReadN(int(nalulen))...)
  125. }
  126. return nil
  127. }
  128. func (vt *Video) WriteAVCC(ts uint32, frame *util.BLL) (err error) {
  129. if l := frame.ByteLength; l < 6 {
  130. vt.Error("AVCC data too short", zap.Int("len", l))
  131. return io.ErrShortWrite
  132. }
  133. // bbb := util.Buffer(frame.ToBytes()[5:])
  134. r := frame.NewReader()
  135. b, _ := r.ReadByte()
  136. isExtHeader := (b >> 4) & 0b1000
  137. frameType := (b >> 4) & 0b0111
  138. vt.Value.IFrame = frameType == 1 || frameType == 4
  139. packetType := b & 0b1111
  140. if isExtHeader != 0 {
  141. r.ReadBE(4) // fourcc
  142. switch packetType {
  143. case codec.PacketTypeSequenceStart:
  144. err = vt.SpesificTrack.WriteSequenceHead(frame.ToBytes())
  145. frame.Recycle()
  146. return
  147. case codec.PacketTypeCodedFrames:
  148. err = vt.SpesificTrack.writeAVCCFrame(ts, r, frame)
  149. case codec.PacketTypeCodedFramesX:
  150. }
  151. } else {
  152. b, _ = r.ReadByte() //sequence frame flag
  153. if b == 0 {
  154. err = vt.SpesificTrack.WriteSequenceHead(frame.ToBytes())
  155. frame.Recycle()
  156. return
  157. }
  158. err = vt.SpesificTrack.writeAVCCFrame(ts, r, frame)
  159. }
  160. if err == nil {
  161. vt.Value.WriteAVCC(ts, frame)
  162. vt.Flush()
  163. }
  164. return
  165. }
  166. func (vt *Video) WriteSliceByte(b ...byte) {
  167. // fmt.Println("write slice byte", b)
  168. vt.WriteSliceBytes(b)
  169. }
  170. // 在I帧前面插入sps pps webrtc需要
  171. func (vt *Video) insertDCRtp() {
  172. head := vt.Value.RTP.Next
  173. for _, nalu := range vt.ParamaterSets {
  174. var packet rtp.Packet
  175. packet.Version = 2
  176. packet.PayloadType = vt.PayloadType
  177. packet.Payload = nalu
  178. packet.SSRC = vt.SSRC
  179. packet.Timestamp = uint32(vt.Value.PTS)
  180. packet.Marker = false
  181. head.InsertBeforeValue(RTPFrame{Packet: &packet})
  182. }
  183. }
  184. func (vt *Video) generateTimestamp(ts uint32) {
  185. if vt.State == TrackStateOffline {
  186. vt.dtsEst = util.NewDTSEstimator()
  187. }
  188. vt.Value.PTS = time.Duration(ts)
  189. vt.Value.DTS = time.Duration(vt.dtsEst.Feed(ts))
  190. }
  191. func (vt *Video) SetLostFlag() {
  192. vt.lostFlag = true
  193. }
  194. func (vt *Video) CompleteAVCC(rv *AVFrame) {
  195. mem := vt.BytesPool.Get(5)
  196. b := mem.Value
  197. if rv.IFrame {
  198. b[0] = 0x10 | byte(vt.CodecID)
  199. } else {
  200. b[0] = 0x20 | byte(vt.CodecID)
  201. }
  202. b[1] = 1
  203. // println(rv.PTS < rv.DTS, "\t", rv.PTS, "\t", rv.DTS, "\t", rv.PTS-rv.DTS)
  204. // vt.Info("cts", zap.Uint32("cts", uint32((rv.PTS-rv.DTS)/90)))
  205. // 写入CTS
  206. util.PutBE(b[2:5], (rv.PTS-rv.DTS)/90)
  207. rv.AVCC.Push(mem)
  208. // if rv.AVCC.ByteLength != 5 {
  209. // panic("error")
  210. // }
  211. // var tmp = 0
  212. rv.AUList.Range(func(au *util.BLL) bool {
  213. mem = vt.BytesPool.Get(4)
  214. // println(au.ByteLength)
  215. util.PutBE(mem.Value, uint32(au.ByteLength))
  216. rv.AVCC.Push(mem)
  217. au.Range(func(slice util.Buffer) bool {
  218. rv.AVCC.Push(vt.BytesPool.GetShell(slice))
  219. return true
  220. })
  221. // tmp += 4 + au.ByteLength
  222. // if rv.AVCC.ByteLength != 5+tmp {
  223. // panic("error")
  224. // }
  225. return true
  226. })
  227. }
  228. func (vt *Video) Flush() {
  229. rv := vt.Value
  230. if rv.IFrame {
  231. vt.computeGOP()
  232. if audioTrack := vt.Publisher.GetAudioTrack(); audioTrack != nil {
  233. audioTrack.Narrow()
  234. }
  235. }
  236. if !vt.iframeReceived {
  237. if vt.IDRing != nil && vt.SequenceHeadSeq > 0 {
  238. defer vt.Attach()
  239. vt.iframeReceived = true
  240. } else {
  241. rv.Reset()
  242. return
  243. }
  244. }
  245. if vt.lostFlag {
  246. if rv.IFrame {
  247. vt.lostFlag = false
  248. } else {
  249. rv.Reset()
  250. return
  251. }
  252. }
  253. vt.Media.Flush()
  254. vt.dcChanged = false
  255. }
  256. func (vt *Video) WriteSequenceHead(sh []byte) {
  257. vt.Media.WriteSequenceHead(sh)
  258. vt.dcChanged = true
  259. }
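/*
The first NALU of an Access Unit uses a 4-byte start code.
Here is an example: JM can generate a bitstream like the following (do not use JM 8.6, which deviates from the standard in this respect); the bitstream can be found in the attachment to this post:
SPS (4-byte header)
PPS (4-byte header)
SEI (4-byte header)
I0(slice0) (4-byte header)
I0(slice1) (3-byte header)
P1(slice0) (4-byte header)
P1(slice1) (3-byte header)
P2(slice0) (4-byte header)
P2(slice1) (3-byte header)
I0(slice0) is the first slice of the first frame (an I-frame) of the sequence and is the first NALU of the current Access Unit, so it has a 4-byte header. I0(slice1) is the second slice of the first frame, so it has a 3-byte header. The same applies to P1(slice0) and P1(slice1).
*/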