// Package codec defines audio/video codec identifiers and helpers for the AAC
// AudioSpecificConfig and the ADTS (Audio Data Transport Stream) header.
  1. package codec
  2. import (
  3. "errors"
  4. )
  5. type AudioCodecID byte
  6. type VideoCodecID byte
  7. const (
  8. ADTS_HEADER_SIZE = 7
  9. CodecID_AAC AudioCodecID = 0xA
  10. CodecID_PCMA AudioCodecID = 7
  11. CodecID_PCMU AudioCodecID = 8
  12. CodecID_OPUS AudioCodecID = 0xC
  13. CodecID_H264 VideoCodecID = 7
  14. CodecID_H265 VideoCodecID = 0xC
  15. CodecID_AV1 VideoCodecID = 0xD
  16. )
  17. func (codecId AudioCodecID) String() string {
  18. switch codecId {
  19. case CodecID_AAC:
  20. return "aac"
  21. case CodecID_PCMA:
  22. return "pcma"
  23. case CodecID_PCMU:
  24. return "pcmu"
  25. case CodecID_OPUS:
  26. return "opus"
  27. }
  28. return "unknow"
  29. }
  30. func (codecId VideoCodecID) String() string {
  31. switch codecId {
  32. case CodecID_H264:
  33. return "h264"
  34. case CodecID_H265:
  35. return "h265"
  36. case CodecID_AV1:
  37. return "av1"
  38. }
  39. return "unknow"
  40. }
  41. // ISO/IEC 14496-3 38(52)/page
  42. //
  43. // Audio
  44. //
  45. type AudioSpecificConfig struct {
  46. AudioObjectType byte // 5 bits
  47. SamplingFrequencyIndex byte // 4 bits
  48. ChannelConfiguration byte // 4 bits
  49. GASpecificConfig
  50. }
  51. func (asc *AudioSpecificConfig) Parse(data []byte) {
  52. asc.AudioObjectType = data[0] >> 3
  53. asc.SamplingFrequencyIndex = (data[0] & 0x07 << 1) | (data[1] >> 7)
  54. asc.ChannelConfiguration = (data[1] >> 3) & 0x0F
  55. asc.FrameLengthFlag = (data[1] >> 2) & 0x01
  56. asc.DependsOnCoreCoder = (data[1] >> 1) & 0x01
  57. asc.ExtensionFlag = data[1] & 0x01
  58. }
  59. func (asc *AudioSpecificConfig) ToADTS(rawDataLength int, adtsByte []byte) (adts ADTS, err error) {
  60. return AudioSpecificConfigToADTS(asc, rawDataLength, adtsByte)
  61. }
  62. type GASpecificConfig struct {
  63. FrameLengthFlag byte // 1 bit
  64. DependsOnCoreCoder byte // 1 bit
  65. ExtensionFlag byte // 1 bit
  66. }
  67. //
  68. // AudioObjectTypes -> ISO/IEC 14496-3 43(57)/page
  69. //
  70. // 1 AAC MAIN ISO/IEC 14496-3 subpart 4
  71. // 2 AAC LC ISO/IEC 14496-3 subpart 4
  72. // 3 AAC SSR ISO/IEC 14496-3 subpart 4
  73. // 4 AAC LTP ISO/IEC 14496-3 subpart 4
  74. //
  75. //
  76. // ISO/IEC 13838-7 20(25)/page
  77. //
  78. // # Advanced Audio Coding
  79. //
  80. // AudioDataTransportStream
  81. type ADTS struct {
  82. ADTSFixedHeader
  83. ADTSVariableHeader
  84. }
  85. // 28 bits
  86. type ADTSFixedHeader struct {
  87. SyncWord uint16 // 12 bits The bit string ‘1111 1111 1111’. See ISO/IEC 11172-3,subclause 2.4.2.3 (Table 8)
  88. ID byte // 1 bit MPEG identifier, set to ‘1’. See ISO/IEC 11172-3,subclause 2.4.2.3 (Table 8)
  89. Layer byte // 2 bits Indicates which layer is used. Set to ‘00’. See ISO/IEC 11172-3,subclause 2.4.2.3 (Table 8)
  90. ProtectionAbsent byte // 1 bit Indicates whether error_check() data is present or not. Same assyntax element ‘protection_bit’ in ISO/IEC 11172-3,subclause 2.4.1 and 2.4.2 (Table 8)
  91. Profile byte // 2 bits profile used. See clause 2 (Table 8)
  92. SamplingFrequencyIndex byte // 4 bits indicates the sampling frequency used according to the followingtable (Table 8)
  93. PrivateBit byte // 1 bit see ISO/IEC 11172-3, subclause 2.4.2.3 (Table 8)
  94. ChannelConfiguration byte // 3 bits indicates the channel configuration used. Ifchannel_configuration is greater than 0, the channelconfiguration is given in Table 42, see subclause 8.5.3.1. Ifchannel_configuration equals 0, the channel configuration is notspecified in the header and must be given by aprogram_config_element() following as first syntactic element inthe first raw_data_block() after the header (seesubclause 8.5.3.2), or by the implicit configuration (seesubclause 8.5.3.3) or must be known in the application (Table 8)
  95. OriginalCopy byte // 1 bit see ISO/IEC 11172-3, definition of data element copyright
  96. Home byte // 1 bit see ISO/IEC 11172-3, definition of data element original/copy
  97. }
  98. // SyncWord, 同步头 总是0xFFF, all bits must be 1,代表着一个ADTS帧的开始
  99. // ID, MPEG Version: 0 for MPEG-4, 1 for MPEG-2
  100. // Layer, always: '00'
  101. // ProtectionAbsent, 表示是否误码校验
  102. // Profile, 表示使用哪个级别的AAC,有些芯片只支持AAC LC 。在MPEG-2 AAC中定义了3种.
  103. // SamplingFrequencyIndex, 表示使用的采样率下标,通过这个下标在 Sampling Frequencies[ ]数组中查找得知采样率的值
  104. // PrivateBit,
  105. // ChannelConfiguration, 表示声道数
  106. // OriginalCopy,
  107. // Home,
  108. // Profile:
  109. //
  110. // 0: Main profile
  111. // 1: Low Complexity profile(LC)
  112. // 2: Scalable Sampling Rate profile(SSR)
  113. // 3: Reserved
  114. var SamplingFrequencies = [...]int{96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000, 7350, 0, 0, 0}
  115. // Sampling Frequencies[]:
  116. //
  117. // 0: 96000 Hz
  118. // 1: 88200 Hz
  119. // 2: 64000 Hz
  120. // 3: 48000 Hz
  121. // 4: 44100 Hz
  122. // 5: 32000 Hz
  123. // 6: 24000 Hz
  124. // 7: 22050 Hz
  125. // 8: 16000 Hz
  126. // 9: 12000 Hz
  127. // 10: 11025 Hz
  128. // 11: 8000 Hz
  129. // 12: 7350 Hz
  130. // 13: Reserved
  131. // 14: Reserved
  132. // 15: frequency is written explictly
  133. //
  134. // ChannelConfiguration:
  135. //
// 0: Defined in AOT Specific Config
  137. // 1: 1 channel: front-center
  138. // 2: 2 channels: front-left, front-right
  139. // 3: 3 channels: front-center, front-left, front-right
  140. // 4: 4 channels: front-center, front-left, front-right, back-center
  141. // 5: 5 channels: front-center, front-left, front-right, back-left, back-right
  142. // 6: 6 channels: front-center, front-left, front-right, back-left, back-right, LFE-channel
  143. // 7: 8 channels: front-center, front-left, front-right, side-left, side-right, back-left, back-right, LFE-channel
  144. // 8-15: Reserved
  145. //
  146. // 28 bits
  147. type ADTSVariableHeader struct {
  148. CopyrightIdentificationBit byte // 1 bit One bit of the 72-bit copyright identification field (seecopyright_id above). The bits of this field are transmitted frame by frame; the first bit is indicated by the copyright_identification_start bit set to ‘1’. The field consists of an 8-bit copyright_identifier, followed by a 64-bit copyright_number.The copyright identifier is given by a Registration Authority as designated by SC29. The copyright_number is a value which identifies uniquely the copyrighted material. See ISO/IEC 13818-3, subclause 2.5.2.13 (Table 9)
  149. CopyrightIdentificationStart byte // 1 bit One bit to indicate that the copyright_identification_bit in this audio frame is the first bit of the 72-bit copyright identification. If no copyright identification is transmitted, this bit should be kept '0'.'0' no start of copyright identification in this audio frame '1' start of copyright identification in this audio frame See ISO/IEC 13818-3, subclause 2.5.2.13 (Table 9)
  150. AACFrameLength uint16 // 13 bits Length of the frame including headers and error_check in bytes(Table 9)
  151. ADTSBufferFullness uint16 // 11 bits state of the bit reservoir in the course of encoding the ADTS frame, up to and including the first raw_data_block() and the optionally following adts_raw_data_block_error_check(). It is transmitted as the number of available bits in the bit reservoir divided by NCC divided by 32 and truncated to an integer value (Table 9). A value of hexadecimal 7FF signals that the bitstream is a variable rate bitstream. In this case, buffer fullness is not applicable
  152. NumberOfRawDataBlockInFrame byte // 2 bits Number of raw_data_block()’s that are multiplexed in the adts_frame() is equal to number_of_raw_data_blocks_in_frame + 1. The minimum value is 0 indicating 1 raw_data_block()(Table 9)
  153. }
  154. // CopyrightIdentificationBit,
  155. // CopyrightIdentificationStart,
  156. // AACFrameLength, 一个ADTS帧的长度包括ADTS头和raw data block.
  157. // ADTSBufferFullness, 0x7FF 说明是码率可变的码流.
  158. // NumberOfRawDataBlockInFrame, 表示ADTS帧中有number_of_raw_data_blocks_in_frame + 1个AAC原始帧
  159. // 所以说number_of_raw_data_blocks_in_frame == 0 表示说ADTS帧中有一个AAC数据块并不是说没有。(一个AAC原始帧包含一段时间内1024个采样及相关数据)
  160. func AudioSpecificConfigToADTS(asc *AudioSpecificConfig, rawDataLength int, adtsByte []byte) (adts ADTS, err error) {
  161. if asc.ChannelConfiguration > 8 || asc.FrameLengthFlag > 13 {
  162. err = errors.New("Reserved field.")
  163. return
  164. }
  165. // ADTSFixedHeader
  166. adts.SyncWord = 0xfff
  167. adts.ID = 0
  168. adts.Layer = 0
  169. adts.ProtectionAbsent = 1
  170. // SyncWord(12) + ID(1) + Layer(2) + ProtectionAbsent(1)
  171. adtsByte[0] = 0xFF
  172. adtsByte[1] = 0xF1
  173. if asc.AudioObjectType >= 3 || asc.AudioObjectType == 0 {
  174. adts.Profile = 1
  175. } else {
  176. adts.Profile = asc.AudioObjectType - 1
  177. }
  178. adts.SamplingFrequencyIndex = asc.SamplingFrequencyIndex
  179. adts.PrivateBit = 0
  180. adts.ChannelConfiguration = asc.ChannelConfiguration
  181. adts.OriginalCopy = 0
  182. adts.Home = 0
  183. // Profile(2) + SamplingFrequencyIndex(4) + PrivateBit(1) + ChannelConfiguration(3)(取高1位)
  184. adtsByte[2] = uint8(adts.Profile<<6) + uint8(adts.SamplingFrequencyIndex<<2) + uint8(adts.PrivateBit<<1) + uint8((adts.ChannelConfiguration&0x7)>>2)
  185. // ADTSVariableHeader
  186. adts.CopyrightIdentificationBit = 0
  187. adts.CopyrightIdentificationStart = 0
  188. adts.AACFrameLength = 7 + uint16(rawDataLength)
  189. adts.ADTSBufferFullness = 0x7ff
  190. adts.NumberOfRawDataBlockInFrame = 0
  191. // ChannelConfiguration(3)(取低2位) + OriginalCopy(1) + Home(1) + CopyrightIdentificationBit(1) + CopyrightIdentificationStart(1) + AACFrameLength(13)(取高2位)
  192. adtsByte[3] = uint8((adts.ChannelConfiguration&0x3)<<6) + uint8((adts.AACFrameLength&0x1fff)>>11)
  193. // AACFrameLength(13)
  194. // xx xxxxxxxx xxx
  195. // 取中间的部分
  196. adtsByte[4] = uint8(((adts.AACFrameLength & 0x1fff) >> 3) & 0x0ff)
  197. // AACFrameLength(13)(取低3位) + ADTSBufferFullness(11)(取高5位)
  198. adtsByte[5] = uint8((adts.AACFrameLength&0x0007)<<5) + 0x1f
  199. // ADTSBufferFullness(11)(取低6位) + NumberOfRawDataBlockInFrame(2)
  200. adtsByte[6] = 0xfc
  201. return
  202. }