init
This commit is contained in:
687
frame/frame.go
Executable file
687
frame/frame.go
Executable file
@@ -0,0 +1,687 @@
|
||||
// Package frame implements access to FLAC audio frames.
|
||||
//
|
||||
// A brief introduction of the FLAC audio format [1] follows. FLAC encoders
|
||||
// divide the audio stream into blocks through a process called blocking [2]. A
|
||||
// block contains the unencoded audio samples from all channels during a short
|
||||
// period of time. Each audio block is divided into subblocks, one per channel.
|
||||
//
|
||||
// There is often a correlation between the left and right channel of stereo
|
||||
// audio. Using inter-channel decorrelation [3] it is possible to store only one
|
||||
// of the channels and the difference between the channels, or store the average
|
||||
// of the channels and their difference. An encoder decorrelates audio samples
|
||||
// as follows:
|
||||
//
|
||||
// mid = (left + right)/2 // average of the channels
|
||||
// side = left - right // difference between the channels
|
||||
//
|
||||
// The blocks are encoded using a variety of prediction methods [4][5] and
|
||||
// stored in frames. Blocks and subblocks contain unencoded audio samples while
|
||||
// frames and subframes contain encoded audio samples. A FLAC stream contains
|
||||
// one or more audio frames.
|
||||
//
|
||||
// [1]: https://www.xiph.org/flac/format.html#architecture
|
||||
// [2]: https://www.xiph.org/flac/format.html#blocking
|
||||
// [3]: https://www.xiph.org/flac/format.html#interchannel
|
||||
// [4]: https://www.xiph.org/flac/format.html#prediction
|
||||
// [5]: https://godoc.org/github.com/mewkiz/flac/frame#Pred
|
||||
package frame
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash"
|
||||
"io"
|
||||
"log"
|
||||
|
||||
"github.com/mewkiz/flac/internal/bits"
|
||||
"github.com/mewkiz/flac/internal/hashutil"
|
||||
"github.com/mewkiz/flac/internal/hashutil/crc16"
|
||||
"github.com/mewkiz/flac/internal/hashutil/crc8"
|
||||
"github.com/mewkiz/flac/internal/utf8"
|
||||
)
|
||||
|
||||
// A Frame contains the header and subframes of an audio frame. It holds the
|
||||
// encoded samples from a block (a part) of the audio stream. Each subframe
|
||||
// holding the samples from one of its channel.
|
||||
//
|
||||
// ref: https://www.xiph.org/flac/format.html#frame
|
||||
type Frame struct {
|
||||
// Audio frame header.
|
||||
Header
|
||||
// One subframe per channel, containing encoded audio samples.
|
||||
Subframes []*Subframe
|
||||
// CRC-16 hash sum, calculated by read operations on hr.
|
||||
crc hashutil.Hash16
|
||||
// A bit reader, wrapping read operations to hr.
|
||||
br *bits.Reader
|
||||
// A CRC-16 hash reader, wrapping read operations to r.
|
||||
hr io.Reader
|
||||
// Underlying io.Reader.
|
||||
r io.Reader
|
||||
}
|
||||
|
||||
// New creates a new Frame for accessing the audio samples of r. It reads and
|
||||
// parses an audio frame header. It returns io.EOF to signal a graceful end of
|
||||
// FLAC stream.
|
||||
//
|
||||
// Call Frame.Parse to parse the audio samples of its subframes.
|
||||
func New(r io.Reader) (frame *Frame, err error) {
|
||||
// Create a new CRC-16 hash reader which adds the data from all read
|
||||
// operations to a running hash.
|
||||
crc := crc16.NewIBM()
|
||||
hr := io.TeeReader(r, crc)
|
||||
|
||||
// Parse frame header.
|
||||
frame = &Frame{crc: crc, hr: hr, r: r}
|
||||
err = frame.parseHeader()
|
||||
return frame, err
|
||||
}
|
||||
|
||||
// Parse reads and parses the header, and the audio samples from each subframe
|
||||
// of a frame. If the samples are inter-channel decorrelated between the
|
||||
// subframes, it correlates them. It returns io.EOF to signal a graceful end of
|
||||
// FLAC stream.
|
||||
//
|
||||
// ref: https://www.xiph.org/flac/format.html#interchannel
|
||||
func Parse(r io.Reader) (frame *Frame, err error) {
|
||||
// Parse frame header.
|
||||
frame, err = New(r)
|
||||
if err != nil {
|
||||
return frame, err
|
||||
}
|
||||
|
||||
// Parse subframes.
|
||||
err = frame.Parse()
|
||||
return frame, err
|
||||
}
|
||||
|
||||
// Parse reads and parses the audio samples from each subframe of the frame. If
|
||||
// the samples are inter-channel decorrelated between the subframes, it
|
||||
// correlates them.
|
||||
//
|
||||
// ref: https://www.xiph.org/flac/format.html#interchannel
|
||||
func (frame *Frame) Parse() error {
|
||||
// Parse subframes.
|
||||
frame.Subframes = make([]*Subframe, frame.Channels.Count())
|
||||
var err error
|
||||
for channel := range frame.Subframes {
|
||||
// The side channel requires an extra bit per sample when using
|
||||
// inter-channel decorrelation.
|
||||
bps := uint(frame.BitsPerSample)
|
||||
switch frame.Channels {
|
||||
case ChannelsSideRight:
|
||||
// channel 0 is the side channel.
|
||||
if channel == 0 {
|
||||
bps++
|
||||
}
|
||||
case ChannelsLeftSide, ChannelsMidSide:
|
||||
// channel 1 is the side channel.
|
||||
if channel == 1 {
|
||||
bps++
|
||||
}
|
||||
}
|
||||
|
||||
// Parse subframe.
|
||||
frame.Subframes[channel], err = frame.parseSubframe(frame.br, bps)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Inter-channel correlation of subframe samples.
|
||||
frame.Correlate()
|
||||
|
||||
// 2 bytes: CRC-16 checksum.
|
||||
var want uint16
|
||||
if err = binary.Read(frame.r, binary.BigEndian, &want); err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
got := frame.crc.Sum16()
|
||||
if got != want {
|
||||
return fmt.Errorf("frame.Frame.Parse: CRC-16 checksum mismatch; expected 0x%04X, got 0x%04X", want, got)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Hash adds the decoded audio samples of the frame to a running MD5 hash. It
|
||||
// can be used in conjunction with StreamInfo.MD5sum to verify the integrity of
|
||||
// the decoded audio samples.
|
||||
//
|
||||
// Note: The audio samples of the frame must be decoded before calling Hash.
|
||||
func (frame *Frame) Hash(md5sum hash.Hash) {
|
||||
// Write decoded samples to a running MD5 hash.
|
||||
bps := frame.BitsPerSample
|
||||
var buf [3]byte
|
||||
for i := 0; i < int(frame.BlockSize); i++ {
|
||||
for _, subframe := range frame.Subframes {
|
||||
sample := subframe.Samples[i]
|
||||
switch {
|
||||
case 1 <= bps && bps <= 8:
|
||||
buf[0] = uint8(sample)
|
||||
md5sum.Write(buf[:1])
|
||||
case 9 <= bps && bps <= 16:
|
||||
buf[0] = uint8(sample)
|
||||
buf[1] = uint8(sample >> 8)
|
||||
md5sum.Write(buf[:2])
|
||||
case 17 <= bps && bps <= 24:
|
||||
buf[0] = uint8(sample)
|
||||
buf[1] = uint8(sample >> 8)
|
||||
buf[2] = uint8(sample >> 16)
|
||||
md5sum.Write(buf[:])
|
||||
default:
|
||||
log.Printf("frame.Frame.Hash: support for %d-bit sample size not yet implemented", bps)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A Header contains the basic properties of an audio frame, such as its sample
|
||||
// rate and channel count. To facilitate random access decoding each frame
|
||||
// header starts with a sync-code. This allows the decoder to synchronize and
|
||||
// locate the start of a frame header.
|
||||
//
|
||||
// ref: https://www.xiph.org/flac/format.html#frame_header
|
||||
type Header struct {
|
||||
// Specifies if the block size is fixed or variable.
|
||||
HasFixedBlockSize bool
|
||||
// Block size in inter-channel samples, i.e. the number of audio samples in
|
||||
// each subframe.
|
||||
BlockSize uint16
|
||||
// Sample rate in Hz; a 0 value implies unknown, get sample rate from
|
||||
// StreamInfo.
|
||||
SampleRate uint32
|
||||
// Specifies the number of channels (subframes) that exist in the frame,
|
||||
// their order and possible inter-channel decorrelation.
|
||||
Channels Channels
|
||||
// Sample size in bits-per-sample; a 0 value implies unknown, get sample size
|
||||
// from StreamInfo.
|
||||
BitsPerSample uint8
|
||||
// Specifies the frame number if the block size is fixed, and the first
|
||||
// sample number in the frame otherwise. When using fixed block size, the
|
||||
// first sample number in the frame can be derived by multiplying the frame
|
||||
// number with the block size (in samples).
|
||||
Num uint64
|
||||
}
|
||||
|
||||
// Sentinel errors returned by Frame.parseHeader.
var (
	// ErrInvalidSync is returned when a frame header does not start with the
	// expected sync-code.
	ErrInvalidSync = errors.New("frame.Frame.parseHeader: invalid sync-code")
)
|
||||
|
||||
// parseHeader reads and parses the header of an audio frame.
|
||||
func (frame *Frame) parseHeader() error {
|
||||
// Create a new CRC-8 hash reader which adds the data from all read
|
||||
// operations to a running hash.
|
||||
h := crc8.NewATM()
|
||||
hr := io.TeeReader(frame.hr, h)
|
||||
|
||||
// Create bit reader.
|
||||
br := bits.NewReader(hr)
|
||||
frame.br = br
|
||||
|
||||
// 14 bits: sync-code (11111111111110)
|
||||
x, err := br.Read(14)
|
||||
if err != nil {
|
||||
// This is the only place an audio frame may return io.EOF, which signals
|
||||
// a graceful end of a FLAC stream.
|
||||
return err
|
||||
}
|
||||
if x != 0x3FFE {
|
||||
return ErrInvalidSync
|
||||
}
|
||||
|
||||
// 1 bit: reserved.
|
||||
x, err = br.Read(1)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
if x != 0 {
|
||||
return errors.New("frame.Frame.parseHeader: non-zero reserved value")
|
||||
}
|
||||
|
||||
// 1 bit: HasFixedBlockSize.
|
||||
x, err = br.Read(1)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
if x == 0 {
|
||||
frame.HasFixedBlockSize = true
|
||||
}
|
||||
|
||||
// 4 bits: BlockSize. The block size parsing is simplified by deferring it to
|
||||
// the end of the header.
|
||||
blockSize, err := br.Read(4)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
|
||||
// 4 bits: SampleRate. The sample rate parsing is simplified by deferring it
|
||||
// to the end of the header.
|
||||
sampleRate, err := br.Read(4)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
|
||||
// Parse channels.
|
||||
if err := frame.parseChannels(br); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Parse bits per sample.
|
||||
if err := frame.parseBitsPerSample(br); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// 1 bit: reserved.
|
||||
x, err = br.Read(1)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
if x != 0 {
|
||||
return errors.New("frame.Frame.parseHeader: non-zero reserved value")
|
||||
}
|
||||
|
||||
// if (fixed block size)
|
||||
// 1-6 bytes: UTF-8 encoded frame number.
|
||||
// else
|
||||
// 1-7 bytes: UTF-8 encoded sample number.
|
||||
frame.Num, err = utf8.Decode(hr)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
|
||||
// Parse block size.
|
||||
if err := frame.parseBlockSize(br, blockSize); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Parse sample rate.
|
||||
if err := frame.parseSampleRate(br, sampleRate); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// 1 byte: CRC-8 checksum.
|
||||
var want uint8
|
||||
if err = binary.Read(frame.hr, binary.BigEndian, &want); err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
got := h.Sum8()
|
||||
if want != got {
|
||||
return fmt.Errorf("frame.Frame.parseHeader: CRC-8 checksum mismatch; expected 0x%02X, got 0x%02X", want, got)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// parseBitsPerSample parses the bits per sample of the header.
|
||||
func (frame *Frame) parseBitsPerSample(br *bits.Reader) error {
|
||||
// 3 bits: BitsPerSample.
|
||||
x, err := br.Read(3)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
|
||||
// The 3 bits are used to specify the sample size as follows:
|
||||
// 000: unknown sample size; get from StreamInfo.
|
||||
// 001: 8 bits-per-sample.
|
||||
// 010: 12 bits-per-sample.
|
||||
// 011: reserved.
|
||||
// 100: 16 bits-per-sample.
|
||||
// 101: 20 bits-per-sample.
|
||||
// 110: 24 bits-per-sample.
|
||||
// 111: reserved.
|
||||
switch x {
|
||||
case 0x0:
|
||||
// 000: unknown bits-per-sample; get from StreamInfo.
|
||||
case 0x1:
|
||||
// 001: 8 bits-per-sample.
|
||||
frame.BitsPerSample = 8
|
||||
case 0x2:
|
||||
// 010: 12 bits-per-sample.
|
||||
frame.BitsPerSample = 12
|
||||
case 0x4:
|
||||
// 100: 16 bits-per-sample.
|
||||
frame.BitsPerSample = 16
|
||||
case 0x5:
|
||||
// 101: 20 bits-per-sample.
|
||||
frame.BitsPerSample = 20
|
||||
case 0x6:
|
||||
// 110: 24 bits-per-sample.
|
||||
frame.BitsPerSample = 24
|
||||
default:
|
||||
// 011: reserved.
|
||||
// 111: reserved.
|
||||
return fmt.Errorf("frame.Frame.parseHeader: reserved sample size bit pattern (%03b)", x)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// parseChannels parses the channels of the header.
|
||||
func (frame *Frame) parseChannels(br *bits.Reader) error {
|
||||
// 4 bits: Channels.
|
||||
//
|
||||
// The 4 bits are used to specify the channels as follows:
|
||||
// 0000: (1 channel) mono.
|
||||
// 0001: (2 channels) left, right.
|
||||
// 0010: (3 channels) left, right, center.
|
||||
// 0011: (4 channels) left, right, left surround, right surround.
|
||||
// 0100: (5 channels) left, right, center, left surround, right surround.
|
||||
// 0101: (6 channels) left, right, center, LFE, left surround, right surround.
|
||||
// 0110: (7 channels) left, right, center, LFE, center surround, side left, side right.
|
||||
// 0111: (8 channels) left, right, center, LFE, left surround, right surround, side left, side right.
|
||||
// 1000: (2 channels) left, side; using inter-channel decorrelation.
|
||||
// 1001: (2 channels) side, right; using inter-channel decorrelation.
|
||||
// 1010: (2 channels) mid, side; using inter-channel decorrelation.
|
||||
// 1011: reserved.
|
||||
// 1100: reserved.
|
||||
// 1101: reserved.
|
||||
// 1111: reserved.
|
||||
x, err := br.Read(4)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
if x >= 0xB {
|
||||
return fmt.Errorf("frame.Frame.parseHeader: reserved channels bit pattern (%04b)", x)
|
||||
}
|
||||
frame.Channels = Channels(x)
|
||||
return nil
|
||||
}
|
||||
|
||||
// parseBlockSize parses the block size of the header.
|
||||
func (frame *Frame) parseBlockSize(br *bits.Reader, blockSize uint64) error {
|
||||
// The 4 bits of n are used to specify the block size as follows:
|
||||
// 0000: reserved.
|
||||
// 0001: 192 samples.
|
||||
// 0010-0101: 576 * 2^(n-2) samples.
|
||||
// 0110: get 8 bit (block size)-1 from the end of the header.
|
||||
// 0111: get 16 bit (block size)-1 from the end of the header.
|
||||
// 1000-1111: 256 * 2^(n-8) samples.
|
||||
n := blockSize
|
||||
switch {
|
||||
case n == 0x0:
|
||||
// 0000: reserved.
|
||||
return errors.New("frame.Frame.parseHeader: reserved block size bit pattern (0000)")
|
||||
case n == 0x1:
|
||||
// 0001: 192 samples.
|
||||
frame.BlockSize = 192
|
||||
case n >= 0x2 && n <= 0x5:
|
||||
// 0010-0101: 576 * 2^(n-2) samples.
|
||||
frame.BlockSize = 576 * (1 << (n - 2))
|
||||
case n == 0x6:
|
||||
// 0110: get 8 bit (block size)-1 from the end of the header.
|
||||
x, err := br.Read(8)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
frame.BlockSize = uint16(x + 1)
|
||||
case n == 0x7:
|
||||
// 0111: get 16 bit (block size)-1 from the end of the header.
|
||||
x, err := br.Read(16)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
frame.BlockSize = uint16(x + 1)
|
||||
default:
|
||||
// 1000-1111: 256 * 2^(n-8) samples.
|
||||
frame.BlockSize = 256 * (1 << (n - 8))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// parseSampleRate parses the sample rate of the header.
|
||||
func (frame *Frame) parseSampleRate(br *bits.Reader, sampleRate uint64) error {
|
||||
// The 4 bits are used to specify the sample rate as follows:
|
||||
// 0000: unknown sample rate; get from StreamInfo.
|
||||
// 0001: 88.2 kHz.
|
||||
// 0010: 176.4 kHz.
|
||||
// 0011: 192 kHz.
|
||||
// 0100: 8 kHz.
|
||||
// 0101: 16 kHz.
|
||||
// 0110: 22.05 kHz.
|
||||
// 0111: 24 kHz.
|
||||
// 1000: 32 kHz.
|
||||
// 1001: 44.1 kHz.
|
||||
// 1010: 48 kHz.
|
||||
// 1011: 96 kHz.
|
||||
// 1100: get 8 bit sample rate (in kHz) from the end of the header.
|
||||
// 1101: get 16 bit sample rate (in Hz) from the end of the header.
|
||||
// 1110: get 16 bit sample rate (in daHz) from the end of the header.
|
||||
// 1111: invalid.
|
||||
switch sampleRate {
|
||||
case 0x0:
|
||||
// 0000: unknown sample rate; get from StreamInfo.
|
||||
case 0x1:
|
||||
// 0001: 88.2 kHz.
|
||||
frame.SampleRate = 88200
|
||||
case 0x2:
|
||||
// 0010: 176.4 kHz.
|
||||
frame.SampleRate = 176400
|
||||
// TODO(u): Remove log message when the test cases have been extended.
|
||||
log.Printf("frame.Frame.parseHeader: The flac library test cases do not yet include any audio files with sample rate %d. If possible please consider contributing this audio sample to improve the reliability of the test cases.", frame.SampleRate)
|
||||
case 0x3:
|
||||
// 0011: 192 kHz.
|
||||
frame.SampleRate = 192000
|
||||
case 0x4:
|
||||
// 0100: 8 kHz.
|
||||
frame.SampleRate = 8000
|
||||
case 0x5:
|
||||
// 0101: 16 kHz.
|
||||
frame.SampleRate = 16000
|
||||
case 0x6:
|
||||
// 0110: 22.05 kHz.
|
||||
frame.SampleRate = 22050
|
||||
case 0x7:
|
||||
// 0111: 24 kHz.
|
||||
frame.SampleRate = 24000
|
||||
// TODO(u): Remove log message when the test cases have been extended.
|
||||
log.Printf("frame.Frame.parseHeader: The flac library test cases do not yet include any audio files with sample rate %d. If possible please consider contributing this audio sample to improve the reliability of the test cases.", frame.SampleRate)
|
||||
case 0x8:
|
||||
// 1000: 32 kHz.
|
||||
frame.SampleRate = 32000
|
||||
case 0x9:
|
||||
// 1001: 44.1 kHz.
|
||||
frame.SampleRate = 44100
|
||||
case 0xA:
|
||||
// 1010: 48 kHz.
|
||||
frame.SampleRate = 48000
|
||||
case 0xB:
|
||||
// 1011: 96 kHz.
|
||||
frame.SampleRate = 96000
|
||||
case 0xC:
|
||||
// 1100: get 8 bit sample rate (in kHz) from the end of the header.
|
||||
x, err := br.Read(8)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
frame.SampleRate = uint32(x * 1000)
|
||||
case 0xD:
|
||||
// 1101: get 16 bit sample rate (in Hz) from the end of the header.
|
||||
x, err := br.Read(16)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
frame.SampleRate = uint32(x)
|
||||
case 0xE:
|
||||
// 1110: get 16 bit sample rate (in daHz) from the end of the header.
|
||||
x, err := br.Read(16)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
frame.SampleRate = uint32(x * 10)
|
||||
default:
|
||||
// 1111: invalid.
|
||||
return errors.New("frame.Frame.parseHeader: invalid sample rate bit pattern (1111)")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Channels specifies the number of channels (subframes) that exist in a frame,
// their order and possible inter-channel decorrelation.
type Channels uint8

// Channel assignments. The following abbreviations are used:
//
//	C:   center (directly in front)
//	R:   right (standard stereo)
//	Sr:  side right (directly to the right)
//	Rs:  right surround (back right)
//	Cs:  center surround (rear center)
//	Ls:  left surround (back left)
//	Sl:  side left (directly to the left)
//	L:   left (standard stereo)
//	Lfe: low-frequency effect (placed according to room acoustics)
//
// The first 6 channel constants follow the SMPTE/ITU-R channel order:
//
//	L R C Lfe Ls Rs
const (
	// 1 channel: mono.
	ChannelsMono Channels = iota
	// 2 channels: left, right.
	ChannelsLR
	// 3 channels: left, right, center.
	ChannelsLRC
	// 4 channels: left, right, left surround, right surround.
	ChannelsLRLsRs
	// 5 channels: left, right, center, left surround, right surround.
	ChannelsLRCLsRs
	// 6 channels: left, right, center, LFE, left surround, right surround.
	ChannelsLRCLfeLsRs
	// 7 channels: left, right, center, LFE, center surround, side left, side right.
	ChannelsLRCLfeCsSlSr
	// 8 channels: left, right, center, LFE, left surround, right surround, side left, side right.
	ChannelsLRCLfeLsRsSlSr
	// 2 channels: left, side; using inter-channel decorrelation.
	ChannelsLeftSide
	// 2 channels: side, right; using inter-channel decorrelation.
	ChannelsSideRight
	// 2 channels: mid, side; using inter-channel decorrelation.
	ChannelsMidSide
)

// nChannels specifies the number of channels used by each channel assignment.
var nChannels = [...]int{
	ChannelsMono:           1,
	ChannelsLR:             2,
	ChannelsLRC:            3,
	ChannelsLRLsRs:         4,
	ChannelsLRCLsRs:        5,
	ChannelsLRCLfeLsRs:     6,
	ChannelsLRCLfeCsSlSr:   7,
	ChannelsLRCLfeLsRsSlSr: 8,
	ChannelsLeftSide:       2,
	ChannelsSideRight:      2,
	ChannelsMidSide:        2,
}

// Count returns the number of channels (subframes) used by the provided channel
// assignment. It returns 0 if channels is not one of the channel assignment
// constants defined above, rather than panicking on an out-of-range lookup.
func (channels Channels) Count() int {
	if int(channels) >= len(nChannels) {
		return 0
	}
	return nChannels[channels]
}
|
||||
|
||||
// Correlate reverts any inter-channel decorrelation between the samples of the
|
||||
// subframes.
|
||||
//
|
||||
// An encoder decorrelates audio samples as follows:
|
||||
//
|
||||
// mid = (left + right)/2
|
||||
// side = left - right
|
||||
func (frame *Frame) Correlate() {
|
||||
switch frame.Channels {
|
||||
case ChannelsLeftSide:
|
||||
// 2 channels: left, side; using inter-channel decorrelation.
|
||||
left := frame.Subframes[0].Samples
|
||||
side := frame.Subframes[1].Samples
|
||||
for i := range side {
|
||||
// right = left - side
|
||||
side[i] = left[i] - side[i]
|
||||
}
|
||||
case ChannelsSideRight:
|
||||
// 2 channels: side, right; using inter-channel decorrelation.
|
||||
side := frame.Subframes[0].Samples
|
||||
right := frame.Subframes[1].Samples
|
||||
for i := range side {
|
||||
// left = right + side
|
||||
side[i] = right[i] + side[i]
|
||||
}
|
||||
case ChannelsMidSide:
|
||||
// 2 channels: mid, side; using inter-channel decorrelation.
|
||||
mid := frame.Subframes[0].Samples
|
||||
side := frame.Subframes[1].Samples
|
||||
for i := range side {
|
||||
// left = (2*mid + side)/2
|
||||
// right = (2*mid - side)/2
|
||||
m := mid[i]
|
||||
s := side[i]
|
||||
m *= 2
|
||||
// Notice that the integer division in mid = (left + right)/2 discards
|
||||
// the least significant bit. It can be reconstructed however, since a
|
||||
// sum A+B and a difference A-B has the same least significant bit.
|
||||
//
|
||||
// ref: Data Compression: The Complete Reference (ch. 7, Decorrelation)
|
||||
m |= s & 1
|
||||
mid[i] = (m + s) / 2
|
||||
side[i] = (m - s) / 2
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Decorrelate performs inter-channel decorrelation between the samples of the
|
||||
// subframes.
|
||||
//
|
||||
// An encoder decorrelates audio samples as follows:
|
||||
//
|
||||
// mid = (left + right)/2
|
||||
// side = left - right
|
||||
func (frame *Frame) Decorrelate() {
|
||||
switch frame.Channels {
|
||||
case ChannelsLeftSide:
|
||||
// 2 channels: left, side; using inter-channel decorrelation.
|
||||
left := frame.Subframes[0].Samples // already left; no change after inter-channel decorrelation.
|
||||
right := frame.Subframes[1].Samples // set to side after inter-channel decorrelation.
|
||||
for i := range left {
|
||||
l := left[i]
|
||||
r := right[i]
|
||||
// inter-channel decorrelation:
|
||||
// side = left - right
|
||||
side := l - r
|
||||
right[i] = side
|
||||
}
|
||||
case ChannelsSideRight:
|
||||
// 2 channels: side, right; using inter-channel decorrelation.
|
||||
left := frame.Subframes[0].Samples // set to side after inter-channel decorrelation.
|
||||
right := frame.Subframes[1].Samples // already right; no change after inter-channel decorrelation.
|
||||
for i := range left {
|
||||
l := left[i]
|
||||
r := right[i]
|
||||
// inter-channel decorrelation:
|
||||
// side = left - right
|
||||
side := l - r
|
||||
left[i] = side
|
||||
}
|
||||
case ChannelsMidSide:
|
||||
// 2 channels: mid, side; using inter-channel decorrelation.
|
||||
left := frame.Subframes[0].Samples // set to mid after inter-channel decorrelation.
|
||||
right := frame.Subframes[1].Samples // set to side after inter-channel decorrelation.
|
||||
for i := range left {
|
||||
// inter-channel decorrelation:
|
||||
// mid = (left + right)/2
|
||||
// side = left - right
|
||||
l := left[i]
|
||||
r := right[i]
|
||||
mid := int32((int64(l) + int64(r)) >> 1) // NOTE: using `(left + right) >> 1`, not the same as `(left + right) / 2`.
|
||||
side := l - r
|
||||
left[i] = mid
|
||||
right[i] = side
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// SampleNumber returns the first sample number contained within the frame.
|
||||
func (frame *Frame) SampleNumber() uint64 {
|
||||
if frame.HasFixedBlockSize {
|
||||
return frame.Num * uint64(frame.BlockSize)
|
||||
}
|
||||
return frame.Num
|
||||
}
|
||||
|
||||
// unexpected returns io.ErrUnexpectedEOF if err is io.EOF (or wraps io.EOF),
// and returns err otherwise. A nil error is returned unchanged.
//
// It is used for reads past the start of a frame, where an end of stream
// cannot be a graceful end of the FLAC stream.
func unexpected(err error) error {
	// errors.Is also matches an io.EOF that has been wrapped with additional
	// context, which a plain == comparison would miss.
	if errors.Is(err, io.EOF) {
		return io.ErrUnexpectedEOF
	}
	return err
}
|
193
frame/frame_test.go
Executable file
193
frame/frame_test.go
Executable file
@@ -0,0 +1,193 @@
|
||||
package frame_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/md5"
|
||||
"io"
|
||||
"testing"
|
||||
|
||||
"github.com/mewkiz/flac"
|
||||
)
|
||||
|
||||
// golden lists the FLAC files decoded by the test cases of this package.
var golden = []struct {
	// path is the path of the FLAC file, relative to the package directory.
	path string
}{
	// Test cases stored directly in the testdata directory.
	{path: "../testdata/love.flac"},
	{path: "../testdata/19875.flac"},
	{path: "../testdata/44127.flac"},
	{path: "../testdata/59996.flac"},
	{path: "../testdata/80574.flac"},
	{path: "../testdata/172960.flac"},
	{path: "../testdata/189983.flac"},
	{path: "../testdata/191885.flac"},
	{path: "../testdata/212768.flac"},
	{path: "../testdata/220014.flac"},
	{path: "../testdata/243749.flac"},
	{path: "../testdata/256529.flac"},
	{path: "../testdata/257344.flac"},

	// IETF test cases.
	{path: "../testdata/flac-test-files/subset/01 - blocksize 4096.flac"},
	{path: "../testdata/flac-test-files/subset/02 - blocksize 4608.flac"},
	{path: "../testdata/flac-test-files/subset/03 - blocksize 16.flac"},
	{path: "../testdata/flac-test-files/subset/04 - blocksize 192.flac"},
	{path: "../testdata/flac-test-files/subset/05 - blocksize 254.flac"},
	{path: "../testdata/flac-test-files/subset/06 - blocksize 512.flac"},
	{path: "../testdata/flac-test-files/subset/07 - blocksize 725.flac"},
	{path: "../testdata/flac-test-files/subset/08 - blocksize 1000.flac"},
	{path: "../testdata/flac-test-files/subset/09 - blocksize 1937.flac"},
	{path: "../testdata/flac-test-files/subset/10 - blocksize 2304.flac"},
	{path: "../testdata/flac-test-files/subset/11 - partition order 8.flac"},
	{path: "../testdata/flac-test-files/subset/12 - qlp precision 15 bit.flac"},
	{path: "../testdata/flac-test-files/subset/13 - qlp precision 2 bit.flac"},
	{path: "../testdata/flac-test-files/subset/14 - wasted bits.flac"},
	{path: "../testdata/flac-test-files/subset/15 - only verbatim subframes.flac"},
	{path: "../testdata/flac-test-files/subset/16 - partition order 8 containing escaped partitions.flac"},
	{path: "../testdata/flac-test-files/subset/17 - all fixed orders.flac"},
	{path: "../testdata/flac-test-files/subset/18 - precision search.flac"},
	{path: "../testdata/flac-test-files/subset/19 - samplerate 35467Hz.flac"},
	{path: "../testdata/flac-test-files/subset/20 - samplerate 39kHz.flac"},
	{path: "../testdata/flac-test-files/subset/21 - samplerate 22050Hz.flac"},
	{path: "../testdata/flac-test-files/subset/22 - 12 bit per sample.flac"},
	{path: "../testdata/flac-test-files/subset/23 - 8 bit per sample.flac"},
	{path: "../testdata/flac-test-files/subset/24 - variable blocksize file created with flake revision 264.flac"},
	{path: "../testdata/flac-test-files/subset/25 - variable blocksize file created with flake revision 264, modified to create smaller blocks.flac"},
	{path: "../testdata/flac-test-files/subset/26 - variable blocksize file created with CUETools.Flake 2.1.6.flac"},
	{path: "../testdata/flac-test-files/subset/27 - old format variable blocksize file created with Flake 0.11.flac"},
	{path: "../testdata/flac-test-files/subset/28 - high resolution audio, default settings.flac"},
	{path: "../testdata/flac-test-files/subset/29 - high resolution audio, blocksize 16384.flac"},
	{path: "../testdata/flac-test-files/subset/30 - high resolution audio, blocksize 13456.flac"},
	{path: "../testdata/flac-test-files/subset/31 - high resolution audio, using only 32nd order predictors.flac"},
	{path: "../testdata/flac-test-files/subset/32 - high resolution audio, partition order 8 containing escaped partitions.flac"},
	{path: "../testdata/flac-test-files/subset/33 - samplerate 192kHz.flac"},
	{path: "../testdata/flac-test-files/subset/34 - samplerate 192kHz, using only 32nd order predictors.flac"},
	{path: "../testdata/flac-test-files/subset/35 - samplerate 134560Hz.flac"},
	{path: "../testdata/flac-test-files/subset/36 - samplerate 384kHz.flac"},
	{path: "../testdata/flac-test-files/subset/37 - 20 bit per sample.flac"},
	{path: "../testdata/flac-test-files/subset/38 - 3 channels (3.0).flac"},
	{path: "../testdata/flac-test-files/subset/39 - 4 channels (4.0).flac"},
	{path: "../testdata/flac-test-files/subset/40 - 5 channels (5.0).flac"},
	{path: "../testdata/flac-test-files/subset/41 - 6 channels (5.1).flac"},
	{path: "../testdata/flac-test-files/subset/42 - 7 channels (6.1).flac"},
	{path: "../testdata/flac-test-files/subset/43 - 8 channels (7.1).flac"},
	{path: "../testdata/flac-test-files/subset/44 - 8-channel surround, 192kHz, 24 bit, using only 32nd order predictors.flac"},
	{path: "../testdata/flac-test-files/subset/45 - no total number of samples set.flac"},
	{path: "../testdata/flac-test-files/subset/46 - no min-max framesize set.flac"},
	{path: "../testdata/flac-test-files/subset/47 - only STREAMINFO.flac"},
	{path: "../testdata/flac-test-files/subset/48 - Extremely large SEEKTABLE.flac"},
	{path: "../testdata/flac-test-files/subset/49 - Extremely large PADDING.flac"},
	{path: "../testdata/flac-test-files/subset/50 - Extremely large PICTURE.flac"},
	{path: "../testdata/flac-test-files/subset/51 - Extremely large VORBISCOMMENT.flac"},
	{path: "../testdata/flac-test-files/subset/52 - Extremely large APPLICATION.flac"},
	{path: "../testdata/flac-test-files/subset/53 - CUESHEET with very many indexes.flac"},
	{path: "../testdata/flac-test-files/subset/54 - 1000x repeating VORBISCOMMENT.flac"},
	{path: "../testdata/flac-test-files/subset/55 - file 48-53 combined.flac"},
	{path: "../testdata/flac-test-files/subset/56 - JPG PICTURE.flac"},
	{path: "../testdata/flac-test-files/subset/57 - PNG PICTURE.flac"},
	{path: "../testdata/flac-test-files/subset/58 - GIF PICTURE.flac"},
	{path: "../testdata/flac-test-files/subset/59 - AVIF PICTURE.flac"},
	{path: "../testdata/flac-test-files/subset/60 - mono audio.flac"},
	{path: "../testdata/flac-test-files/subset/61 - predictor overflow check, 16-bit.flac"},
	{path: "../testdata/flac-test-files/subset/62 - predictor overflow check, 20-bit.flac"},
	// TODO: fix decoding of "subset/63 - ...flac": MD5 checksum mismatch for decoded audio samples; expected e4e4a6b3a672a849a3e2157c11ad23c6, got a0343afaaaa6229266d78ccf3175eb8d
	{path: "../testdata/flac-test-files/subset/63 - predictor overflow check, 24-bit.flac"},
	{path: "../testdata/flac-test-files/subset/64 - rice partitions with escape code zero.flac"},
}
|
||||
|
||||
func TestFrameHash(t *testing.T) {
|
||||
var zeroHash [md5.Size]byte
|
||||
for _, g := range golden {
|
||||
t.Run(g.path, func(t *testing.T) {
|
||||
stream, err := flac.Open(g.path)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer stream.Close()
|
||||
|
||||
// Skip frame hash test if no MD5 hash was set in StreamInfo.
|
||||
want := stream.Info.MD5sum[:]
|
||||
if bytes.Equal(want, zeroHash[:]) {
|
||||
t.Skipf("path=%q, skipping frame hash test as no MD5 hash was set in StreamInfo", g.path)
|
||||
return
|
||||
}
|
||||
|
||||
md5sum := md5.New()
|
||||
for frameNum := 0; ; frameNum++ {
|
||||
frame, err := stream.ParseNext()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
t.Errorf("path=%q, frameNum=%d: error while parsing frame; %v", g.path, frameNum, err)
|
||||
continue
|
||||
}
|
||||
frame.Hash(md5sum)
|
||||
}
|
||||
got := md5sum.Sum(nil)
|
||||
// Verify the decoded audio samples by comparing the MD5 checksum that is
|
||||
// stored in StreamInfo with the computed one.
|
||||
if !bytes.Equal(got, want) {
|
||||
t.Errorf("path=%q: MD5 checksum mismatch for decoded audio samples; expected %32x, got %32x", g.path, want, got)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFrameParse(b *testing.B) {
|
||||
// The file 151185.flac is a 119.5 MB public domain FLAC file used to
|
||||
// benchmark the flac library. Because of its size, it has not been included
|
||||
// in the repository, but is available for download at
|
||||
//
|
||||
// http://freesound.org/people/jarfil/sounds/151185/
|
||||
for i := 0; i < b.N; i++ {
|
||||
stream, err := flac.Open("../testdata/benchmark/151185.flac")
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
for {
|
||||
_, err := stream.ParseNext()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
stream.Close()
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
stream.Close()
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFrameHash(b *testing.B) {
|
||||
// The file 151185.flac is a 119.5 MB public domain FLAC file used to
|
||||
// benchmark the flac library. Because of its size, it has not been included
|
||||
// in the repository, but is available for download at
|
||||
//
|
||||
// http://freesound.org/people/jarfil/sounds/151185/
|
||||
for i := 0; i < b.N; i++ {
|
||||
stream, err := flac.Open("../testdata/benchmark/151185.flac")
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
md5sum := md5.New()
|
||||
for {
|
||||
frame, err := stream.ParseNext()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
stream.Close()
|
||||
b.Fatal(err)
|
||||
}
|
||||
frame.Hash(md5sum)
|
||||
}
|
||||
stream.Close()
|
||||
want := stream.Info.MD5sum[:]
|
||||
got := md5sum.Sum(nil)
|
||||
// Verify the decoded audio samples by comparing the MD5 checksum that is
|
||||
// stored in StreamInfo with the computed one.
|
||||
if !bytes.Equal(got, want) {
|
||||
b.Fatalf("MD5 checksum mismatch for decoded audio samples; expected %32x, got %32x", want, got)
|
||||
}
|
||||
}
|
||||
}
|
534
frame/subframe.go
Executable file
534
frame/subframe.go
Executable file
@@ -0,0 +1,534 @@
|
||||
package frame
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"github.com/mewkiz/flac/internal/bits"
|
||||
)
|
||||
|
||||
// A Subframe contains the encoded audio samples from one channel of an audio
|
||||
// block (a part of the audio stream).
|
||||
//
|
||||
// ref: https://www.xiph.org/flac/format.html#subframe
|
||||
type Subframe struct {
|
||||
// Subframe header.
|
||||
SubHeader
|
||||
// Unencoded audio samples. Samples is initially nil, and gets populated by a
|
||||
// call to Frame.Parse.
|
||||
//
|
||||
// Samples is used by decodeFixed and decodeFIR to temporarily store
|
||||
// residuals. Before returning they call decodeLPC which decodes the audio
|
||||
// samples.
|
||||
Samples []int32
|
||||
// Number of audio samples in the subframe.
|
||||
NSamples int
|
||||
}
|
||||
|
||||
// parseSubframe reads and parses the header, and the audio samples of a
|
||||
// subframe.
|
||||
func (frame *Frame) parseSubframe(br *bits.Reader, bps uint) (subframe *Subframe, err error) {
|
||||
// Parse subframe header.
|
||||
subframe = new(Subframe)
|
||||
if err = subframe.parseHeader(br); err != nil {
|
||||
return subframe, err
|
||||
}
|
||||
// Adjust bps of subframe for wasted bits-per-sample.
|
||||
bps -= subframe.Wasted
|
||||
|
||||
// Decode subframe audio samples.
|
||||
subframe.NSamples = int(frame.BlockSize)
|
||||
subframe.Samples = make([]int32, 0, subframe.NSamples)
|
||||
switch subframe.Pred {
|
||||
case PredConstant:
|
||||
err = subframe.decodeConstant(br, bps)
|
||||
case PredVerbatim:
|
||||
err = subframe.decodeVerbatim(br, bps)
|
||||
case PredFixed:
|
||||
err = subframe.decodeFixed(br, bps)
|
||||
case PredFIR:
|
||||
err = subframe.decodeFIR(br, bps)
|
||||
}
|
||||
|
||||
// Left shift to account for wasted bits-per-sample.
|
||||
for i, sample := range subframe.Samples {
|
||||
subframe.Samples[i] = sample << subframe.Wasted
|
||||
}
|
||||
return subframe, err
|
||||
}
|
||||
|
||||
// A SubHeader specifies the prediction method and order of a subframe.
|
||||
//
|
||||
// ref: https://www.xiph.org/flac/format.html#subframe_header
|
||||
type SubHeader struct {
|
||||
// Specifies the prediction method used to encode the audio sample of the
|
||||
// subframe.
|
||||
Pred Pred
|
||||
// Prediction order used by fixed and FIR linear prediction decoding.
|
||||
Order int
|
||||
// Wasted bits-per-sample.
|
||||
Wasted uint
|
||||
// Residual coding method used by fixed and FIR linear prediction decoding.
|
||||
ResidualCodingMethod ResidualCodingMethod
|
||||
// Coefficients' precision in bits used by FIR linear prediction decoding.
|
||||
CoeffPrec uint
|
||||
// Predictor coefficient shift needed in bits used by FIR linear prediction
|
||||
// decoding.
|
||||
CoeffShift int32
|
||||
// Predictor coefficients used by FIR linear prediction decoding.
|
||||
Coeffs []int32
|
||||
// Rice-coding subframe fields used by residual coding methods rice1 and
|
||||
// rice2; nil if unused.
|
||||
RiceSubframe *RiceSubframe
|
||||
}
|
||||
|
||||
// RiceSubframe holds rice-coding subframe fields used by residual coding
|
||||
// methods rice1 and rice2.
|
||||
type RiceSubframe struct {
|
||||
// Partition order used by fixed and FIR linear prediction decoding
|
||||
// (for residual coding methods, rice1 and rice2).
|
||||
PartOrder int // TODO: remove PartOrder and infer from int(math.Log2(float64(len(Partitions))))?
|
||||
// Rice partitions.
|
||||
Partitions []RicePartition
|
||||
}
|
||||
|
||||
// RicePartition describes one partition of the residuals of a subframe.
type RicePartition struct {
	// Rice parameter of the partition.
	Param uint
	// Size in bits of each residual of an escaped partition, that is a
	// partition whose residuals are stored in unencoded binary form.
	EscapedBitsPerSample uint
}
|
||||
|
||||
// parseHeader reads and parses the header of a subframe.
|
||||
func (subframe *Subframe) parseHeader(br *bits.Reader) error {
|
||||
// 1 bit: zero-padding.
|
||||
x, err := br.Read(1)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
if x != 0 {
|
||||
return errors.New("frame.Subframe.parseHeader: non-zero padding")
|
||||
}
|
||||
|
||||
// 6 bits: Pred.
|
||||
x, err = br.Read(6)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
// The 6 bits are used to specify the prediction method and order as follows:
|
||||
// 000000: Constant prediction method.
|
||||
// 000001: Verbatim prediction method.
|
||||
// 00001x: reserved.
|
||||
// 0001xx: reserved.
|
||||
// 001xxx:
|
||||
// if (xxx <= 4)
|
||||
// Fixed prediction method; xxx=order
|
||||
// else
|
||||
// reserved.
|
||||
// 01xxxx: reserved.
|
||||
// 1xxxxx: FIR prediction method; xxxxx=order-1
|
||||
switch {
|
||||
case x < 1:
|
||||
// 000000: Constant prediction method.
|
||||
subframe.Pred = PredConstant
|
||||
case x < 2:
|
||||
// 000001: Verbatim prediction method.
|
||||
subframe.Pred = PredVerbatim
|
||||
case x < 8:
|
||||
// 00001x: reserved.
|
||||
// 0001xx: reserved.
|
||||
return fmt.Errorf("frame.Subframe.parseHeader: reserved prediction method bit pattern (%06b)", x)
|
||||
case x < 16:
|
||||
// 001xxx:
|
||||
// if (xxx <= 4)
|
||||
// Fixed prediction method; xxx=order
|
||||
// else
|
||||
// reserved.
|
||||
order := int(x & 0x07)
|
||||
if order > 4 {
|
||||
return fmt.Errorf("frame.Subframe.parseHeader: reserved prediction method bit pattern (%06b)", x)
|
||||
}
|
||||
subframe.Pred = PredFixed
|
||||
subframe.Order = order
|
||||
case x < 32:
|
||||
// 01xxxx: reserved.
|
||||
return fmt.Errorf("frame.Subframe.parseHeader: reserved prediction method bit pattern (%06b)", x)
|
||||
default:
|
||||
// 1xxxxx: FIR prediction method; xxxxx=order-1
|
||||
subframe.Pred = PredFIR
|
||||
subframe.Order = int(x&0x1F) + 1
|
||||
}
|
||||
|
||||
// 1 bit: hasWastedBits.
|
||||
x, err = br.Read(1)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
if x != 0 {
|
||||
// k wasted bits-per-sample in source subblock, k-1 follows, unary coded;
|
||||
// e.g. k=3 => 001 follows, k=7 => 0000001 follows.
|
||||
x, err = br.ReadUnary()
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
subframe.Wasted = uint(x) + 1
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Pred identifies the prediction method that was used to encode the audio
// samples of a subframe.
type Pred uint8

// Prediction methods.
const (
	// PredConstant marks a subframe holding a constant sound. Every audio
	// sample of the subframe has the same value, so a single unencoded sample
	// is stored; decoding replicates it once per sample of the block, much like
	// run-length encoding.
	PredConstant Pred = 0
	// PredVerbatim marks a subframe whose audio samples are stored unencoded.
	// Random sound is often stored verbatim, since no prediction method can
	// compress it sufficiently.
	PredVerbatim Pred = 1
	// PredFixed marks a subframe holding linear prediction coded audio samples
	// whose polynomial coefficients are taken from a fixed, predefined set. The
	// prediction order (0 through 4) is stored in the subframe header, followed
	// by as many unencoded warm-up samples, which kick start the prediction
	// polynomial. The remainder of the subframe stores encoded residuals
	// (signal errors), that is the difference between the predicted and the
	// original audio samples.
	PredFixed Pred = 2
	// PredFIR marks a subframe holding linear prediction coded audio samples
	// whose polynomial coefficients are stored in the subframe itself. The
	// prediction order (1 through 32) is stored in the subframe header,
	// followed by as many unencoded warm-up samples, which kick start the
	// prediction polynomial. The remainder of the subframe stores encoded
	// residuals (signal errors), that is the difference between the predicted
	// and the original audio samples.
	PredFIR Pred = 3
)
|
||||
|
||||
// signExtend treats the n least significant bits of x as a signed two's
// complement integer and returns its value sign extended to 32 bits.
func signExtend(x uint64, n uint) int32 {
	// Bit n-1 is the sign bit of an n-bit integer.
	signBit := uint64(1) << (n - 1)
	if x&signBit == 0 {
		// Non-negative value; nothing to extend.
		return int32(x)
	}
	// Negative value; set every bit above the n-bit field.
	upper := ^uint64(0) << n
	return int32(x | upper)
}
|
||||
|
||||
// decodeConstant reads an unencoded audio sample of the subframe. Each sample
|
||||
// of the subframe has this constant value. The constant encoding can be thought
|
||||
// of as run-length encoding.
|
||||
//
|
||||
// ref: https://www.xiph.org/flac/format.html#subframe_constant
|
||||
func (subframe *Subframe) decodeConstant(br *bits.Reader, bps uint) error {
|
||||
// (bits-per-sample) bits: Unencoded constant value of the subblock.
|
||||
x, err := br.Read(bps)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
|
||||
// Each sample of the subframe has the same constant value.
|
||||
sample := signExtend(x, bps)
|
||||
for i := 0; i < subframe.NSamples; i++ {
|
||||
subframe.Samples = append(subframe.Samples, sample)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// decodeVerbatim reads the unencoded audio samples of the subframe.
|
||||
//
|
||||
// ref: https://www.xiph.org/flac/format.html#subframe_verbatim
|
||||
func (subframe *Subframe) decodeVerbatim(br *bits.Reader, bps uint) error {
|
||||
// Parse the unencoded audio samples of the subframe.
|
||||
for i := 0; i < subframe.NSamples; i++ {
|
||||
// (bits-per-sample) bits: Unencoded constant value of the subblock.
|
||||
x, err := br.Read(bps)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
sample := signExtend(x, bps)
|
||||
subframe.Samples = append(subframe.Samples, sample)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// FixedCoeffs is indexed by prediction order and holds the LPC coefficients
// used by the fixed prediction method. Order 0 has no coefficients.
//
//    x_0[n] = 0
//    x_1[n] = x[n-1]
//    x_2[n] = 2*x[n-1] - x[n-2]
//    x_3[n] = 3*x[n-1] - 3*x[n-2] + x[n-3]
//    x_4[n] = 4*x[n-1] - 6*x[n-2] + 4*x[n-3] - x[n-4]
//
// Orders 1 through 3 follow Section 2.2 of
// http://www.hpl.hp.com/techreports/1999/HPL-1999-144.pdf, and order 4 follows
// Data Compression: The Complete Reference (7.10.1).
var FixedCoeffs = [...][]int32{
	nil,
	{1},
	{2, -1},
	{3, -3, 1},
	{4, -6, 4, -1},
}
|
||||
|
||||
// decodeFixed decodes the linear prediction coded samples of the subframe,
|
||||
// using a fixed set of predefined polynomial coefficients.
|
||||
//
|
||||
// ref: https://www.xiph.org/flac/format.html#subframe_fixed
|
||||
func (subframe *Subframe) decodeFixed(br *bits.Reader, bps uint) error {
|
||||
// Parse unencoded warm-up samples.
|
||||
for i := 0; i < subframe.Order; i++ {
|
||||
// (bits-per-sample) bits: Unencoded warm-up sample.
|
||||
x, err := br.Read(bps)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
sample := signExtend(x, bps)
|
||||
subframe.Samples = append(subframe.Samples, sample)
|
||||
}
|
||||
|
||||
// Decode subframe residuals.
|
||||
if err := subframe.decodeResiduals(br); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Predict the audio samples of the subframe using a polynomial with
|
||||
// predefined coefficients of a given order. Correct signal errors using the
|
||||
// decoded residuals.
|
||||
const shift = 0
|
||||
return subframe.decodeLPC(FixedCoeffs[subframe.Order], shift)
|
||||
}
|
||||
|
||||
// decodeFIR decodes the linear prediction coded samples of the subframe, using
|
||||
// polynomial coefficients stored in the stream.
|
||||
//
|
||||
// ref: https://www.xiph.org/flac/format.html#subframe_lpc
|
||||
func (subframe *Subframe) decodeFIR(br *bits.Reader, bps uint) error {
|
||||
// Parse unencoded warm-up samples.
|
||||
for i := 0; i < subframe.Order; i++ {
|
||||
// (bits-per-sample) bits: Unencoded warm-up sample.
|
||||
x, err := br.Read(bps)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
sample := signExtend(x, bps)
|
||||
subframe.Samples = append(subframe.Samples, sample)
|
||||
}
|
||||
|
||||
// 4 bits: (coefficients' precision in bits) - 1.
|
||||
x, err := br.Read(4)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
if x == 0xF {
|
||||
return errors.New("frame.Subframe.decodeFIR: invalid coefficient precision bit pattern (1111)")
|
||||
}
|
||||
prec := uint(x) + 1
|
||||
subframe.CoeffPrec = prec
|
||||
|
||||
// 5 bits: predictor coefficient shift needed in bits.
|
||||
x, err = br.Read(5)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
shift := signExtend(x, 5)
|
||||
subframe.CoeffShift = shift
|
||||
|
||||
// Parse coefficients.
|
||||
coeffs := make([]int32, subframe.Order)
|
||||
for i := range coeffs {
|
||||
// (prec) bits: Predictor coefficient.
|
||||
x, err = br.Read(prec)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
coeffs[i] = signExtend(x, prec)
|
||||
}
|
||||
subframe.Coeffs = coeffs
|
||||
|
||||
// Decode subframe residuals.
|
||||
if err := subframe.decodeResiduals(br); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Predict the audio samples of the subframe using a polynomial with
|
||||
// predefined coefficients of a given order. Correct signal errors using the
|
||||
// decoded residuals.
|
||||
return subframe.decodeLPC(coeffs, shift)
|
||||
}
|
||||
|
||||
// ResidualCodingMethod identifies the method used to code the residuals of a
// subframe.
type ResidualCodingMethod uint8

// Residual coding methods.
const (
	// ResidualCodingMethodRice1 is Rice coding with a 4-bit Rice parameter
	// (rice1).
	ResidualCodingMethodRice1 ResidualCodingMethod = iota
	// ResidualCodingMethodRice2 is Rice coding with a 5-bit Rice parameter
	// (rice2).
	ResidualCodingMethodRice2
)
|
||||
|
||||
// decodeResiduals decodes the encoded residuals (prediction method error
|
||||
// signals) of the subframe.
|
||||
//
|
||||
// ref: https://www.xiph.org/flac/format.html#residual
|
||||
func (subframe *Subframe) decodeResiduals(br *bits.Reader) error {
|
||||
// 2 bits: Residual coding method.
|
||||
x, err := br.Read(2)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
residualCodingMethod := ResidualCodingMethod(x)
|
||||
subframe.ResidualCodingMethod = residualCodingMethod
|
||||
// The 2 bits are used to specify the residual coding method as follows:
|
||||
// 00: Rice coding with a 4-bit Rice parameter.
|
||||
// 01: Rice coding with a 5-bit Rice parameter.
|
||||
// 10: reserved.
|
||||
// 11: reserved.
|
||||
switch residualCodingMethod {
|
||||
case 0x0:
|
||||
return subframe.decodeRicePart(br, 4)
|
||||
case 0x1:
|
||||
return subframe.decodeRicePart(br, 5)
|
||||
default:
|
||||
return fmt.Errorf("frame.Subframe.decodeResiduals: reserved residual coding method bit pattern (%02b)", uint8(residualCodingMethod))
|
||||
}
|
||||
}
|
||||
|
||||
// decodeRicePart decodes a Rice partition of encoded residuals from the
|
||||
// subframe, using a Rice parameter of the specified size in bits.
|
||||
//
|
||||
// ref: https://www.xiph.org/flac/format.html#partitioned_rice
|
||||
// ref: https://www.xiph.org/flac/format.html#partitioned_rice2
|
||||
func (subframe *Subframe) decodeRicePart(br *bits.Reader, paramSize uint) error {
|
||||
// 4 bits: Partition order.
|
||||
x, err := br.Read(4)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
partOrder := int(x)
|
||||
riceSubframe := &RiceSubframe{
|
||||
PartOrder: partOrder,
|
||||
}
|
||||
subframe.RiceSubframe = riceSubframe
|
||||
|
||||
// Parse Rice partitions; in total 2^partOrder partitions.
|
||||
//
|
||||
// ref: https://www.xiph.org/flac/format.html#rice_partition
|
||||
// ref: https://www.xiph.org/flac/format.html#rice2_partition
|
||||
nparts := 1 << partOrder
|
||||
partitions := make([]RicePartition, nparts)
|
||||
riceSubframe.Partitions = partitions
|
||||
for i := 0; i < nparts; i++ {
|
||||
partition := &partitions[i]
|
||||
// (4 or 5) bits: Rice parameter.
|
||||
x, err = br.Read(paramSize)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
param := uint(x)
|
||||
partition.Param = param
|
||||
|
||||
// Determine the number of Rice encoded samples in the partition.
|
||||
var nsamples int
|
||||
if partOrder == 0 {
|
||||
nsamples = subframe.NSamples - subframe.Order
|
||||
} else if i != 0 {
|
||||
nsamples = subframe.NSamples / nparts
|
||||
} else {
|
||||
nsamples = subframe.NSamples/nparts - subframe.Order
|
||||
}
|
||||
|
||||
if paramSize == 4 && param == 0xF || paramSize == 5 && param == 0x1F {
|
||||
// 1111 or 11111: Escape code, meaning the partition is in unencoded
|
||||
// binary form using n bits per sample; n follows as a 5-bit number.
|
||||
x, err := br.Read(5)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
n := uint(x)
|
||||
partition.EscapedBitsPerSample = n
|
||||
for j := 0; j < nsamples; j++ {
|
||||
sample, err := br.Read(n)
|
||||
if err != nil {
|
||||
return unexpected(err)
|
||||
}
|
||||
// ref: https://datatracker.ietf.org/doc/draft-ietf-cellar-flac/
|
||||
//
|
||||
// From section 9.2.7.1. Escaped partition:
|
||||
//
|
||||
// The residual samples themselves are stored signed two's
|
||||
// complement. For example, when a partition is escaped and each
|
||||
// residual sample is stored with 3 bits, the number -1 is
|
||||
// represented as 0b111.
|
||||
subframe.Samples = append(subframe.Samples, int32(bits.IntN(sample, n)))
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Decode the Rice encoded residuals of the partition.
|
||||
for j := 0; j < nsamples; j++ {
|
||||
residual, err := subframe.decodeRiceResidual(br, param)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
subframe.Samples = append(subframe.Samples, residual)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// decodeRiceResidual decodes and returns a Rice encoded residual (error
|
||||
// signal).
|
||||
func (subframe *Subframe) decodeRiceResidual(br *bits.Reader, k uint) (int32, error) {
|
||||
// Read unary encoded most significant bits.
|
||||
high, err := br.ReadUnary()
|
||||
if err != nil {
|
||||
return 0, unexpected(err)
|
||||
}
|
||||
|
||||
// Read binary encoded least significant bits.
|
||||
low, err := br.Read(k)
|
||||
if err != nil {
|
||||
return 0, unexpected(err)
|
||||
}
|
||||
folded := uint32(high<<k | low)
|
||||
|
||||
// ZigZag decode.
|
||||
residual := bits.DecodeZigZag(folded)
|
||||
return residual, nil
|
||||
}
|
||||
|
||||
// decodeLPC decodes linear prediction coded audio samples, using the
|
||||
// coefficients of a given polynomial, a couple of unencoded warm-up samples,
|
||||
// and the signal errors of the prediction as specified by the residuals.
|
||||
func (subframe *Subframe) decodeLPC(coeffs []int32, shift int32) error {
|
||||
if len(coeffs) != subframe.Order {
|
||||
return fmt.Errorf("frame.Subframe.decodeLPC: prediction order (%d) differs from number of coefficients (%d)", subframe.Order, len(coeffs))
|
||||
}
|
||||
if shift < 0 {
|
||||
return fmt.Errorf("frame.Subframe.decodeLPC: invalid negative shift")
|
||||
}
|
||||
if subframe.NSamples != len(subframe.Samples) {
|
||||
return fmt.Errorf("frame.Subframe.decodeLPC: subframe sample count mismatch; expected %d, got %d", subframe.NSamples, len(subframe.Samples))
|
||||
}
|
||||
for i := subframe.Order; i < subframe.NSamples; i++ {
|
||||
var sample int64
|
||||
for j, c := range coeffs {
|
||||
sample += int64(c) * int64(subframe.Samples[i-j-1])
|
||||
}
|
||||
subframe.Samples[i] += int32(sample >> uint(shift))
|
||||
}
|
||||
return nil
|
||||
}
|
Reference in New Issue
Block a user