This commit is contained in:
2025-04-19 22:56:37 +08:00
commit ceca244eaf
50 changed files with 7321 additions and 0 deletions

687
frame/frame.go Executable file
View File

@@ -0,0 +1,687 @@
// Package frame implements access to FLAC audio frames.
//
// A brief introduction of the FLAC audio format [1] follows. FLAC encoders
// divide the audio stream into blocks through a process called blocking [2]. A
// block contains the unencoded audio samples from all channels during a short
// period of time. Each audio block is divided into subblocks, one per channel.
//
// There is often a correlation between the left and right channel of stereo
// audio. Using inter-channel decorrelation [3] it is possible to store only one
// of the channels and the difference between the channels, or store the average
// of the channels and their difference. An encoder decorrelates audio samples
// as follows:
//
// mid = (left + right)/2 // average of the channels
// side = left - right // difference between the channels
//
// The blocks are encoded using a variety of prediction methods [4][5] and
// stored in frames. Blocks and subblocks contain unencoded audio samples while
// frames and subframes contain encoded audio samples. A FLAC stream contains
// one or more audio frames.
//
// [1]: https://www.xiph.org/flac/format.html#architecture
// [2]: https://www.xiph.org/flac/format.html#blocking
// [3]: https://www.xiph.org/flac/format.html#interchannel
// [4]: https://www.xiph.org/flac/format.html#prediction
// [5]: https://godoc.org/github.com/mewkiz/flac/frame#Pred
package frame
import (
"encoding/binary"
"errors"
"fmt"
"hash"
"io"
"log"
"github.com/mewkiz/flac/internal/bits"
"github.com/mewkiz/flac/internal/hashutil"
"github.com/mewkiz/flac/internal/hashutil/crc16"
"github.com/mewkiz/flac/internal/hashutil/crc8"
"github.com/mewkiz/flac/internal/utf8"
)
// A Frame contains the header and subframes of an audio frame. It holds the
// encoded samples from a block (a part) of the audio stream, with each
// subframe holding the samples from one of its channels.
//
// ref: https://www.xiph.org/flac/format.html#frame
type Frame struct {
// Audio frame header.
Header
// One subframe per channel, containing encoded audio samples. Subframes is
// allocated and populated by Frame.Parse.
Subframes []*Subframe
// CRC-16 hash sum, calculated by read operations on hr. It is compared
// against the checksum stored at the end of the frame by Frame.Parse.
crc hashutil.Hash16
// A bit reader, wrapping read operations to hr. It is created by
// parseHeader.
br *bits.Reader
// A CRC-16 hash reader, wrapping read operations to r.
hr io.Reader
// Underlying io.Reader. The trailing CRC-16 checksum is read directly from r
// so that it is not added to the running hash.
r io.Reader
}
// New reads and parses the header of the next audio frame from r and returns
// a Frame which may be used to access the audio samples of that frame. The
// subframes are not decoded; call Frame.Parse to parse their audio samples.
//
// New returns io.EOF to signal a graceful end of FLAC stream. The returned
// frame is non-nil even when an error is reported.
func New(r io.Reader) (frame *Frame, err error) {
	// Every byte read through the tee reader is also added to the running
	// CRC-16 hash of the frame.
	sum := crc16.NewIBM()
	frame = &Frame{
		crc: sum,
		hr:  io.TeeReader(r, sum),
		r:   r,
	}
	// Parse frame header.
	err = frame.parseHeader()
	return frame, err
}
// Parse reads and parses a complete audio frame from r: first its header and
// then the audio samples of each subframe. Samples that are inter-channel
// decorrelated between the subframes are correlated before returning. It
// returns io.EOF to signal a graceful end of FLAC stream.
//
// The frame created while parsing the header is returned even when an error
// is reported.
//
// ref: https://www.xiph.org/flac/format.html#interchannel
func Parse(r io.Reader) (frame *Frame, err error) {
	// Parse frame header; bail out early if it cannot be parsed.
	if frame, err = New(r); err != nil {
		return frame, err
	}
	// Parse subframes.
	err = frame.Parse()
	return frame, err
}
// Parse reads and parses the audio samples from each subframe of the frame,
// reverts any inter-channel decorrelation between the subframes, and verifies
// the CRC-16 checksum stored at the end of the frame.
//
// ref: https://www.xiph.org/flac/format.html#interchannel
func (frame *Frame) Parse() error {
	nchannels := frame.Channels.Count()
	frame.Subframes = make([]*Subframe, nchannels)

	// Index of the side channel when inter-channel decorrelation is used; -1
	// when the frame has no side channel.
	sideChannel := -1
	switch frame.Channels {
	case ChannelsSideRight:
		sideChannel = 0
	case ChannelsLeftSide, ChannelsMidSide:
		sideChannel = 1
	}

	// Parse subframes.
	for channel := 0; channel < nchannels; channel++ {
		bps := uint(frame.BitsPerSample)
		if channel == sideChannel {
			// The side channel requires an extra bit per sample.
			bps++
		}
		subframe, err := frame.parseSubframe(frame.br, bps)
		// The subframe is stored even when parsing fails.
		frame.Subframes[channel] = subframe
		if err != nil {
			return err
		}
	}

	// Inter-channel correlation of subframe samples.
	frame.Correlate()

	// 2 bytes: CRC-16 checksum. It is read from the underlying reader so that
	// the checksum bytes are not added to the running hash.
	var want uint16
	if err := binary.Read(frame.r, binary.BigEndian, &want); err != nil {
		return unexpected(err)
	}
	if got := frame.crc.Sum16(); got != want {
		return fmt.Errorf("frame.Frame.Parse: CRC-16 checksum mismatch; expected 0x%04X, got 0x%04X", want, got)
	}
	return nil
}
// Hash adds the decoded audio samples of the frame to a running MD5 hash. It
// can be used in conjunction with StreamInfo.MD5sum to verify the integrity of
// the decoded audio samples.
//
// The samples are interleaved by channel and each sample is written in
// little-endian byte order using the smallest whole number of bytes that holds
// frame.BitsPerSample bits (1 byte for 1-8 bits, up to 4 bytes for 25-32
// bits). If the sample size is outside of the range 1-32, a message is logged
// once and nothing is added to the hash.
//
// Note: The audio samples of the frame must be decoded before calling Hash.
func (frame *Frame) Hash(md5sum hash.Hash) {
	bps := frame.BitsPerSample
	if bps < 1 || bps > 32 {
		// Report the problem once per frame rather than once per sample.
		log.Printf("frame.Frame.Hash: support for %d-bit sample size not yet implemented", bps)
		return
	}
	// Number of bytes used to store each sample.
	nbytes := (int(bps) + 7) / 8
	nsamples := int(frame.BlockSize)

	// Collect the bytes of the whole frame and hand them to the hash in a
	// single write instead of issuing one write per sample.
	buf := make([]byte, 0, nsamples*len(frame.Subframes)*nbytes)
	for i := 0; i < nsamples; i++ {
		for _, subframe := range frame.Subframes {
			sample := subframe.Samples[i]
			for j := 0; j < nbytes; j++ {
				// Least significant byte first.
				buf = append(buf, uint8(sample>>(8*uint(j))))
			}
		}
	}
	// Write on a hash.Hash never returns an error.
	md5sum.Write(buf)
}
// A Header contains the basic properties of an audio frame, such as its sample
// rate and channel count. To facilitate random access decoding each frame
// header starts with a sync-code. This allows the decoder to synchronize and
// locate the start of a frame header.
//
// ref: https://www.xiph.org/flac/format.html#frame_header
type Header struct {
// Specifies if the block size is fixed or variable. It determines how Num
// is interpreted.
HasFixedBlockSize bool
// Block size in inter-channel samples, i.e. the number of audio samples in
// each subframe.
BlockSize uint16
// Sample rate in Hz; a 0 value implies unknown, get sample rate from
// StreamInfo.
SampleRate uint32
// Specifies the number of channels (subframes) that exist in the frame,
// their order and possible inter-channel decorrelation.
Channels Channels
// Sample size in bits-per-sample; a 0 value implies unknown, get sample size
// from StreamInfo.
BitsPerSample uint8
// Specifies the frame number if the block size is fixed, and the first
// sample number in the frame otherwise. When using fixed block size, the
// first sample number in the frame can be derived by multiplying the frame
// number with the block size (in samples); see Frame.SampleNumber.
Num uint64
}
// Errors returned by Frame.parseHeader.
var (
// ErrInvalidSync is returned when the first 14 bits of a frame header do not
// match the frame sync-code.
ErrInvalidSync = errors.New("frame.Frame.parseHeader: invalid sync-code")
)
// parseHeader reads and parses the header of an audio frame, storing the
// result in frame.Header and verifying the CRC-8 checksum of the header. It
// also creates the bit reader (frame.br) that is later used to parse the
// subframes.
//
// It returns io.EOF only if the stream ends before the sync-code has been
// read; an end of stream at any later point is reported as
// io.ErrUnexpectedEOF.
func (frame *Frame) parseHeader() error {
// Create a new CRC-8 hash reader which adds the data from all read
// operations to a running hash.
h := crc8.NewATM()
hr := io.TeeReader(frame.hr, h)
// Create bit reader.
br := bits.NewReader(hr)
frame.br = br
// 14 bits: sync-code (11111111111110)
x, err := br.Read(14)
if err != nil {
// This is the only place an audio frame may return io.EOF, which signals
// a graceful end of a FLAC stream.
return err
}
if x != 0x3FFE {
return ErrInvalidSync
}
// 1 bit: reserved.
x, err = br.Read(1)
if err != nil {
return unexpected(err)
}
if x != 0 {
return errors.New("frame.Frame.parseHeader: non-zero reserved value")
}
// 1 bit: HasFixedBlockSize. A zero bit denotes a fixed block size.
x, err = br.Read(1)
if err != nil {
return unexpected(err)
}
if x == 0 {
frame.HasFixedBlockSize = true
}
// 4 bits: BlockSize. The block size parsing is simplified by deferring it to
// the end of the header.
blockSize, err := br.Read(4)
if err != nil {
return unexpected(err)
}
// 4 bits: SampleRate. The sample rate parsing is simplified by deferring it
// to the end of the header.
sampleRate, err := br.Read(4)
if err != nil {
return unexpected(err)
}
// Parse channels.
if err := frame.parseChannels(br); err != nil {
return err
}
// Parse bits per sample.
if err := frame.parseBitsPerSample(br); err != nil {
return err
}
// 1 bit: reserved.
x, err = br.Read(1)
if err != nil {
return unexpected(err)
}
if x != 0 {
return errors.New("frame.Frame.parseHeader: non-zero reserved value")
}
// if (fixed block size)
// 1-6 bytes: UTF-8 encoded frame number.
// else
// 1-7 bytes: UTF-8 encoded sample number.
//
// The number is decoded directly from hr rather than through the bit reader;
// the fields read so far add up to 32 bits, i.e. a whole number of bytes.
frame.Num, err = utf8.Decode(hr)
if err != nil {
return unexpected(err)
}
// Parse block size.
if err := frame.parseBlockSize(br, blockSize); err != nil {
return err
}
// Parse sample rate.
if err := frame.parseSampleRate(br, sampleRate); err != nil {
return err
}
// 1 byte: CRC-8 checksum. It is read from frame.hr instead of hr, so that the
// checksum byte is added to the CRC-16 hash of the frame but not to the CRC-8
// hash of the header.
var want uint8
if err = binary.Read(frame.hr, binary.BigEndian, &want); err != nil {
return unexpected(err)
}
got := h.Sum8()
if want != got {
return fmt.Errorf("frame.Frame.parseHeader: CRC-8 checksum mismatch; expected 0x%02X, got 0x%02X", want, got)
}
return nil
}
// parseBitsPerSample reads the 3-bit sample size field of the frame header
// from br and stores the corresponding sample size in frame.BitsPerSample.
//
// The 3 bits are used to specify the sample size as follows:
//
//	000: unknown sample size; get from StreamInfo.
//	001: 8 bits-per-sample.
//	010: 12 bits-per-sample.
//	011: reserved.
//	100: 16 bits-per-sample.
//	101: 20 bits-per-sample.
//	110: 24 bits-per-sample.
//	111: reserved.
//
// frame.BitsPerSample is left untouched for the unknown sample size, and an
// error is returned for the reserved bit patterns.
func (frame *Frame) parseBitsPerSample(br *bits.Reader) error {
	// Sample size of each bit pattern; zero for the patterns that do not
	// specify a sample size.
	sizes := [8]uint8{
		0x1: 8,
		0x2: 12,
		0x4: 16,
		0x5: 20,
		0x6: 24,
	}

	// 3 bits: BitsPerSample.
	x, err := br.Read(3)
	if err != nil {
		return unexpected(err)
	}
	if x == 0x0 {
		// 000: unknown bits-per-sample; get from StreamInfo.
		return nil
	}
	if x > 0x7 || sizes[x] == 0 {
		// 011: reserved.
		// 111: reserved.
		return fmt.Errorf("frame.Frame.parseHeader: reserved sample size bit pattern (%03b)", x)
	}
	frame.BitsPerSample = sizes[x]
	return nil
}
// parseChannels reads the 4-bit channel assignment field of the frame header
// from br and stores it in frame.Channels.
//
// The 4 bits are used to specify the channels as follows:
//
//	0000: (1 channel) mono.
//	0001: (2 channels) left, right.
//	0010: (3 channels) left, right, center.
//	0011: (4 channels) left, right, left surround, right surround.
//	0100: (5 channels) left, right, center, left surround, right surround.
//	0101: (6 channels) left, right, center, LFE, left surround, right surround.
//	0110: (7 channels) left, right, center, LFE, center surround, side left, side right.
//	0111: (8 channels) left, right, center, LFE, left surround, right surround, side left, side right.
//	1000: (2 channels) left, side; using inter-channel decorrelation.
//	1001: (2 channels) side, right; using inter-channel decorrelation.
//	1010: (2 channels) mid, side; using inter-channel decorrelation.
//	1011: reserved.
//	1100: reserved.
//	1101: reserved.
//	1110: reserved.
//	1111: reserved.
//
// An error is returned for the reserved bit patterns.
func (frame *Frame) parseChannels(br *bits.Reader) error {
	// 4 bits: Channels.
	x, err := br.Read(4)
	if err != nil {
		return unexpected(err)
	}
	if x < 0xB {
		// 0000-1010: the bit pattern is the value of the channel assignment.
		frame.Channels = Channels(x)
		return nil
	}
	// 1011-1111: reserved.
	return fmt.Errorf("frame.Frame.parseHeader: reserved channels bit pattern (%04b)", x)
}
// parseBlockSize determines the block size of the frame from the 4-bit block
// size field of the header (blockSize) and stores it in frame.BlockSize. For
// the bit patterns 0110 and 0111 the block size is read from br, which is
// expected to be positioned at the end of the header.
//
// The 4 bits of n are used to specify the block size as follows:
//
//	0000: reserved.
//	0001: 192 samples.
//	0010-0101: 576 * 2^(n-2) samples.
//	0110: get 8 bit (block size)-1 from the end of the header.
//	0111: get 16 bit (block size)-1 from the end of the header.
//	1000-1111: 256 * 2^(n-8) samples.
//
// An error is returned for the reserved bit pattern, and for a 16 bit block
// size of 65536 samples which cannot be represented by frame.BlockSize.
func (frame *Frame) parseBlockSize(br *bits.Reader, blockSize uint64) error {
	n := blockSize
	switch {
	case n == 0x0:
		// 0000: reserved.
		return errors.New("frame.Frame.parseHeader: reserved block size bit pattern (0000)")
	case n == 0x1:
		// 0001: 192 samples.
		frame.BlockSize = 192
	case n >= 0x2 && n <= 0x5:
		// 0010-0101: 576 * 2^(n-2) samples.
		frame.BlockSize = 576 * (1 << (n - 2))
	case n == 0x6:
		// 0110: get 8 bit (block size)-1 from the end of the header.
		x, err := br.Read(8)
		if err != nil {
			return unexpected(err)
		}
		frame.BlockSize = uint16(x + 1)
	case n == 0x7:
		// 0111: get 16 bit (block size)-1 from the end of the header.
		x, err := br.Read(16)
		if err != nil {
			return unexpected(err)
		}
		if x == 0xFFFF {
			// A stored value of 65535 denotes a block size of 65536, which
			// would silently wrap around to 0 when converted to uint16.
			return errors.New("frame.Frame.parseHeader: invalid block size (65536); exceeds the maximum block size of 65535 samples")
		}
		frame.BlockSize = uint16(x + 1)
	default:
		// 1000-1111: 256 * 2^(n-8) samples.
		frame.BlockSize = 256 * (1 << (n - 8))
	}
	return nil
}
// parseSampleRate determines the sample rate of the frame from the 4-bit
// sample rate field of the header (sampleRate) and stores it in
// frame.SampleRate. For the bit patterns 1100, 1101 and 1110 the sample rate
// is read from br, which is expected to be positioned at the end of the
// header.
//
// The 4 bits are used to specify the sample rate as follows:
//
//	0000: unknown sample rate; get from StreamInfo.
//	0001: 88.2 kHz.
//	0010: 176.4 kHz.
//	0011: 192 kHz.
//	0100: 8 kHz.
//	0101: 16 kHz.
//	0110: 22.05 kHz.
//	0111: 24 kHz.
//	1000: 32 kHz.
//	1001: 44.1 kHz.
//	1010: 48 kHz.
//	1011: 96 kHz.
//	1100: get 8 bit sample rate (in kHz) from the end of the header.
//	1101: get 16 bit sample rate (in Hz) from the end of the header.
//	1110: get 16 bit sample rate (in daHz) from the end of the header.
//	1111: invalid.
//
// frame.SampleRate is left untouched for the unknown sample rate, and an error
// is returned for the invalid bit pattern.
func (frame *Frame) parseSampleRate(br *bits.Reader, sampleRate uint64) error {
	// Sample rates in Hz of the bit patterns 0001 through 1011.
	fixedRates := [...]uint32{
		0x1: 88200,
		0x2: 176400,
		0x3: 192000,
		0x4: 8000,
		0x5: 16000,
		0x6: 22050,
		0x7: 24000,
		0x8: 32000,
		0x9: 44100,
		0xA: 48000,
		0xB: 96000,
	}

	switch {
	case sampleRate == 0x0:
		// 0000: unknown sample rate; get from StreamInfo.
		return nil
	case sampleRate <= 0xB:
		// 0001-1011: sample rate is given by the bit pattern.
		frame.SampleRate = fixedRates[sampleRate]
		if sampleRate == 0x2 || sampleRate == 0x7 {
			// TODO(u): Remove log message when the test cases have been extended.
			log.Printf("frame.Frame.parseHeader: The flac library test cases do not yet include any audio files with sample rate %d. If possible please consider contributing this audio sample to improve the reliability of the test cases.", frame.SampleRate)
		}
		return nil
	case sampleRate > 0xE:
		// 1111: invalid.
		return errors.New("frame.Frame.parseHeader: invalid sample rate bit pattern (1111)")
	}

	// 1100-1110: sample rate is stored at the end of the header.
	switch sampleRate {
	case 0xC:
		// 1100: get 8 bit sample rate (in kHz) from the end of the header.
		kHz, err := br.Read(8)
		if err != nil {
			return unexpected(err)
		}
		frame.SampleRate = uint32(kHz * 1000)
	case 0xD:
		// 1101: get 16 bit sample rate (in Hz) from the end of the header.
		hz, err := br.Read(16)
		if err != nil {
			return unexpected(err)
		}
		frame.SampleRate = uint32(hz)
	case 0xE:
		// 1110: get 16 bit sample rate (in daHz) from the end of the header.
		daHz, err := br.Read(16)
		if err != nil {
			return unexpected(err)
		}
		frame.SampleRate = uint32(daHz * 10)
	}
	return nil
}
// Channels specifies the number of channels (subframes) that exist in a frame,
// their order and possible inter-channel decorrelation.
type Channels uint8

// Channel assignments. The following abbreviations are used:
//
//	C:   center (directly in front)
//	R:   right (standard stereo)
//	Sr:  side right (directly to the right)
//	Rs:  right surround (back right)
//	Cs:  center surround (rear center)
//	Ls:  left surround (back left)
//	Sl:  side left (directly to the left)
//	L:   left (standard stereo)
//	Lfe: low-frequency effect (placed according to room acoustics)
//
// The first 6 channel constants follow the SMPTE/ITU-R channel order:
//
//	L R C Lfe Ls Rs
const (
	ChannelsMono           Channels = iota // 1 channel: mono.
	ChannelsLR                             // 2 channels: left, right.
	ChannelsLRC                            // 3 channels: left, right, center.
	ChannelsLRLsRs                         // 4 channels: left, right, left surround, right surround.
	ChannelsLRCLsRs                        // 5 channels: left, right, center, left surround, right surround.
	ChannelsLRCLfeLsRs                     // 6 channels: left, right, center, LFE, left surround, right surround.
	ChannelsLRCLfeCsSlSr                   // 7 channels: left, right, center, LFE, center surround, side left, side right.
	ChannelsLRCLfeLsRsSlSr                 // 8 channels: left, right, center, LFE, left surround, right surround, side left, side right.
	ChannelsLeftSide                       // 2 channels: left, side; using inter-channel decorrelation.
	ChannelsSideRight                      // 2 channels: side, right; using inter-channel decorrelation.
	ChannelsMidSide                        // 2 channels: mid, side; using inter-channel decorrelation.
)

// nChannels specifies the number of channels used by each channel assignment.
var nChannels = [...]int{
	ChannelsMono:           1,
	ChannelsLR:             2,
	ChannelsLRC:            3,
	ChannelsLRLsRs:         4,
	ChannelsLRCLsRs:        5,
	ChannelsLRCLfeLsRs:     6,
	ChannelsLRCLfeCsSlSr:   7,
	ChannelsLRCLfeLsRsSlSr: 8,
	ChannelsLeftSide:       2,
	ChannelsSideRight:      2,
	ChannelsMidSide:        2,
}

// Count returns the number of channels (subframes) used by the provided channel
// assignment. It returns 0 if channels is not one of the channel assignment
// constants.
func (channels Channels) Count() int {
	// Guard against values past the end of the table, which would otherwise
	// cause an index out of range panic.
	if int(channels) >= len(nChannels) {
		return 0
	}
	return nChannels[channels]
}
// Correlate reverts any inter-channel decorrelation between the samples of the
// subframes, replacing the samples of the side and mid channels in place with
// the samples of the left and right channels. Frames which do not use
// inter-channel decorrelation are left untouched.
//
// An encoder decorrelates audio samples as follows:
//
//	mid = (left + right)/2
//	side = left - right
func (frame *Frame) Correlate() {
	switch frame.Channels {
	case ChannelsLeftSide:
		// 2 channels: left, side; the side channel becomes the right channel.
		left, right := frame.Subframes[0].Samples, frame.Subframes[1].Samples
		for i, side := range right {
			// right = left - side
			right[i] = left[i] - side
		}
	case ChannelsSideRight:
		// 2 channels: side, right; the side channel becomes the left channel.
		left, right := frame.Subframes[0].Samples, frame.Subframes[1].Samples
		for i, side := range left {
			// left = right + side
			left[i] = right[i] + side
		}
	case ChannelsMidSide:
		// 2 channels: mid, side; mid becomes left and side becomes right.
		left, right := frame.Subframes[0].Samples, frame.Subframes[1].Samples
		for i := range right {
			mid := left[i]
			side := right[i]
			// The integer division in mid = (left + right)/2 discards the least
			// significant bit of the sum. It can be reconstructed however,
			// since a sum A+B and a difference A-B have the same least
			// significant bit.
			//
			// ref: Data Compression: The Complete Reference (ch. 7, Decorrelation)
			sum := mid<<1 | side&1
			// left = (2*mid + side)/2
			// right = (2*mid - side)/2
			left[i] = (sum + side) / 2
			right[i] = (sum - side) / 2
		}
	}
}
// Decorrelate performs inter-channel decorrelation between the samples of the
// subframes, replacing the samples of the left and right channels in place
// with the channels specified by frame.Channels. Frames which do not use
// inter-channel decorrelation are left untouched.
//
// An encoder decorrelates audio samples as follows:
//
//	mid = (left + right)/2
//	side = left - right
func (frame *Frame) Decorrelate() {
	switch frame.Channels {
	case ChannelsLeftSide:
		// 2 channels: left, side; the left channel is kept and the right
		// channel is replaced by the side channel.
		left, right := frame.Subframes[0].Samples, frame.Subframes[1].Samples
		for i, l := range left {
			// side = left - right
			right[i] = l - right[i]
		}
	case ChannelsSideRight:
		// 2 channels: side, right; the right channel is kept and the left
		// channel is replaced by the side channel.
		left, right := frame.Subframes[0].Samples, frame.Subframes[1].Samples
		for i := range left {
			// side = left - right
			left[i] -= right[i]
		}
	case ChannelsMidSide:
		// 2 channels: mid, side; the left channel is replaced by the mid
		// channel and the right channel by the side channel.
		left, right := frame.Subframes[0].Samples, frame.Subframes[1].Samples
		for i, l := range left {
			r := right[i]
			// mid = (left + right)/2
			//
			// NOTE: using `(left + right) >> 1`, not the same as
			// `(left + right) / 2`. The sum is computed using 64-bit integers.
			left[i] = int32((int64(l) + int64(r)) >> 1)
			// side = left - right
			right[i] = l - r
		}
	}
}
// SampleNumber returns the first sample number contained within the frame.
//
// For frames with a variable block size frame.Num already holds the sample
// number. For frames with a fixed block size frame.Num holds the frame number,
// which is multiplied with the block size to get the sample number.
func (frame *Frame) SampleNumber() uint64 {
	first := frame.Num
	if frame.HasFixedBlockSize {
		first *= uint64(frame.BlockSize)
	}
	return first
}
// unexpected maps io.EOF to io.ErrUnexpectedEOF, and returns any other err
// (including nil) as is. It is used for reads where an end of stream is not a
// graceful end of the FLAC stream.
func unexpected(err error) error {
	switch err {
	case io.EOF:
		return io.ErrUnexpectedEOF
	default:
		return err
	}
}

193
frame/frame_test.go Executable file
View File

@@ -0,0 +1,193 @@
package frame_test
import (
"bytes"
"crypto/md5"
"io"
"testing"
"github.com/mewkiz/flac"
)
// golden lists the FLAC files used as test cases by TestFrameHash. The paths
// are passed as is to flac.Open.
var golden = []struct {
// Path of the FLAC file.
path string
}{
{path: "../testdata/love.flac"},
{path: "../testdata/19875.flac"},
{path: "../testdata/44127.flac"},
{path: "../testdata/59996.flac"},
{path: "../testdata/80574.flac"},
{path: "../testdata/172960.flac"},
{path: "../testdata/189983.flac"},
{path: "../testdata/191885.flac"},
{path: "../testdata/212768.flac"},
{path: "../testdata/220014.flac"},
{path: "../testdata/243749.flac"},
{path: "../testdata/256529.flac"},
{path: "../testdata/257344.flac"},
// IETF test cases.
{path: "../testdata/flac-test-files/subset/01 - blocksize 4096.flac"},
{path: "../testdata/flac-test-files/subset/02 - blocksize 4608.flac"},
{path: "../testdata/flac-test-files/subset/03 - blocksize 16.flac"},
{path: "../testdata/flac-test-files/subset/04 - blocksize 192.flac"},
{path: "../testdata/flac-test-files/subset/05 - blocksize 254.flac"},
{path: "../testdata/flac-test-files/subset/06 - blocksize 512.flac"},
{path: "../testdata/flac-test-files/subset/07 - blocksize 725.flac"},
{path: "../testdata/flac-test-files/subset/08 - blocksize 1000.flac"},
{path: "../testdata/flac-test-files/subset/09 - blocksize 1937.flac"},
{path: "../testdata/flac-test-files/subset/10 - blocksize 2304.flac"},
{path: "../testdata/flac-test-files/subset/11 - partition order 8.flac"},
{path: "../testdata/flac-test-files/subset/12 - qlp precision 15 bit.flac"},
{path: "../testdata/flac-test-files/subset/13 - qlp precision 2 bit.flac"},
{path: "../testdata/flac-test-files/subset/14 - wasted bits.flac"},
{path: "../testdata/flac-test-files/subset/15 - only verbatim subframes.flac"},
{path: "../testdata/flac-test-files/subset/16 - partition order 8 containing escaped partitions.flac"},
{path: "../testdata/flac-test-files/subset/17 - all fixed orders.flac"},
{path: "../testdata/flac-test-files/subset/18 - precision search.flac"},
{path: "../testdata/flac-test-files/subset/19 - samplerate 35467Hz.flac"},
{path: "../testdata/flac-test-files/subset/20 - samplerate 39kHz.flac"},
{path: "../testdata/flac-test-files/subset/21 - samplerate 22050Hz.flac"},
{path: "../testdata/flac-test-files/subset/22 - 12 bit per sample.flac"},
{path: "../testdata/flac-test-files/subset/23 - 8 bit per sample.flac"},
{path: "../testdata/flac-test-files/subset/24 - variable blocksize file created with flake revision 264.flac"},
{path: "../testdata/flac-test-files/subset/25 - variable blocksize file created with flake revision 264, modified to create smaller blocks.flac"},
{path: "../testdata/flac-test-files/subset/26 - variable blocksize file created with CUETools.Flake 2.1.6.flac"},
{path: "../testdata/flac-test-files/subset/27 - old format variable blocksize file created with Flake 0.11.flac"},
{path: "../testdata/flac-test-files/subset/28 - high resolution audio, default settings.flac"},
{path: "../testdata/flac-test-files/subset/29 - high resolution audio, blocksize 16384.flac"},
{path: "../testdata/flac-test-files/subset/30 - high resolution audio, blocksize 13456.flac"},
{path: "../testdata/flac-test-files/subset/31 - high resolution audio, using only 32nd order predictors.flac"},
{path: "../testdata/flac-test-files/subset/32 - high resolution audio, partition order 8 containing escaped partitions.flac"},
{path: "../testdata/flac-test-files/subset/33 - samplerate 192kHz.flac"},
{path: "../testdata/flac-test-files/subset/34 - samplerate 192kHz, using only 32nd order predictors.flac"},
{path: "../testdata/flac-test-files/subset/35 - samplerate 134560Hz.flac"},
{path: "../testdata/flac-test-files/subset/36 - samplerate 384kHz.flac"},
{path: "../testdata/flac-test-files/subset/37 - 20 bit per sample.flac"},
{path: "../testdata/flac-test-files/subset/38 - 3 channels (3.0).flac"},
{path: "../testdata/flac-test-files/subset/39 - 4 channels (4.0).flac"},
{path: "../testdata/flac-test-files/subset/40 - 5 channels (5.0).flac"},
{path: "../testdata/flac-test-files/subset/41 - 6 channels (5.1).flac"},
{path: "../testdata/flac-test-files/subset/42 - 7 channels (6.1).flac"},
{path: "../testdata/flac-test-files/subset/43 - 8 channels (7.1).flac"},
{path: "../testdata/flac-test-files/subset/44 - 8-channel surround, 192kHz, 24 bit, using only 32nd order predictors.flac"},
{path: "../testdata/flac-test-files/subset/45 - no total number of samples set.flac"},
{path: "../testdata/flac-test-files/subset/46 - no min-max framesize set.flac"},
{path: "../testdata/flac-test-files/subset/47 - only STREAMINFO.flac"},
{path: "../testdata/flac-test-files/subset/48 - Extremely large SEEKTABLE.flac"},
{path: "../testdata/flac-test-files/subset/49 - Extremely large PADDING.flac"},
{path: "../testdata/flac-test-files/subset/50 - Extremely large PICTURE.flac"},
{path: "../testdata/flac-test-files/subset/51 - Extremely large VORBISCOMMENT.flac"},
{path: "../testdata/flac-test-files/subset/52 - Extremely large APPLICATION.flac"},
{path: "../testdata/flac-test-files/subset/53 - CUESHEET with very many indexes.flac"},
{path: "../testdata/flac-test-files/subset/54 - 1000x repeating VORBISCOMMENT.flac"},
{path: "../testdata/flac-test-files/subset/55 - file 48-53 combined.flac"},
{path: "../testdata/flac-test-files/subset/56 - JPG PICTURE.flac"},
{path: "../testdata/flac-test-files/subset/57 - PNG PICTURE.flac"},
{path: "../testdata/flac-test-files/subset/58 - GIF PICTURE.flac"},
{path: "../testdata/flac-test-files/subset/59 - AVIF PICTURE.flac"},
{path: "../testdata/flac-test-files/subset/60 - mono audio.flac"},
{path: "../testdata/flac-test-files/subset/61 - predictor overflow check, 16-bit.flac"},
{path: "../testdata/flac-test-files/subset/62 - predictor overflow check, 20-bit.flac"},
// TODO: fix decoding of "subset/63 - ...flac": MD5 checksum mismatch for decoded audio samples; expected e4e4a6b3a672a849a3e2157c11ad23c6, got a0343afaaaa6229266d78ccf3175eb8d
{path: "../testdata/flac-test-files/subset/63 - predictor overflow check, 24-bit.flac"},
{path: "../testdata/flac-test-files/subset/64 - rice partitions with escape code zero.flac"},
}
// TestFrameHash decodes every frame of each golden FLAC file and verifies that
// the MD5 checksum of the decoded audio samples matches the checksum stored in
// StreamInfo. Files without a stored MD5 checksum are skipped.
func TestFrameHash(t *testing.T) {
	var zeroHash [md5.Size]byte
	for _, g := range golden {
		t.Run(g.path, func(t *testing.T) {
			stream, err := flac.Open(g.path)
			if err != nil {
				t.Fatal(err)
			}
			defer stream.Close()

			// An all-zero MD5 hash in StreamInfo means that no hash was set.
			want := stream.Info.MD5sum[:]
			if bytes.Equal(want, zeroHash[:]) {
				t.Skipf("path=%q, skipping frame hash test as no MD5 hash was set in StreamInfo", g.path)
			}

			// Add the samples of each successfully parsed frame to a running
			// MD5 hash; frames that fail to parse are reported and ignored.
			md5sum := md5.New()
			frameNum := 0
			for {
				frame, err := stream.ParseNext()
				if err == io.EOF {
					break
				}
				if err != nil {
					t.Errorf("path=%q, frameNum=%d: error while parsing frame; %v", g.path, frameNum, err)
				} else {
					frame.Hash(md5sum)
				}
				frameNum++
			}

			// Verify the decoded audio samples by comparing the MD5 checksum
			// that is stored in StreamInfo with the computed one.
			if got := md5sum.Sum(nil); !bytes.Equal(got, want) {
				t.Errorf("path=%q: MD5 checksum mismatch for decoded audio samples; expected %32x, got %32x", g.path, want, got)
			}
		})
	}
}
// BenchmarkFrameParse measures the time it takes to open the benchmark FLAC
// file and parse all of its audio frames.
//
// The file 151185.flac is a 119.5 MB public domain FLAC file used to benchmark
// the flac library. Because of its size, it has not been included in the
// repository, but is available for download at
//
// http://freesound.org/people/jarfil/sounds/151185/
func BenchmarkFrameParse(b *testing.B) {
	const path = "../testdata/benchmark/151185.flac"
	for i := 0; i < b.N; i++ {
		stream, err := flac.Open(path)
		if err != nil {
			b.Fatal(err)
		}
		// Parse frames until the first error; io.EOF marks the end of the
		// stream.
		for err == nil {
			_, err = stream.ParseNext()
		}
		stream.Close()
		if err != io.EOF {
			b.Fatal(err)
		}
	}
}
// BenchmarkFrameHash measures the time it takes to open the benchmark FLAC
// file, parse all of its audio frames and compute the MD5 checksum of the
// decoded audio samples. The computed checksum is verified against the one
// stored in StreamInfo.
//
// The file 151185.flac is a 119.5 MB public domain FLAC file used to benchmark
// the flac library. Because of its size, it has not been included in the
// repository, but is available for download at
//
// http://freesound.org/people/jarfil/sounds/151185/
func BenchmarkFrameHash(b *testing.B) {
	const path = "../testdata/benchmark/151185.flac"
	for i := 0; i < b.N; i++ {
		stream, err := flac.Open(path)
		if err != nil {
			b.Fatal(err)
		}
		md5sum := md5.New()
		for {
			frame, err := stream.ParseNext()
			if err != nil {
				// The stream is closed both at the end of the stream and on
				// failure.
				stream.Close()
				if err == io.EOF {
					break
				}
				b.Fatal(err)
			}
			frame.Hash(md5sum)
		}
		// Verify the decoded audio samples by comparing the MD5 checksum that is
		// stored in StreamInfo with the computed one.
		want := stream.Info.MD5sum[:]
		if got := md5sum.Sum(nil); !bytes.Equal(got, want) {
			b.Fatalf("MD5 checksum mismatch for decoded audio samples; expected %32x, got %32x", want, got)
		}
	}
}

534
frame/subframe.go Executable file
View File

@@ -0,0 +1,534 @@
package frame
import (
"errors"
"fmt"
"github.com/mewkiz/flac/internal/bits"
)
// A Subframe contains the encoded audio samples from one channel of an audio
// block (a part of the audio stream).
//
// ref: https://www.xiph.org/flac/format.html#subframe
type Subframe struct {
// Subframe header.
SubHeader
// Unencoded audio samples. Samples is initially nil, and gets populated by a
// call to Frame.Parse.
//
// Samples is used by decodeFixed and decodeFIR to temporarily store
// residuals. Before returning they call decodeLPC which decodes the audio
// samples.
Samples []int32
// Number of audio samples in the subframe; set to the block size of the
// frame by Frame.parseSubframe.
NSamples int
}
// parseSubframe reads and parses the header, and the audio samples of a
// subframe from br. bps specifies the sample size of the subframe in
// bits-per-sample, before any wasted bits-per-sample have been subtracted.
//
// The returned subframe is non-nil even when an error is reported. An error is
// returned if the number of wasted bits-per-sample exceeds bps.
func (frame *Frame) parseSubframe(br *bits.Reader, bps uint) (subframe *Subframe, err error) {
	// Parse subframe header.
	subframe = new(Subframe)
	if err = subframe.parseHeader(br); err != nil {
		return subframe, err
	}

	// Adjust bps of subframe for wasted bits-per-sample. As bps is unsigned, a
	// wasted bits count larger than bps would wrap around to a huge sample
	// size instead of becoming negative.
	if subframe.Wasted > bps {
		return subframe, fmt.Errorf("frame.Frame.parseSubframe: invalid number of wasted bits-per-sample; expected <= %d, got %d", bps, subframe.Wasted)
	}
	bps -= subframe.Wasted

	// Decode subframe audio samples.
	subframe.NSamples = int(frame.BlockSize)
	subframe.Samples = make([]int32, 0, subframe.NSamples)
	switch subframe.Pred {
	case PredConstant:
		err = subframe.decodeConstant(br, bps)
	case PredVerbatim:
		err = subframe.decodeVerbatim(br, bps)
	case PredFixed:
		err = subframe.decodeFixed(br, bps)
	case PredFIR:
		err = subframe.decodeFIR(br, bps)
	}

	// Left shift to account for wasted bits-per-sample; nothing to do when no
	// bits were wasted.
	if subframe.Wasted > 0 {
		for i, sample := range subframe.Samples {
			subframe.Samples[i] = sample << subframe.Wasted
		}
	}
	return subframe, err
}
// A SubHeader specifies the prediction method and order of a subframe.
//
// ref: https://www.xiph.org/flac/format.html#subframe_header
type SubHeader struct {
// Specifies the prediction method used to encode the audio sample of the
// subframe.
Pred Pred
// Prediction order used by fixed and FIR linear prediction decoding.
Order int
// Wasted bits-per-sample. The decoded samples are left shifted by this
// amount by Frame.parseSubframe.
Wasted uint
// Residual coding method used by fixed and FIR linear prediction decoding.
ResidualCodingMethod ResidualCodingMethod
// Coefficients' precision in bits used by FIR linear prediction decoding.
CoeffPrec uint
// Predictor coefficient shift needed in bits used by FIR linear prediction
// decoding.
CoeffShift int32
// Predictor coefficients used by FIR linear prediction decoding.
Coeffs []int32
// Rice-coding subframe fields used by residual coding methods rice1 and
// rice2; nil if unused.
RiceSubframe *RiceSubframe
}
// RiceSubframe holds rice-coding subframe fields used by residual coding
// methods rice1 and rice2.
type RiceSubframe struct {
// Partition order used by fixed and FIR linear prediction decoding
// (for residual coding methods, rice1 and rice2).
PartOrder int // TODO: remove PartOrder and infer from int(math.Log2(float64(len(Partitions))))?
// Rice partitions.
// NOTE(review): the TODO on PartOrder suggests there are 2^PartOrder
// partitions; confirm against the residual decoder.
Partitions []RicePartition
}
// RicePartition is a partition containing a subset of the residuals of a
// subframe. It stores the parameters of the partition; the residuals
// themselves are appended to the samples of the subframe when decoded.
type RicePartition struct {
// Rice parameter. The all-ones value (0xF for a 4-bit parameter, 0x1F for a
// 5-bit parameter) is an escape code, marking the residuals of the partition
// as stored in unencoded binary form.
Param uint
// Residual sample size in bits-per-sample used by escaped partitions.
EscapedBitsPerSample uint
}
// parseHeader reads and parses the header of a subframe. It records the
// prediction method, the prediction order (for fixed and FIR prediction) and
// the number of wasted bits-per-sample of the subframe.
func (subframe *Subframe) parseHeader(br *bits.Reader) error {
	// 1 bit: zero-padding.
	pad, err := br.Read(1)
	if err != nil {
		return unexpected(err)
	}
	if pad != 0 {
		return errors.New("frame.Subframe.parseHeader: non-zero padding")
	}

	// 6 bits: prediction method and order, laid out as follows:
	//
	//    000000: Constant prediction method.
	//    000001: Verbatim prediction method.
	//    00001x: reserved.
	//    0001xx: reserved.
	//    001xxx: Fixed prediction method if xxx <= 4 (xxx=order); otherwise
	//            reserved.
	//    01xxxx: reserved.
	//    1xxxxx: FIR prediction method; xxxxx=order-1
	code, err := br.Read(6)
	if err != nil {
		return unexpected(err)
	}
	switch {
	case code >= 32:
		// 1xxxxx: FIR prediction method.
		subframe.Pred = PredFIR
		subframe.Order = int(code&0x1F) + 1
	case code >= 8 && code < 16 && code&0x07 <= 4:
		// 001xxx with xxx <= 4: Fixed prediction method.
		subframe.Pred = PredFixed
		subframe.Order = int(code & 0x07)
	case code == 1:
		// 000001: Verbatim prediction method.
		subframe.Pred = PredVerbatim
	case code == 0:
		// 000000: Constant prediction method.
		subframe.Pred = PredConstant
	default:
		// Every remaining bit pattern is reserved.
		return fmt.Errorf("frame.Subframe.parseHeader: reserved prediction method bit pattern (%06b)", code)
	}

	// 1 bit: hasWastedBits.
	hasWasted, err := br.Read(1)
	if err != nil {
		return unexpected(err)
	}
	if hasWasted == 0 {
		return nil
	}
	// k wasted bits-per-sample in source subblock, k-1 follows, unary coded;
	// e.g. k=3 => 001 follows, k=7 => 0000001 follows.
	zeros, err := br.ReadUnary()
	if err != nil {
		return unexpected(err)
	}
	subframe.Wasted = uint(zeros) + 1
	return nil
}
// Pred specifies the prediction method used to encode the audio samples of a
// subframe. It is derived from the 6-bit prediction method field of the
// subframe header; reserved bit patterns of that field are rejected while
// parsing and have no corresponding Pred value.
type Pred uint8
// Prediction methods.
const (
// PredConstant specifies that the subframe contains a constant sound. The
// audio samples are encoded using run-length encoding. Since every audio
// sample has the same constant value, a single unencoded audio sample is
// stored in practice. It is replicated a number of times, as specified by
// BlockSize in the frame header.
PredConstant Pred = iota
// PredVerbatim specifies that the subframe contains unencoded audio samples.
// Random sound is often stored verbatim, since no prediction method can
// compress it sufficiently.
PredVerbatim
// PredFixed specifies that the subframe contains linear prediction coded
// audio samples. The coefficients of the prediction polynomial are selected
// from a fixed set, and can represent 0th through fourth-order polynomials.
// The prediction order (0 through 4) is stored within the subframe along
// with the same number of unencoded warm-up samples, which are used to kick
// start the prediction polynomial. The remainder of the subframe stores
// encoded residuals (signal errors) which specify the difference between the
// predicted and the original audio samples.
PredFixed
// PredFIR specifies that the subframe contains linear prediction coded audio
// samples. The coefficients of the prediction polynomial are stored in the
// subframe, and can represent 1st through 32nd-order polynomials. The
// prediction order (1 through 32; stored as order-1 in the subframe header)
// is stored within the subframe along with the same number of unencoded
// warm-up samples, which are used to kick start the prediction polynomial.
// The remainder of the subframe stores encoded residuals (signal errors)
// which specify the difference between the predicted and the original audio
// samples.
PredFIR
)
// signExtend interprets x as a signed n-bit integer value (two's complement)
// and sign extends it to 32 bits.
func signExtend(x uint64, n uint) int32 {
	// The most significant of the n bits is the sign bit.
	signBit := uint64(1) << (n - 1)
	if x&signBit == 0 {
		// Non-negative; the value is used as is.
		return int32(x)
	}
	// Negative; set every bit above the n-bit value.
	highBits := ^uint64(0) << n
	return int32(x | highBits)
}
// decodeConstant reads the single unencoded audio sample stored in the
// subframe and replicates it for every sample of the subframe. The constant
// encoding can be thought of as run-length encoding.
//
// ref: https://www.xiph.org/flac/format.html#subframe_constant
func (subframe *Subframe) decodeConstant(br *bits.Reader, bps uint) error {
	// (bits-per-sample) bits: Unencoded constant value of the subblock.
	raw, err := br.Read(bps)
	if err != nil {
		return unexpected(err)
	}
	value := signExtend(raw, bps)

	// Replicate the constant value once per sample of the subframe.
	for remaining := subframe.NSamples; remaining > 0; remaining-- {
		subframe.Samples = append(subframe.Samples, value)
	}
	return nil
}
// decodeVerbatim reads the audio samples of the subframe, which are stored
// unencoded, one after another.
//
// ref: https://www.xiph.org/flac/format.html#subframe_verbatim
func (subframe *Subframe) decodeVerbatim(br *bits.Reader, bps uint) error {
	for remaining := subframe.NSamples; remaining > 0; remaining-- {
		// (bits-per-sample) bits: Unencoded sample of the subblock.
		raw, err := br.Read(bps)
		if err != nil {
			return unexpected(err)
		}
		subframe.Samples = append(subframe.Samples, signExtend(raw, bps))
	}
	return nil
}
// FixedCoeffs maps from prediction order to the LPC coefficients used in fixed
// encoding. The coefficients of an order are listed from the most recent
// sample (x[n-1]) to the oldest.
//
// x_0[n] = 0
// x_1[n] = x[n-1]
// x_2[n] = 2*x[n-1] - x[n-2]
// x_3[n] = 3*x[n-1] - 3*x[n-2] + x[n-3]
// x_4[n] = 4*x[n-1] - 6*x[n-2] + 4*x[n-3] - x[n-4]
//
// Order 0 has no entry in the literal below; its element is the zero value (a
// nil slice), as the 0th-order prediction is always 0 and uses no
// coefficients.
var FixedCoeffs = [...][]int32{
// ref: Section 2.2 of http://www.hpl.hp.com/techreports/1999/HPL-1999-144.pdf
1: {1},
2: {2, -1},
3: {3, -3, 1},
// ref: Data Compression: The Complete Reference (7.10.1)
4: {4, -6, 4, -1},
}
// decodeFixed decodes the linear prediction coded samples of the subframe,
// using a fixed set of predefined polynomial coefficients selected by the
// prediction order of the subframe.
//
// ref: https://www.xiph.org/flac/format.html#subframe_fixed
func (subframe *Subframe) decodeFixed(br *bits.Reader, bps uint) error {
	// One unencoded warm-up sample is stored per prediction order.
	for warmup := subframe.Order; warmup > 0; warmup-- {
		// (bits-per-sample) bits: Unencoded warm-up sample.
		raw, err := br.Read(bps)
		if err != nil {
			return unexpected(err)
		}
		subframe.Samples = append(subframe.Samples, signExtend(raw, bps))
	}

	// The residuals follow the warm-up samples.
	if err := subframe.decodeResiduals(br); err != nil {
		return err
	}

	// Predict the remaining samples using the predefined coefficients of the
	// given order, and correct the prediction using the decoded residuals.
	// Fixed prediction uses no coefficient shift.
	coeffs := FixedCoeffs[subframe.Order]
	return subframe.decodeLPC(coeffs, 0)
}
// decodeFIR decodes the linear prediction coded samples of the subframe, using
// polynomial coefficients stored in the stream. The coefficient precision,
// coefficient shift and coefficients read from the stream are recorded in the
// subframe.
//
// ref: https://www.xiph.org/flac/format.html#subframe_lpc
func (subframe *Subframe) decodeFIR(br *bits.Reader, bps uint) error {
	// One unencoded warm-up sample is stored per prediction order.
	for warmup := subframe.Order; warmup > 0; warmup-- {
		// (bits-per-sample) bits: Unencoded warm-up sample.
		raw, err := br.Read(bps)
		if err != nil {
			return unexpected(err)
		}
		subframe.Samples = append(subframe.Samples, signExtend(raw, bps))
	}

	// 4 bits: (coefficients' precision in bits) - 1.
	precBits, err := br.Read(4)
	if err != nil {
		return unexpected(err)
	}
	if precBits == 0xF {
		return errors.New("frame.Subframe.decodeFIR: invalid coefficient precision bit pattern (1111)")
	}
	prec := uint(precBits) + 1
	subframe.CoeffPrec = prec

	// 5 bits: predictor coefficient shift needed in bits (signed).
	shiftBits, err := br.Read(5)
	if err != nil {
		return unexpected(err)
	}
	shift := signExtend(shiftBits, 5)
	subframe.CoeffShift = shift

	// One signed coefficient of prec bits is stored per prediction order. The
	// coefficients are only recorded in the subframe once all of them have
	// been read.
	coeffs := make([]int32, subframe.Order)
	for i := range coeffs {
		// (prec) bits: Predictor coefficient.
		raw, err := br.Read(prec)
		if err != nil {
			return unexpected(err)
		}
		coeffs[i] = signExtend(raw, prec)
	}
	subframe.Coeffs = coeffs

	// The residuals follow the coefficients.
	if err := subframe.decodeResiduals(br); err != nil {
		return err
	}

	// Predict the remaining samples using the coefficients read from the
	// stream, and correct the prediction using the decoded residuals.
	return subframe.decodeLPC(coeffs, shift)
}
// ResidualCodingMethod specifies a residual coding method. Its value is the
// 2-bit code stored in front of the residuals of a subframe; codes other than
// those of rice1 and rice2 are reserved and rejected when decoding.
type ResidualCodingMethod uint8
// Residual coding methods.
const (
// ResidualCodingMethodRice1 specifies Rice coding with a 4-bit Rice parameter
// (rice1).
ResidualCodingMethodRice1 ResidualCodingMethod = 0
// ResidualCodingMethodRice2 specifies Rice coding with a 5-bit Rice parameter
// (rice2).
ResidualCodingMethodRice2 ResidualCodingMethod = 1
)
// decodeResiduals decodes the encoded residuals (prediction method error
// signals) of the subframe. The residual coding method read from the stream is
// recorded in the subframe, also when it turns out to be reserved.
//
// ref: https://www.xiph.org/flac/format.html#residual
func (subframe *Subframe) decodeResiduals(br *bits.Reader) error {
	// 2 bits: Residual coding method.
	//
	//    00: Rice coding with a 4-bit Rice parameter.
	//    01: Rice coding with a 5-bit Rice parameter.
	//    10: reserved.
	//    11: reserved.
	code, err := br.Read(2)
	if err != nil {
		return unexpected(err)
	}
	method := ResidualCodingMethod(code)
	subframe.ResidualCodingMethod = method

	// The coding methods only differ in the size of the Rice parameter.
	var paramSize uint
	switch method {
	case 0x0:
		paramSize = 4
	case 0x1:
		paramSize = 5
	default:
		return fmt.Errorf("frame.Subframe.decodeResiduals: reserved residual coding method bit pattern (%02b)", uint8(method))
	}
	return subframe.decodeRicePart(br, paramSize)
}
// decodeRicePart decodes the Rice partitions of encoded residuals from the
// subframe, using a Rice parameter of the specified size in bits (4 for rice1,
// 5 for rice2). The decoded residuals are appended to the samples of the
// subframe, and the partition order and per-partition parameters are recorded
// in subframe.RiceSubframe.
//
// An error is returned if the stream ends prematurely, or if the partition
// order leaves fewer samples in a partition than the prediction order of the
// subframe; the warm-up samples are accounted to the first partition, so such
// a stream is malformed.
//
// ref: https://www.xiph.org/flac/format.html#partitioned_rice
// ref: https://www.xiph.org/flac/format.html#partitioned_rice2
func (subframe *Subframe) decodeRicePart(br *bits.Reader, paramSize uint) error {
	// 4 bits: Partition order.
	x, err := br.Read(4)
	if err != nil {
		return unexpected(err)
	}
	partOrder := int(x)
	riceSubframe := &RiceSubframe{
		PartOrder: partOrder,
	}
	subframe.RiceSubframe = riceSubframe

	// Parse Rice partitions; in total 2^partOrder partitions.
	//
	// ref: https://www.xiph.org/flac/format.html#rice_partition
	// ref: https://www.xiph.org/flac/format.html#rice2_partition
	nparts := 1 << partOrder

	// Every partition covers the same number of samples of the subframe. The
	// first partition stores that many residuals minus the warm-up samples
	// (one per prediction order), so a partition smaller than the prediction
	// order would yield a negative residual count. Reject it here; otherwise
	// the first partition would silently be treated as empty and the error may
	// go unnoticed.
	partSamples := subframe.NSamples / nparts
	if partSamples < subframe.Order {
		return fmt.Errorf("frame.Subframe.decodeRicePart: invalid partition order (%d); partition size (%d) is smaller than prediction order (%d)", partOrder, partSamples, subframe.Order)
	}

	partitions := make([]RicePartition, nparts)
	riceSubframe.Partitions = partitions
	for i := range partitions {
		partition := &partitions[i]
		// (4 or 5) bits: Rice parameter.
		x, err = br.Read(paramSize)
		if err != nil {
			return unexpected(err)
		}
		param := uint(x)
		partition.Param = param

		// Determine the number of Rice encoded samples in the partition.
		nsamples := partSamples
		if i == 0 {
			nsamples -= subframe.Order
		}

		if paramSize == 4 && param == 0xF || paramSize == 5 && param == 0x1F {
			// 1111 or 11111: Escape code, meaning the partition is in unencoded
			// binary form using n bits per sample; n follows as a 5-bit number.
			x, err := br.Read(5)
			if err != nil {
				return unexpected(err)
			}
			n := uint(x)
			partition.EscapedBitsPerSample = n
			for j := 0; j < nsamples; j++ {
				sample, err := br.Read(n)
				if err != nil {
					return unexpected(err)
				}
				// ref: https://datatracker.ietf.org/doc/draft-ietf-cellar-flac/
				//
				// From section 9.2.7.1. Escaped partition:
				//
				// The residual samples themselves are stored signed two's
				// complement. For example, when a partition is escaped and each
				// residual sample is stored with 3 bits, the number -1 is
				// represented as 0b111.
				subframe.Samples = append(subframe.Samples, int32(bits.IntN(sample, n)))
			}
			continue
		}

		// Decode the Rice encoded residuals of the partition.
		for j := 0; j < nsamples; j++ {
			residual, err := subframe.decodeRiceResidual(br, param)
			if err != nil {
				return err
			}
			subframe.Samples = append(subframe.Samples, residual)
		}
	}
	return nil
}
// decodeRiceResidual decodes and returns a Rice encoded residual (error
// signal), using the Rice parameter k. The residual is stored as a unary
// encoded quotient followed by a k-bit remainder, which together form a ZigZag
// encoded value.
func (subframe *Subframe) decodeRiceResidual(br *bits.Reader, k uint) (int32, error) {
	// Unary encoded most significant bits.
	quotient, err := br.ReadUnary()
	if err != nil {
		return 0, unexpected(err)
	}
	// Binary encoded least significant bits (k bits).
	remainder, err := br.Read(k)
	if err != nil {
		return 0, unexpected(err)
	}
	// Combine both parts and undo the ZigZag encoding.
	return bits.DecodeZigZag(uint32(quotient<<k | remainder)), nil
}
// decodeLPC decodes linear prediction coded audio samples in place. On entry,
// subframe.Samples holds the unencoded warm-up samples (one per prediction
// order) followed by the residuals; on return, each residual has been replaced
// by the predicted sample corrected by that residual.
//
// The prediction of a sample is the sum of the products of the coefficients
// and the preceding samples (coeffs[0] applies to the most recent sample),
// right shifted by shift. An error is returned if the number of coefficients
// differs from the prediction order, if shift is negative, or if the number of
// samples differs from subframe.NSamples.
func (subframe *Subframe) decodeLPC(coeffs []int32, shift int32) error {
	order, total := subframe.Order, subframe.NSamples
	switch {
	case len(coeffs) != order:
		return fmt.Errorf("frame.Subframe.decodeLPC: prediction order (%d) differs from number of coefficients (%d)", order, len(coeffs))
	case shift < 0:
		return fmt.Errorf("frame.Subframe.decodeLPC: invalid negative shift")
	case total != len(subframe.Samples):
		return fmt.Errorf("frame.Subframe.decodeLPC: subframe sample count mismatch; expected %d, got %d", total, len(subframe.Samples))
	}

	samples := subframe.Samples
	rshift := uint(shift)
	for n := order; n < total; n++ {
		// Accumulate in 64 bits, as the products may exceed 32 bits.
		var pred int64
		for k := 0; k < order; k++ {
			pred += int64(coeffs[k]) * int64(samples[n-1-k])
		}
		// samples[n] holds the residual; add the prediction to recover the
		// original sample.
		samples[n] += int32(pred >> rshift)
	}
	return nil
}