acid-drop/lib/ESP32-audioI2S/src/flac_decoder/flac_decoder.cpp

557 lines
22 KiB
C++
Raw Normal View History

2024-05-23 22:42:03 +00:00
/*
* flac_decoder.cpp
* Java source code from https://www.nayuki.io/page/simple-flac-implementation
* adapted to ESP32
*
* Created on: Jul 03,2020
* Updated on: Jul 03,2021
*
* Author: Wolle
*
*
*/
#include "flac_decoder.h"
#include "vector"
using namespace std;
// Decoder work areas. They are allocated by FLACDecoder_AllocateBuffers() and released by
// FLACDecoder_FreeBuffers(); every other function in this file dereferences them without a null check.
FLACFrameHeader_t *FLACFrameHeader;
FLACMetadataBlock_t *FLACMetadataBlock;
FLACsubFramesBuff_t *FLACsubFramesBuff;
// Predictor coefficients of the subframe currently being decoded
// (filled by the fixed / LPC subframe decoders, consumed by restoreLinearPrediction()).
vector<int32_t>coefs;
// Upper bound of samples per channel that FLACDecode() copies to the output buffer in one call.
const uint16_t outBuffSize = 2048;
// Block size (samples per channel) of the frame currently being decoded, set from the frame header.
uint16_t m_blockSize=0;
// Set to m_blockSize when a frame header has been parsed; not read anywhere in the visible code.
uint16_t m_blockSizeLeft = 0;
// Number of output values written by the last FLACDecode() call; read and cleared by FLACGetOutputSamps().
uint16_t m_validSamples = 0;
// Decoder state: DECODE_FRAME, DECODE_SUBFRAMES or OUT_SAMPLES.
uint8_t m_status = 0;
// Start of the input data handed to FLACDecode(); the bit reader fetches bytes from m_inptr + m_rIndex.
uint8_t* m_inptr;
// Input bytes still unread; decremented by the bit reader for every byte it fetches.
int16_t m_bytesAvail;
// Difference between *bytesLeft and m_bytesAvail, stored by FLACDecode() when a frame is finished.
int16_t m_bytesDecoded = 0;
// Only referenced from commented-out code in FLACDecode().
float m_compressionRatio = 0;
// Byte offset of the next input byte relative to m_inptr.
uint16_t m_rIndex=0;
// Bit reservoir of the bit reader: bytes are shifted in from the right, results are taken MSB first.
uint64_t m_bitBuffer = 0;
// Number of not yet consumed bits in m_bitBuffer.
uint8_t m_bitBufferLen = 0;
// Set when an "OggS" pattern has been seen; the next FLACDecode() call then skips the Ogg page header.
bool m_f_OggS_found = false;
//----------------------------------------------------------------------------------------------------------------------
// FLAC INI SECTION
//----------------------------------------------------------------------------------------------------------------------
bool FLACDecoder_AllocateBuffers(void){
if(psramFound()) {
// PSRAM found, Buffer will be allocated in PSRAM
if(!FLACFrameHeader) {FLACFrameHeader = (FLACFrameHeader_t*) ps_malloc(sizeof(FLACFrameHeader_t));}
if(!FLACMetadataBlock) {FLACMetadataBlock = (FLACMetadataBlock_t*) ps_malloc(sizeof(FLACMetadataBlock_t));}
if(!FLACsubFramesBuff) {FLACsubFramesBuff = (FLACsubFramesBuff_t*) ps_malloc(sizeof(FLACsubFramesBuff_t));}
}
else {
if(!FLACFrameHeader) {FLACFrameHeader = (FLACFrameHeader_t*) malloc(sizeof(FLACFrameHeader_t));}
if(!FLACMetadataBlock) {FLACMetadataBlock = (FLACMetadataBlock_t*) malloc(sizeof(FLACMetadataBlock_t));}
if(!FLACsubFramesBuff) {FLACsubFramesBuff = (FLACsubFramesBuff_t*) malloc(sizeof(FLACsubFramesBuff_t));}
}
if(!FLACFrameHeader || !FLACMetadataBlock || !FLACsubFramesBuff ){
log_e("not enough memory to allocate flacdecoder buffers");
return false;
}
FLACDecoder_ClearBuffer();
return true;
}
//----------------------------------------------------------------------------------------------------------------------
// Zeroes the decoder work buffers and puts the decoder back into the DECODE_FRAME state.
// Buffers that are not allocated (before FLACDecoder_AllocateBuffers() or after
// FLACDecoder_FreeBuffers()) are skipped instead of being passed to memset() as null pointers.
void FLACDecoder_ClearBuffer(){
    if(FLACFrameHeader)   { memset(FLACFrameHeader,   0, sizeof(FLACFrameHeader_t)); }
    if(FLACMetadataBlock) { memset(FLACMetadataBlock, 0, sizeof(FLACMetadataBlock_t)); }
    if(FLACsubFramesBuff) { memset(FLACsubFramesBuff, 0, sizeof(FLACsubFramesBuff_t)); }
    m_status = DECODE_FRAME;
}
//----------------------------------------------------------------------------------------------------------------------
// Releases the decoder work buffers and resets their pointers to NULL, so that a later
// FLACDecoder_AllocateBuffers() call allocates them again. Calling it repeatedly is harmless
// because free() ignores null pointers.
void FLACDecoder_FreeBuffers(){
    free(FLACFrameHeader);
    FLACFrameHeader = NULL;

    free(FLACMetadataBlock);
    FLACMetadataBlock = NULL;

    free(FLACsubFramesBuff);
    FLACsubFramesBuff = NULL;
}
//----------------------------------------------------------------------------------------------------------------------
// B I T R E A D E R
//----------------------------------------------------------------------------------------------------------------------
// Reads the next nBits bits (0..32) from the input, most significant bit first, and returns them
// as an unsigned value.
// Bytes are fetched from m_inptr + m_rIndex into the bit reservoir until it holds enough bits;
// every fetched byte advances m_rIndex and decrements m_bytesAvail. Running past the announced
// input length is only logged, the read itself is not prevented.
uint32_t readUint(uint8_t nBits){
    while (m_bitBufferLen < nBits){
        uint8_t temp = *(m_inptr + m_rIndex);
        m_rIndex++;
        m_bytesAvail--;
        if(m_bytesAvail < 0) { log_i("error in bitreader"); }
        m_bitBuffer = (m_bitBuffer << 8) | temp;
        m_bitBufferLen += 8;
    }
    m_bitBufferLen -= nBits;
    uint32_t result = static_cast<uint32_t>(m_bitBuffer >> m_bitBufferLen);
    if (nBits < 32){
        // Build the mask with unsigned arithmetic: a signed (1 << 31) - 1 overflows int for nBits == 31.
        result &= (static_cast<uint32_t>(1) << nBits) - 1u;
    }
    return result;
}
// Reads nBits bits and interprets them as a two's complement signed number.
// A width of zero (or less) carries no information and yields 0; this also avoids the
// undefined shift by 32 that the sign extension below would otherwise perform.
int32_t readSignedInt(int nBits){
    if(nBits <= 0) return 0;
    const int unusedBits = 32 - nBits;
    int32_t temp = readUint(nBits) << unusedBits; // move the sign bit of the field to bit 31
    temp = temp >> unusedBits; // The C++ compiler uses the sign bit to fill vacated bit positions
    return temp;
}
// Reads one Rice coded signed integer with the given Rice parameter.
// The number of 0 bits before the terminating 1 bit forms the high part, the following param bits
// the low part; the combined value is mapped back to signed (even -> v/2, odd -> -(v/2) - 1).
// The accumulator is 64 bits wide so that the left shift by param cannot overflow on targets
// where long has only 32 bits.
int64_t readRiceSignedInt(uint8_t param){
    uint64_t val = 0;
    while (readUint(1) == 0)
        val++;
    val = (val << param) | readUint(param);
    return static_cast<int64_t>(val >> 1) ^ -static_cast<int64_t>(val & 1);
}
// Drops the bits left over from a partially consumed byte, so that the bit reservoir only
// holds whole bytes afterwards.
void alignToByte() {
    const uint8_t strayBits = m_bitBufferLen & 0x07;
    m_bitBufferLen = m_bitBufferLen - strayBits;
}
//----------------------------------------------------------------------------------------------------------------------
// F L A C - D E C O D E R
//----------------------------------------------------------------------------------------------------------------------
void FLACSetRawBlockParams(uint8_t Chans, uint32_t SampRate, uint8_t BPS, uint32_t tsis, uint32_t AuDaLength){
FLACMetadataBlock->numChannels = Chans;
FLACMetadataBlock->sampleRate = SampRate;
FLACMetadataBlock->bitsPerSample = BPS;
FLACMetadataBlock->totalSamples = tsis; // total samples in stream
FLACMetadataBlock->audioDataLength = AuDaLength;
}
//----------------------------------------------------------------------------------------------------------------------
// Brings the decoder back to its start condition: the bit reservoir is emptied and the next
// FLACDecode() call expects a frame header.
void FLACDecoderReset(){
    m_bitBufferLen = 0;
    m_bitBuffer    = 0;
    m_status       = DECODE_FRAME;
}
//----------------------------------------------------------------------------------------------------------------------
// Searches buf[0 .. nBytes-1] for a byte-aligned FLAC frame sync code.
// All 14 bits of the sync code (11111111 111110xx) must match; this is the same pattern that
// FLACDecode() verifies (0x3FFE), so a hit is not rejected there because of the sync code.
// On success the decoder is reset and the offset of the first sync byte is returned, otherwise -1.
int FLACFindSyncWord(unsigned char *buf, int nBytes) {
    for (int i = 0; i < nBytes - 1; i++) {
        if (buf[i] == 0xFF && (buf[i + 1] & 0xFC) == 0xF8) {
            FLACDecoderReset();
            return i;
        }
    }
    return -1;
}
//----------------------------------------------------------------------------------------------------------------------
// Searches buf[0 .. nBytes-1] first for a byte-aligned FLAC frame sync code and, if there is none,
// for the Ogg capture pattern "OggS".
// The FLAC sync code is matched with all 14 bits (11111111 111110xx), as FLACDecode() verifies it.
// The "OggS" search stops three bytes before the end, so it never reads beyond buf[nBytes-1].
// On a hit the decoder is reset and the offset of the match is returned (an "OggS" hit also sets
// m_f_OggS_found); -1 means nothing was found.
int FLACFindOggSyncWord(unsigned char *buf, int nBytes){
    /* find byte-aligned syncword - need 14 matching bits */
    for (int i = 0; i < nBytes - 1; i++) {
        if (buf[i] == 0xFF && (buf[i + 1] & 0xFC) == 0xF8) {
            FLACDecoderReset();
            log_i("FLAC sync found");
            return i;
        }
    }
    /* find byte-aligned OGG Magic - OggS */
    for (int i = 0; i + 3 < nBytes; i++) {
        if ((buf[i] == 'O') && (buf[i + 1] == 'g') && (buf[i + 2] == 'g') && (buf[i + 3] == 'S')) {
            FLACDecoderReset();
            log_i("OggS found");
            m_f_OggS_found = true;
            return i;
        }
    }
    return -1;
}
//----------------------------------------------------------------------------------------------------------------------
// Determines the length of an Ogg page header. buf has to point to the byte that follows the
// 4-byte "OggS" capture pattern (FLACDecode() consumes the pattern before calling this function).
// Fields behind the pattern, none of which is evaluated apart from the segment count:
//   1 byte  stream_structure_version
//   1 byte  header_type_flag
//   8 bytes absolute granule position
//   4 bytes stream serial number
//   4 bytes page sequence no
//   4 bytes page checksum
//   1 byte  number of page segments (n)
//   n bytes segment table
// Returns the number of header bytes behind the pattern, i.e. 23 + n. The result is computed
// as int, so it stays correct for segment counts above 232 where an 8-bit counter would wrap.
int FLACparseOggHeader(unsigned char *buf){
    const int segmentCountPos = 1 + 1 + 8 + 4 + 4 + 4;  // offset of the page segment count
    const int pageSegments = buf[segmentCountPos];       // length of the segment table
    return segmentCountPos + 1 + pageSegments;
}
//----------------------------------------------------------------------------------------------------------------------
// Decoder entry point, implemented as a state machine (m_status) that is meant to be called repeatedly.
//   inbuf     - input data; it is (re)registered with the bit reader on every call except while
//               m_status is OUT_SAMPLES
//   bytesLeft - in: number of bytes available in inbuf, out: reduced by the bytes consumed
//   outbuf    - receives the decoded samples; FLACGetOutputSamps() tells how many values were written
// Returns ERR_FLAC_NONE when a step has finished, GIVE_NEXT_LOOP when more samples of the current
// block are pending (call again), or one of the ERR_FLAC_... codes.
// One call performs one of these steps:
//   * skip an Ogg page header (if "OggS" has been seen before),
//   * DECODE_FRAME: parse a frame header, then return,
//   * DECODE_SUBFRAMES: decode all subframes and directly continue with OUT_SAMPLES,
//   * OUT_SAMPLES: copy at most outBuffSize samples per channel to outbuf.
int8_t FLACDecode(uint8_t *inbuf, int *bytesLeft, short *outbuf){
// An "OggS" pattern was found before: only step over the Ogg page header in this call.
if(m_f_OggS_found == true){
m_f_OggS_found = false;
*bytesLeft -= FLACparseOggHeader(inbuf);
return ERR_FLAC_NONE;
}
// Point the bit reader at the new input. While samples are being output the reader keeps its
// position, because the frame footer is still read from the previous input afterwards.
if(m_status != OUT_SAMPLES){
m_rIndex = 0;
m_bytesAvail = (*bytesLeft);
m_inptr = inbuf;
}
if(m_status == DECODE_FRAME){ // Read a ton of header fields, and ignore most of them
// NOTE(review): the four bytes are compared without checking that *bytesLeft is at least 4.
if ((inbuf[0] == 'O') && (inbuf[1] == 'g') && (inbuf[2] == 'g') && (inbuf[3] == 'S')){
*bytesLeft -= 4;
m_f_OggS_found = true;
return ERR_FLAC_NONE;
}
// 14-bit sync code, read as 8 + 6 bits
uint32_t temp = readUint(8);
uint16_t sync = temp << 6 |readUint(6);
if (sync != 0x3FFE){
log_i("Sync code expected 0x3FFE but received %X", sync);
return ERR_FLAC_SYNC_CODE_NOT_FOUND;
}
// one bit is read and discarded
readUint(1);
FLACFrameHeader->blockingStrategy = readUint(1);
FLACFrameHeader->blockSizeCode = readUint(4);
FLACFrameHeader->sampleRateCode = readUint(4);
FLACFrameHeader->chanAsgn = readUint(4);
FLACFrameHeader->sampleSizeCode = readUint(3);
// Stream parameters that are still zero (not provided through FLACSetRawBlockParams()) are
// derived from the frame header codes.
if(!FLACMetadataBlock->numChannels){
if(FLACFrameHeader->chanAsgn == 0) FLACMetadataBlock->numChannels = 1;
if(FLACFrameHeader->chanAsgn == 1) FLACMetadataBlock->numChannels = 2;
if(FLACFrameHeader->chanAsgn > 7) FLACMetadataBlock->numChannels = 2;
}
// channel assignments 2..7 leave numChannels at 0 and are rejected here
if(FLACMetadataBlock->numChannels < 1) return ERR_FLAC_UNKNOWN_CHANNEL_ASSIGNMENT;
if(!FLACMetadataBlock->bitsPerSample){
if(FLACFrameHeader->sampleSizeCode == 1) FLACMetadataBlock->bitsPerSample = 8;
if(FLACFrameHeader->sampleSizeCode == 2) FLACMetadataBlock->bitsPerSample = 12;
if(FLACFrameHeader->sampleSizeCode == 4) FLACMetadataBlock->bitsPerSample = 16;
if(FLACFrameHeader->sampleSizeCode == 5) FLACMetadataBlock->bitsPerSample = 20;
if(FLACFrameHeader->sampleSizeCode == 6) FLACMetadataBlock->bitsPerSample = 24;
}
// only sample sizes from 8 to 16 bits are accepted
if(FLACMetadataBlock->bitsPerSample > 16) return ERR_FLAC_BITS_PER_SAMPLE_TOO_BIG;
if(FLACMetadataBlock->bitsPerSample < 8 ) return ERR_FLAG_BITS_PER_SAMPLE_UNKNOWN;
if(!FLACMetadataBlock->sampleRate){
if(FLACFrameHeader->sampleRateCode == 1) FLACMetadataBlock->sampleRate = 88200;
if(FLACFrameHeader->sampleRateCode == 2) FLACMetadataBlock->sampleRate = 176400;
if(FLACFrameHeader->sampleRateCode == 3) FLACMetadataBlock->sampleRate = 192000;
if(FLACFrameHeader->sampleRateCode == 4) FLACMetadataBlock->sampleRate = 8000;
if(FLACFrameHeader->sampleRateCode == 5) FLACMetadataBlock->sampleRate = 16000;
if(FLACFrameHeader->sampleRateCode == 6) FLACMetadataBlock->sampleRate = 22050;
if(FLACFrameHeader->sampleRateCode == 7) FLACMetadataBlock->sampleRate = 24000;
if(FLACFrameHeader->sampleRateCode == 8) FLACMetadataBlock->sampleRate = 32000;
if(FLACFrameHeader->sampleRateCode == 9) FLACMetadataBlock->sampleRate = 44100;
if(FLACFrameHeader->sampleRateCode == 10) FLACMetadataBlock->sampleRate = 48000;
if(FLACFrameHeader->sampleRateCode == 11) FLACMetadataBlock->sampleRate = 96000;
}
// one bit is read and discarded
readUint(1);
// Variable-length number (presumably the UTF-8 style coded frame/sample number - confirm
// against the FLAC specification): the count of leading 1 bits in the first byte gives the
// total length, the remaining count-1 bytes are read and ignored. The byte is inverted so that
// its leading 1 bits can be counted as leading zeros.
temp = (readUint(8) << 24);
temp = ~temp;
uint32_t shift = 0x80000000; // Number of leading zeros
int8_t count = 0;
for(int i=0; i<32; i++){
if((temp & shift) == 0) {count++; shift >>= 1;}
else break;
}
count--;
for (int i = 0; i < count; i++) readUint(8);
// Block size (samples per channel) from the block size code; codes 6 and 7 carry the value
// minus one in an extra 8 / 16 bit field, code 0 is rejected.
// NOTE(review): for code 7 a field value of 0xFFFF gives 65536, which does not fit m_blockSize.
m_blockSize = 0;
if (FLACFrameHeader->blockSizeCode == 1)
m_blockSize = 192;
else if (2 <= FLACFrameHeader->blockSizeCode && FLACFrameHeader->blockSizeCode <= 5)
m_blockSize = 576 << (FLACFrameHeader->blockSizeCode - 2);
else if (FLACFrameHeader->blockSizeCode == 6)
m_blockSize = readUint(8) + 1;
else if (FLACFrameHeader->blockSizeCode == 7)
m_blockSize = readUint(16) + 1;
else if (8 <= FLACFrameHeader->blockSizeCode && FLACFrameHeader->blockSizeCode <= 15)
m_blockSize = 256 << (FLACFrameHeader->blockSizeCode - 8);
else{
return ERR_FLAC_RESERVED_BLOCKSIZE_UNSUPPORTED;
}
if(m_blockSize > 8192){
log_e("Error: blockSize too big");
return ERR_FLAC_BLOCKSIZE_TOO_BIG;
}
// sample rate codes 12..14 are followed by an explicit sample rate field, which is skipped
if(FLACFrameHeader->sampleRateCode == 12)
readUint(8);
else if (FLACFrameHeader->sampleRateCode == 13 || FLACFrameHeader->sampleRateCode == 14){
readUint(16);
}
// last header byte, read and ignored (presumably the header CRC-8 - it is not verified)
readUint(8);
// Header done: report the consumed bytes and let the next call decode the subframes.
m_status = DECODE_SUBFRAMES;
*bytesLeft = m_bytesAvail;
m_blockSizeLeft = m_blockSize;
return ERR_FLAC_NONE;
}
if(m_status == DECODE_SUBFRAMES){
// Decode each channel's subframe, then skip footer
int ret = decodeSubframes();
// on an error m_status stays DECODE_SUBFRAMES and *bytesLeft is left untouched
if(ret != 0) return ret;
m_status = OUT_SAMPLES;
}
if(m_status == OUT_SAMPLES){ // Write the decoded samples
// blocksize can be much greater than outbuff, so we can't stuff all in once
// therefore we need often more than one loop (split outputblock into pieces)
uint16_t blockSize;
// number of samples per channel of the current block that were already delivered; kept across calls
static uint16_t offset = 0;
if(m_blockSize < outBuffSize + offset) blockSize = m_blockSize - offset;
else blockSize = outBuffSize;
// The output is written with a fixed stride of 2 values per sample position (outbuf[2*i+j]).
// NOTE(review): with one channel only every second element of outbuf is written - confirm that
// the caller expects this layout.
for (int i = 0; i < blockSize; i++) {
for (int j = 0; j < FLACMetadataBlock->numChannels; j++) {
int val = FLACsubFramesBuff->samplesBuffer[j][i + offset];
// 8-bit samples get an offset of 128
if (FLACMetadataBlock->bitsPerSample == 8) val += 128;
outbuf[2*i+j] = val;
}
}
m_validSamples = blockSize * FLACMetadataBlock->numChannels;
offset += blockSize;
// more samples of this block are pending: the caller has to call again
if(offset != m_blockSize) return GIVE_NEXT_LOOP;
offset = 0;
// NOTE(review): offset has just been set to 0, so this check can never fire.
if(offset > m_blockSize) { log_e("offset has a wrong value"); }
}
// Frame footer: skip the padding bits up to the next byte boundary and 16 further bits
// (presumably the frame CRC-16 - it is not verified).
alignToByte();
readUint(16);
m_bytesDecoded = *bytesLeft - m_bytesAvail;
// log_i("m_bytesDecoded %i", m_bytesDecoded);
// m_compressionRatio = (float)m_bytesDecoded / (float)m_blockSize * FLACMetadataBlock->numChannels * (16/8);
// log_i("m_compressionRatio % f", m_compressionRatio);
*bytesLeft = m_bytesAvail;
// the next call starts with a new frame header
m_status = DECODE_FRAME;
return ERR_FLAC_NONE;
}
//----------------------------------------------------------------------------------------------------------------------
// Returns the number of output values produced by the last FLACDecode() call and resets that
// counter, so a second call without decoding in between returns 0.
uint16_t FLACGetOutputSamps(){
    const uint16_t pending = m_validSamples;
    m_validSamples = 0;
    return pending;
}
//----------------------------------------------------------------------------------------------------------------------
// Returns the total number of samples in the stream as stored in the metadata block.
uint64_t FLACGetTotoalSamplesInStream(){
    const uint64_t total = FLACMetadataBlock->totalSamples;
    return total;
}
//----------------------------------------------------------------------------------------------------------------------
// Returns the bits per sample stored in the metadata block.
uint8_t FLACGetBitsPerSample(){
    const uint8_t bits = FLACMetadataBlock->bitsPerSample;
    return bits;
}
//----------------------------------------------------------------------------------------------------------------------
// Returns the number of channels stored in the metadata block.
uint8_t FLACGetChannels(){
    const uint8_t channels = FLACMetadataBlock->numChannels;
    return channels;
}
//----------------------------------------------------------------------------------------------------------------------
// Returns the sample rate stored in the metadata block.
uint32_t FLACGetSampRate(){
    const uint32_t rate = FLACMetadataBlock->sampleRate;
    return rate;
}
//----------------------------------------------------------------------------------------------------------------------
// Returns the average bit rate of the stream:
//   audioDataLength * 8 * sampleRate / totalSamples
// or 0 while the total number of samples is unknown.
// The value is computed with 64-bit integers, so the average bits per sample are not cut down
// to a whole number before they are multiplied with the sample rate.
uint32_t FLACGetBitRate(){
    const uint64_t totalSamples = FLACMetadataBlock->totalSamples;
    if(totalSamples == 0) return 0;
    const uint64_t totalBits = static_cast<uint64_t>(FLACMetadataBlock->audioDataLength) * 8;
    return static_cast<uint32_t>(totalBits * FLACMetadataBlock->sampleRate / totalSamples);
}
//----------------------------------------------------------------------------------------------------------------------
// Returns the total number of samples divided by the sample rate (whole-number division),
// or 0 as long as the sample rate is not known.
uint32_t FLACGetAudioFileDuration() {
    if(!FLACGetSampRate()) return 0;
    const uint32_t duration = FLACGetTotoalSamplesInStream() / FLACGetSampRate(); // AudioFileDuration
    return duration;
}
//----------------------------------------------------------------------------------------------------------------------
// Decodes the subframes of all channels of the current frame into the sample buffer and undoes
// the inter-channel decorrelation.
// Channel assignment (FLACFrameHeader->chanAsgn):
//   0..7 - independent channels, one subframe per channel
//   8    - left/side:  channel 1 is coded as difference and has one extra bit
//   9    - side/right: channel 0 is coded as difference and has one extra bit
//   10   - mid/side:   channel 1 is coded as difference and has one extra bit
//   else - reserved
// Returns ERR_FLAC_NONE on success. An error reported by decodeSubframe() is passed on to the
// caller immediately instead of being dropped.
int8_t decodeSubframes(){
    const int chanAsgn = FLACFrameHeader->chanAsgn;
    const int bps = FLACMetadataBlock->bitsPerSample;
    int8_t ret = ERR_FLAC_NONE;

    if(chanAsgn <= 7) {
        for (int ch = 0; ch < FLACMetadataBlock->numChannels; ch++){
            ret = decodeSubframe(bps, ch);
            if(ret) return ret;
        }
    }
    else if (chanAsgn <= 10) {
        ret = decodeSubframe(bps + (chanAsgn == 9 ? 1 : 0), 0);
        if(ret) return ret;
        ret = decodeSubframe(bps + (chanAsgn == 9 ? 0 : 1), 1);
        if(ret) return ret;

        if(chanAsgn == 8) { // right = left - side
            for (int i = 0; i < m_blockSize; i++)
                FLACsubFramesBuff->samplesBuffer[1][i] = (
                        FLACsubFramesBuff->samplesBuffer[0][i] -
                        FLACsubFramesBuff->samplesBuffer[1][i]);
        }
        else if (chanAsgn == 9) { // left = side + right
            for (int i = 0; i < m_blockSize; i++)
                FLACsubFramesBuff->samplesBuffer[0][i] += FLACsubFramesBuff->samplesBuffer[1][i];
        }
        else { // chanAsgn == 10: rebuild left and right from mid and side
            for (int i = 0; i < m_blockSize; i++) {
                long side = FLACsubFramesBuff->samplesBuffer[1][i];
                long right = FLACsubFramesBuff->samplesBuffer[0][i] - (side >> 1);
                FLACsubFramesBuff->samplesBuffer[1][i] = right;
                FLACsubFramesBuff->samplesBuffer[0][i] = right + side;
            }
        }
    }
    else{
        log_e("Reserved channel assignment");
        return ERR_FLAC_RESERVED_CHANNEL_ASSIGNMENT;
    }
    return ERR_FLAC_NONE;
}
//----------------------------------------------------------------------------------------------------------------------
// Decodes one subframe into FLACsubFramesBuff->samplesBuffer[ch].
//   sampleDepth - bits per sample of this subframe (one more than the stream's bits per sample
//                 for a difference channel)
//   ch          - index of the destination channel
// Subframe header: 1 bit (ignored), 6 bit type, 1 bit flag for wasted bits; if the flag is set,
// the number of wasted bits follows as a run of 0 bits that is ended by a 1 bit.
// Types: 0 constant, 1 verbatim, 8..12 fixed prediction (order type-8),
//        32..63 linear prediction (order type-31); everything else is reserved.
// Returns ERR_FLAC_NONE, ERR_FLAC_RESERVED_SUB_TYPE or the error of the called decoder.
int8_t decodeSubframe(uint8_t sampleDepth, uint8_t ch) {
    int8_t ret = 0;
    readUint(1);
    uint8_t type = readUint(6);
    int shift = readUint(1);
    if (shift == 1) {
        while (readUint(1) == 0)
            shift++;
    }
    sampleDepth -= shift; // the wasted bits are not part of the coded samples
    if(type == 0){ // Constant coding
        // 32-bit local: a difference channel can carry 17 significant bits, which would not fit
        // into an int16_t
        const int32_t s = readSignedInt(sampleDepth);
        for(int i=0; i < m_blockSize; i++){
            FLACsubFramesBuff->samplesBuffer[ch][i] = s;
        }
    }
    else if (type == 1) { // Verbatim coding
        for (int i = 0; i < m_blockSize; i++)
            FLACsubFramesBuff->samplesBuffer[ch][i] = readSignedInt(sampleDepth);
    }
    else if (8 <= type && type <= 12){
        ret = decodeFixedPredictionSubframe(type - 8, sampleDepth, ch);
        if(ret) return ret;
    }
    else if (32 <= type && type <= 63){
        ret = decodeLinearPredictiveCodingSubframe(type - 31, sampleDepth, ch);
        if(ret) return ret;
    }
    else{
        return ERR_FLAC_RESERVED_SUB_TYPE;
    }
    if(shift>0){ // restore the wasted low-order bits
        for (int i = 0; i < m_blockSize; i++){
            FLACsubFramesBuff->samplesBuffer[ch][i] <<= shift;
        }
    }
    return ERR_FLAC_NONE;
}
//----------------------------------------------------------------------------------------------------------------------
int8_t decodeFixedPredictionSubframe(uint8_t predOrder, uint8_t sampleDepth, uint8_t ch) {
uint8_t ret = 0;
for(uint8_t i = 0; i < predOrder; i++)
FLACsubFramesBuff->samplesBuffer[ch][i] = readSignedInt(sampleDepth);
ret = decodeResiduals(predOrder, ch);
if(ret) return ret;
coefs.clear();
if(predOrder == 0) coefs.resize(0);
if(predOrder == 1) coefs.push_back(1); // FIXED_PREDICTION_COEFFICIENTS
if(predOrder == 2){coefs.push_back(2); coefs.push_back(-1);}
if(predOrder == 3){coefs.push_back(3); coefs.push_back(-3); coefs.push_back(1);}
if(predOrder == 4){coefs.push_back(4); coefs.push_back(-6); coefs.push_back(4); coefs.push_back(-1);}
if(predOrder > 4) return ERR_FLAC_PREORDER_TOO_BIG; // Error: preorder > 4"
restoreLinearPrediction(ch, 0);
return ERR_FLAC_NONE;
}
//----------------------------------------------------------------------------------------------------------------------
int8_t decodeLinearPredictiveCodingSubframe(int lpcOrder, int sampleDepth, uint8_t ch){
int8_t ret = 0;
for (int i = 0; i < lpcOrder; i++)
FLACsubFramesBuff->samplesBuffer[ch][i] = readSignedInt(sampleDepth);
int precision = readUint(4) + 1;
int shift = readSignedInt(5);
coefs.resize(0);
for (uint8_t i = 0; i < lpcOrder; i++)
coefs.push_back(readSignedInt(precision));
ret = decodeResiduals(lpcOrder, ch);
if(ret) return ret;
restoreLinearPrediction(ch, shift);
return ERR_FLAC_NONE;
}
//----------------------------------------------------------------------------------------------------------------------
// Decodes the residuals of one subframe into FLACsubFramesBuff->samplesBuffer[ch], starting
// behind the warm-up samples.
//   warmup - number of samples at the start of the block that are not residual coded
//   ch     - destination channel in the sample buffer
// Layout: 2 bit coding method (0: 4 bit parameters, 1: 5 bit parameters, others reserved),
// 4 bit partition order, then per partition a parameter followed by its residuals. A parameter
// with all bits set is an escape: a 5 bit width follows and the residuals are stored as plain
// signed numbers of that width, otherwise they are Rice coded.
// Returns ERR_FLAC_NONE, ERR_FLAC_RESERVED_RESIDUAL_CODING or ERR_FLAC_WRONG_RICE_PARTITION_NR.
int8_t decodeResiduals(uint8_t warmup, uint8_t ch) {
    const int codingMethod = readUint(2);
    if (codingMethod >= 2){
        return ERR_FLAC_RESERVED_RESIDUAL_CODING; // Reserved residual coding method
    }
    const bool shortParams = (codingMethod == 0);
    const uint8_t paramWidth = shortParams ? 4 : 5;
    const int escapeCode = shortParams ? 0xF : 0x1F;

    const int order = readUint(4);
    const int partitions = 1 << order;
    if ((m_blockSize % partitions) != 0){
        return ERR_FLAC_WRONG_RICE_PARTITION_NR; //Error: Block size not divisible by number of Rice partitions
    }
    const int perPartition = m_blockSize / partitions;

    int pos = warmup; // the first partition starts behind the warm-up samples
    for (int p = 0; p < partitions; p++) {
        const int stop = (p + 1) * perPartition;
        const int riceParam = readUint(paramWidth);
        if (riceParam >= escapeCode) {
            const int rawBits = readUint(5);
            for (; pos < stop; pos++){
                FLACsubFramesBuff->samplesBuffer[ch][pos] = readSignedInt(rawBits);
            }
        } else {
            for (; pos < stop; pos++){
                FLACsubFramesBuff->samplesBuffer[ch][pos] = readRiceSignedInt(riceParam);
            }
        }
        pos = stop; // every following partition begins exactly at its boundary
    }
    return ERR_FLAC_NONE;
}
//----------------------------------------------------------------------------------------------------------------------
// Turns the residuals of channel ch back into samples. For every position behind the warm-up
// samples the prediction
//   (sum over j of sample[i-1-j] * coefs[j]) >> shift
// is added to the stored residual; coefs holds the predictor coefficients of the subframe.
// Products and their sum are formed with 64 bits, so that wide samples combined with high
// precision coefficients cannot overflow a 32-bit accumulator.
// NOTE(review): shift is expected to be a small non-negative value, it is not range checked here.
void restoreLinearPrediction(uint8_t ch, uint8_t shift) {
    const int order = static_cast<int>(coefs.size());
    for (int i = order; i < m_blockSize; i++) {
        int64_t sum = 0;
        for (int j = 0; j < order; j++){
            sum += static_cast<int64_t>(FLACsubFramesBuff->samplesBuffer[ch][i - 1 - j]) * coefs[j];
        }
        FLACsubFramesBuff->samplesBuffer[ch][i] += static_cast<int32_t>(sum >> shift);
    }
}
//----------------------------------------------------------------------------------------------------------------------