
#include <NDS.h>

#include <stdio.h>
#include <stdlib.h>

#include "_console.h"
#include "memtool.h"
#include "cp15.h"

#include "../../ipcex.h"

#include "arm9tcm.h"

#include "strpcm.h"

// When true, strpcmUpdate() writes silence to the output buffers instead of
// consuming a ring-buffer slot.
static volatile bool strpcmPause;

// Set to true on entry to strpcmStop() and cleared again when it finishes (also
// cleared by strpcmInit() and strpcmStart()). It is not read anywhere in this file.
volatile bool strpcmRequestStop;

// Set by strpcmUpdate() when the next slot it would read equals the write index.
volatile bool strpcmRingEmptyFlag;
// Index of the ring slot most recently copied out by strpcmUpdate().
volatile u32 strpcmRingBufReadIndex;
// Slot index that strpcmUpdate() must not consume. This file only resets it;
// presumably the code that fills the ring advances it - confirm against the caller.
volatile u32 strpcmRingBufWriteIndex;

// Left/right ring buffers: allocated in strpcmStart(), released in strpcmStop(),
// NULL while no stream is active.
s16 *strpcmRingLBuf,*strpcmRingRBuf;

// Current volume; strpcmSetVolume16() clamps it to 0..64, strpcmInit() sets it to 16.
static int strpcmVolume16;

// Defined further down; called from InterruptHandler_IPC_SYNC().
static void strpcmUpdate(void);

// ------------------------------------------

#define CACHE_LINE_SIZE (32)

// Calls DC_FlushRange() and then DC_InvalidateRange() on a range starting at v.
// The length handed to both is `size` rounded down to a multiple of
// CACHE_LINE_SIZE plus one extra line, so it is always larger than `size`
// and never zero (hence "overrun").
CODE_IN_ITCM void ins_DC_FlushRangeOverrun(void *v,u32 size)
{
  // The cache routines are reached through function pointers rather than called directly.
  // NOTE(review): presumably to avoid a direct branch out of ITCM - confirm.
  static void (*lp_DC_FlushRange)(void *base, u32 size)=DC_FlushRange;
  static void (*lp_DC_InvalidateRange)(void *base, u32 size)=DC_InvalidateRange;
  
  const u32 PaddedSize=(size & ~(CACHE_LINE_SIZE-1))+CACHE_LINE_SIZE;
  
  lp_DC_FlushRange(v,PaddedSize);
  lp_DC_InvalidateRange(v,PaddedSize);
}

// IPC sync interrupt handler. Services the request code found in IPCEX->IPCREQ
// and then resets it to IPCREQ_NULL. Only IPCREQ_NextSoundData triggers work:
// the output buffers are refilled via strpcmUpdate() and pushed out of the
// data cache, after which strpcmWriteRequest is cleared.
CODE_IN_ITCM void InterruptHandler_IPC_SYNC(void)
{
  if(IPCEX->IPCREQ==IPCREQ_NextSoundData){
    // Reached through a function pointer, like the other cache calls made from ITCM code.
    static void (*lp_DC_FlushAll)(void)=DC_FlushAll;
    lp_DC_FlushAll();
    
    strpcmUpdate();
    
    const u32 SampleCount=IPCEX->strpcmSamples;
    const u32 ChannelCount=IPCEX->strpcmChannels;
    
    // Each sample is an s16, i.e. two bytes.
    ins_DC_FlushRangeOverrun(IPCEX->strpcmLBuf,SampleCount*2);
    if(ChannelCount==2){
      ins_DC_FlushRangeOverrun(IPCEX->strpcmRBuf,SampleCount*2);
    }
    
    IPCEX->strpcmWriteRequest=0;
  }
  
  // Every request code, handled or not, is acknowledged by resetting the slot.
  IPCEX->IPCREQ=IPCREQ_NULL;
}

// Sets up interrupt handling and installs InterruptHandler_IPC_SYNC as the
// handler for IRQ_IPC_SYNC.
void InitInterrupts(void)
{
  irqInit();
  // Register the handler before the interrupt source is enabled.
  irqSet(IRQ_IPC_SYNC, (VoidFunctionPointer)InterruptHandler_IPC_SYNC);
  irqEnable(IRQ_IPC_SYNC);
  // Plain assignment (not OR): the register ends up holding exactly
  // IPC_SYNC_IRQ_ENABLE, replacing whatever was written there before.
  REG_IPC_SYNC=IPC_SYNC_IRQ_ENABLE;
}

// Resets the module state to "no stream": flags cleared, ring indices zeroed,
// ring buffer pointers NULL, volume 16. Nothing is allocated or freed here and
// IPCEX is not touched.
void strpcmInit(void)
{
  // Playback control flags.
  strpcmPause=false;
  strpcmRequestStop=false;
  
  // Ring bookkeeping.
  strpcmRingEmptyFlag=false;
  strpcmRingBufReadIndex=0;
  strpcmRingBufWriteIndex=0;
  
  // No ring storage yet; strpcmStart() allocates it.
  strpcmRingLBuf=NULL;
  strpcmRingRBuf=NULL;
  
  strpcmVolume16=16;
}

// Starts a PCM stream.
//   FastStart    - true: initial write index is 1; false: it is strpcmRingBufCount-1.
//   SampleRate   - stored in IPCEX->strpcmFreq.
//   SamplePerBuf - samples per output buffer and per ring slot.
//   ChannelCount - stored in IPCEX->strpcmChannels.
//   strpcmFormat - stored in IPCEX->strpcmFormat.
// Allocates and zeroes both ring buffers and both output buffers (the right-hand
// ones are allocated regardless of ChannelCount), posts strpcmControl_Play and
// busy-waits until strpcmControl reads back as strpcmControl_NOP.
void strpcmStart(bool FastStart,u32 SampleRate,u32 SamplePerBuf,u32 ChannelCount,u32 strpcmFormat)
{
  // Let any command that is still pending be consumed first.
  DC_FlushAll();
  while(IPCEX->strpcmControl!=strpcmControl_NOP){
  }
  
  strpcmRequestStop=false;
  strpcmPause=false;
  
  const u32 BufSamples=SamplePerBuf;
  const u32 RingSamples=BufSamples*strpcmRingBufCount;
  
  strpcmRingEmptyFlag=false;
  strpcmRingBufReadIndex=0;
  strpcmRingBufWriteIndex=FastStart ? 1 : (strpcmRingBufCount-1);
  
  // Ring storage: two bytes per s16 sample.
  strpcmRingLBuf=(s16*)safemalloc(RingSamples*2);
  strpcmRingRBuf=(s16*)safemalloc(RingSamples*2);
  
  MemSet16DMA3(0,strpcmRingLBuf,RingSamples*2);
  MemSet16DMA3(0,strpcmRingRBuf,RingSamples*2);
  
  // Stream parameters shared through IPCEX.
  IPCEX->strpcmFreq=SampleRate;
  IPCEX->strpcmSamples=BufSamples;
  IPCEX->strpcmChannels=ChannelCount;
  IPCEX->strpcmFormat=strpcmFormat;
  
  // Output buffers that strpcmUpdate() fills, one slot's worth each.
  IPCEX->strpcmLBuf=(s16*)safemalloc(BufSamples*2);
  IPCEX->strpcmRBuf=(s16*)safemalloc(BufSamples*2);
  
  MemSet16DMA3(0,IPCEX->strpcmLBuf,BufSamples*2);
  MemSet16DMA3(0,IPCEX->strpcmRBuf,BufSamples*2);
  
  // Push everything out of the cache, then post the Play command.
  DC_FlushAll();
  IPCEX->strpcmControl=strpcmControl_Play;
  
  // Wait for the command to be acknowledged.
  DC_FlushAll();
  while(IPCEX->strpcmControl!=strpcmControl_NOP){
  }
}

// Stops the stream: waits for the control slot to be idle, posts
// strpcmControl_Stop, waits for it to be acknowledged, then resets the module
// state and releases the ring and output buffers.
void strpcmStop(void)
{
  strpcmRequestStop=true;
  
  // Flush, then poll; flush again before every further poll.
  do{
    DC_FlushAll();
  }while(IPCEX->strpcmControl!=strpcmControl_NOP);
  
  IPCEX->strpcmControl=strpcmControl_Stop;
  
  do{
    DC_FlushAll();
  }while(IPCEX->strpcmControl!=strpcmControl_NOP);
  
  strpcmRequestStop=false;
  strpcmPause=false;
  
  strpcmRingEmptyFlag=false;
  strpcmRingBufReadIndex=0;
  strpcmRingBufWriteIndex=0;
  
  // Ring storage; the pointers go back to NULL so strpcmUpdate() emits silence.
  if(strpcmRingLBuf!=NULL){
    safefree((void*)strpcmRingLBuf);
    strpcmRingLBuf=NULL;
  }
  if(strpcmRingRBuf!=NULL){
    safefree((void*)strpcmRingRBuf);
    strpcmRingRBuf=NULL;
  }
  
  IPCEX->strpcmFreq=0;
  IPCEX->strpcmSamples=0;
  IPCEX->strpcmChannels=0;
  
  // Output buffers; NULL makes strpcmUpdate() return without touching them.
  if(IPCEX->strpcmLBuf!=NULL){
    safefree(IPCEX->strpcmLBuf);
    IPCEX->strpcmLBuf=NULL;
  }
  if(IPCEX->strpcmRBuf!=NULL){
    safefree(IPCEX->strpcmRBuf);
    IPCEX->strpcmRBuf=NULL;
  }
}

// ----------------------------------------------

// Source word for fixed-source DMA fills. It lives at file scope so that its
// address can be handed to the DMA controller.
static u32 DMAFIXSRC;

// Fills `len` bytes at `dst` with the 16-bit value `v` using DMA channel 2 and
// waits for the transfer to finish.
//   v   - halfword pattern to store.
//   dst - destination; written in 16-bit units.
//   len - length in bytes; an odd trailing byte is not written.
// Requests shorter than one halfword return immediately without touching the
// hardware. NOTE(review): len>>1 is OR-ed straight into DMA2_CR, so it must fit
// in the register's count field; that is not checked here.
CODE_IN_ITCM static void ins_MemSet16DMA2(u16 v,void *dst,u32 len)
{
  const u32 HalfWords=len>>1;
  
  // A count of 0 must never reach DMA2_CR: the controller interprets a zero
  // count as the maximum transfer length, which would overwrite memory far
  // beyond dst.
  if(HalfWords==0) return;
  
  DMAFIXSRC=(u32)v+((u32)v<<16);
  
  // Make sure the DMA sees the fill value and that no cached lines of the
  // destination shadow what the DMA is about to write.
  ins_DC_FlushRangeOverrun(&DMAFIXSRC,4);
  ins_DC_FlushRangeOverrun(dst,len);
  
  DMA2_SRC = (uint32)&DMAFIXSRC;
  DMA2_DEST = (uint32)dst;
  DMA2_CR = DMA_ENABLE | DMA_SRC_FIX | DMA_DST_INC | DMA_16_BIT | HalfWords;
  while(DMA2_CR & DMA_BUSY);
}

// Copies `len` bytes from `src` to `dst` using DMA channel 2 and waits for the
// transfer to finish.
//   src - source; read in 16-bit units.
//   dst - destination; written in 16-bit units.
//   len - length in bytes; an odd trailing byte is not copied.
// Requests shorter than one halfword return immediately without touching the
// hardware. NOTE(review): len>>1 is OR-ed straight into DMA2_CR, so it must fit
// in the register's count field; that is not checked here.
CODE_IN_ITCM static void ins_MemCopy16DMA2(void *src,void *dst,u32 len)
{
  const u32 HalfWords=len>>1;
  
  // A count of 0 must never reach DMA2_CR: the controller interprets a zero
  // count as the maximum transfer length, which would copy far beyond both
  // buffers.
  if(HalfWords==0) return;
  
  // Write back pending source data and drop cached destination lines before
  // the DMA accesses memory directly.
  ins_DC_FlushRangeOverrun(src,len);
  ins_DC_FlushRangeOverrun(dst,len);
  
  DMA2_SRC = (uint32)src;
  DMA2_DEST = (uint32)dst;
  DMA2_CR = DMA_ENABLE | DMA_SRC_INC | DMA_DST_INC | DMA_16_BIT | HalfWords;
  while(DMA2_CR & DMA_BUSY);
}

// Copies `len` bytes from `src` to `dst` with swiFastCopy(), passing len/4 as
// the unit count together with COPY_MODE_COPY. The DMA-based path is compiled
// in but permanently disabled.
CODE_IN_ITCM static void ins_MemCopy32swi256bit(void *src,void *dst,u32 len)
{
  if(false){
    // Disabled alternative: same copy through DMA channel 2.
    ins_MemCopy16DMA2(src,dst,len);
  }else{
    swiFastCopy(src,dst,COPY_MODE_COPY | (len/4));
  }
}

// Refills the output buffers IPCEX->strpcmLBuf / strpcmRBuf with one buffer of
// samples. The right-hand buffer is only written when the channel count is 2.
//  - Either output buffer missing: nothing is done.
//  - Either ring buffer missing, playback paused, or the next ring slot equal
//    to the write index: the output is filled with zeros and the read index
//    stays where it is. In the last case strpcmRingEmptyFlag is also raised.
//  - Otherwise the next ring slot is copied out and becomes the new read index.
void strpcmUpdate(void)
{
  const u32 SampleCount=IPCEX->strpcmSamples;
  const u32 ChannelCount=IPCEX->strpcmChannels;
  
  s16 *OutL=IPCEX->strpcmLBuf;
  s16 *OutR=IPCEX->strpcmRBuf;
  
  if((OutL==NULL)||(OutR==NULL)){
    return;
  }
  
  // Two bytes per s16 sample.
  const u32 ByteLen=SampleCount*2;
  
  if((strpcmRingLBuf==NULL)||(strpcmRingRBuf==NULL)){
    ins_MemSet16DMA2(0,OutL,ByteLen);
    if(ChannelCount==2){
      ins_MemSet16DMA2(0,OutR,ByteLen);
    }
    return;
  }
  
  const u32 NextIndex=(strpcmRingBufReadIndex+1) & strpcmRingBufBitMask;
  
  // Start from the pause state; an empty ring forces silence as well.
  bool OutputSilence=strpcmPause;
  
  if(NextIndex==strpcmRingBufWriteIndex){
    strpcmRingEmptyFlag=true;
    OutputSilence=true;
  }
  
  if(OutputSilence){
    ins_MemSet16DMA2(0,OutL,ByteLen);
    if(ChannelCount==2){
      ins_MemSet16DMA2(0,OutR,ByteLen);
    }
    return;
  }
  
  s16 *SlotL=&strpcmRingLBuf[SampleCount*NextIndex];
  s16 *SlotR=&strpcmRingRBuf[SampleCount*NextIndex];
  
  ins_MemCopy16DMA2(SlotL,OutL,ByteLen);
  if(ChannelCount==2){
    ins_MemCopy16DMA2(SlotR,OutR,ByteLen);
  }
  
  // The slot is only marked consumed once its data has been copied out.
  strpcmRingBufReadIndex=NextIndex;
}

// Sets the volume. `v` is clamped to the range 0..64, stored locally and
// mirrored into IPCEX->strpcmVolume16.
void strpcmSetVolume16(int v)
{
  if(v<0){
    v=0;
  }else if(v>64){
    v=64;
  }
  
  strpcmVolume16=v;
  IPCEX->strpcmVolume16=v;
}

// Returns the volume last stored by strpcmSetVolume16() or strpcmInit().
int strpcmGetVolume16(void)
{
  return strpcmVolume16;
}

// Sets the pause flag; while it is true strpcmUpdate() outputs silence.
void strpcmSetPause(bool v)
{
  strpcmPause = v;
}

// Returns the current pause flag.
bool strpcmGetPause(void)
{
  return strpcmPause;
}

