
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <NDS.h>

#include "_console.h"
#include "_const.h"

#include "memtool.h"

//static u32 DMAFIXSRC;

// Cache line size in bytes. NOTE(review): no user of this constant is visible
// in this file; presumably kept for cache-aligned transfers - confirm before removing.
#define CACHE_LINE_SIZE (32)

// Argument sanity check used at the top of every copy routine.
//   align : alignment mask (0, 1 or 3) that both pointers must satisfy
//   p1,p2 : source and destination pointers
//   p3    : length in bytes
// When a pointer is NULL, the length is zero, or a pointer has any of the
// mask bits set, the offending call site is logged and ShowLogHalt() is called.
// Every parameter is parenthesised so expression arguments expand safely, and
// the body is wrapped in do{}while(0) so the macro acts as a single statement.
#define MEMCHK_COPY(align,p1,p2,p3) do { \
  if(((p1)==NULL)||((p2)==NULL)||((p3)==0)||(((u32)(p1)&(align))!=0)||(((u32)(p2)&(align))!=0)){ \
    _consolePrintf("COPY. Hooked memory address error. %s%d (%d) p1=0x%x,p2=0x%x,p3=0x%x\n",__FILE__,__LINE__,(align),(p1),(p2),(p3)); \
    ShowLogHalt(); \
  } \
} while(0)

// Argument sanity check used at the top of every fill routine.
//   align : alignment mask (0, 1 or 3) that the destination must satisfy
//   p1    : fill value (only printed in the error message)
//   p2    : destination pointer
//   p3    : length in bytes
// When the destination is NULL, the length is zero, or the destination has any
// of the mask bits set, the call site is logged and ShowLogHalt() is called.
// Every parameter is parenthesised so expression arguments expand safely, and
// the body is wrapped in do{}while(0) so the macro acts as a single statement.
#define MEMCHK_SET(align,p1,p2,p3) do { \
  if(((p2)==NULL)||((p3)==0)||(((u32)(p2)&(align))!=0)){ \
    _consolePrintf("SET. Hooked memory address error. %s%d (%d) p1=0x%x,p2=0x%x,p3=0x%x\n",__FILE__,__LINE__,(align),(p1),(p2),(p3)); \
    ShowLogHalt(); \
  } \
} while(0)

// Copy len bytes from src to dst with the CPU.
// When len is even and both pointers are 2-byte aligned the work is handed to
// MemCopy16CPU; otherwise the bytes are copied one at a time, front to back.
void MemCopy8CPU(void *src,void *dst,u32 len)
{
  MEMCHK_COPY(0,src,dst,len);

  // Fast path: everything is halfword friendly.
  if( ((len&1)==0) && (((((u32)src)|((u32)dst))&1)==0) ){
    MemCopy16CPU(src,dst,len);
    return;
  }

  const u8 *in=(const u8*)src;
  u8 *out=(u8*)dst;

  for(u32 left=len;left!=0;left--){
    *out++=*in++;
  }
}

// Copy len bytes (as len/2 halfwords) from src to dst with the CPU.
// Both pointers must be 2-byte aligned (enforced by MEMCHK_COPY). When len is
// a multiple of 4 and both pointers are 4-byte aligned the work is handed to
// MemCopy32CPU. An odd trailing byte, if any, is not copied.
void MemCopy16CPU(void *src,void *dst,u32 len)
{
  MEMCHK_COPY(1,src,dst,len);

  // Fast path: everything is word friendly.
  if( ((len&3)==0) && (((((u32)src)|((u32)dst))&3)==0) ){
    MemCopy32CPU(src,dst,len);
    return;
  }

  const u16 *in=(const u16*)src;
  u16 *out=(u16*)dst;

  for(u32 left=len>>1;left!=0;left--){
    *out++=*in++;
  }
}

// Copy len bytes (as len/4 words) from src to dst using hand-written ARM code.
// Both pointers must be 4-byte aligned (enforced by MEMCHK_COPY). Trailing
// bytes beyond a multiple of 4 are not copied.
//
// The routine first copies (words & 3) single words so that the remainder is a
// multiple of four, then moves four words per iteration with ldmia/stmia.
//
// The asm labels are plain (global) symbols, so the function is marked
// noinline to keep a single copy of them in the object file.
//
// Fix: the asm statement writes memory through dst and changes the condition
// flags, so "memory" and "cc" are declared as clobbers. Without them the
// compiler may keep stale values cached in registers or reorder surrounding
// memory accesses across the copy.
void __attribute__((noinline)) MemCopy32CPU(void *src,void *dst,u32 len)
{
  MEMCHK_COPY(3,src,dst,len);
  asm volatile(
    // Convert the byte count to a word count; nothing to do when it is zero.
    "movs %2,%2,lsr #2 \n"
    "beq MemCopy32CPU_32bitEnd \n"
    
    // r3 = number of leading single words (word count modulo 4).
    "ands r3,%2,#3 \n"
    "beq MemCopy32CPU_4x32bitLoop \n"
    
    // Copy the leading words one at a time.
    "MemCopy32CPU_32bitLoop: \n"
    "ldr r4,[%0],#4 \n"
    "str r4,[%1],#4 \n"
    "sub %2,#1 \n"
    "subs r3,#1 \n"
    "bne MemCopy32CPU_32bitLoop \n"
    
    // Done if fewer than four words were requested in total.
    "cmps %2,#0 \n"
    "beq MemCopy32CPU_32bitEnd \n"
    
    // Bulk copy: four words per iteration; the count is now a multiple of 4.
    "MemCopy32CPU_4x32bitLoop: \n"
    "ldmia %0!,{r3,r4,r5,r6} \n"
    "stmia %1!,{r3,r4,r5,r6} \n"
    "subs %2,#4 \n"
    "bne MemCopy32CPU_4x32bitLoop \n"
    
    "MemCopy32CPU_32bitEnd: \n"
    
    : "+r"(src), "+r"(dst), "+r"(len) :
    : "r3","r4","r5","r6","cc","memory"
  );
}

// Fill len bytes at dst with the byte value v, using the CPU.
void MemSet8CPU(u8 v,void *dst,u32 len)
{
  MEMCHK_SET(0,v,dst,len);

  u8 *out=(u8*)dst;

  for(u32 left=len;left!=0;left--){
    *out++=v;
  }
}

// Fill len bytes at dst (as len/2 halfwords) with the value v, using the CPU.
// dst must be 2-byte aligned (enforced by MEMCHK_SET); an odd trailing byte is
// left untouched.
void MemSet16CPU(u16 v,void *dst,u32 len)
{
  MEMCHK_SET(1,v,dst,len);

  u16 *out=(u16*)dst;

  for(u32 left=len>>1;left!=0;left--){
    *out++=v;
  }
}

// Fill len bytes at dst (as len/4 words) with the value v, using the CPU.
// dst must be 4-byte aligned (enforced by MEMCHK_SET); trailing bytes beyond a
// multiple of 4 are left untouched.
void MemSet32CPU(u32 v,void *dst,u32 len)
{
  MEMCHK_SET(3,v,dst,len);

  u32 *out=(u32*)dst;

  for(u32 left=len>>2;left!=0;left--){
    *out++=v;
  }
}

// 16-bit copy entry point named after DMA channel 3.
// As written here no DMA hardware is touched: after validating the arguments
// the request is forwarded to the CPU implementation.
void MemCopy16DMA3(void *src,void *dst,u32 len)
{
  MEMCHK_COPY(1,src,dst,len);

  MemCopy16CPU(src,dst,len);
}

// 32-bit copy entry point named after DMA channel 3.
// As written here no DMA hardware is touched: after validating the arguments
// the request is forwarded to the CPU implementation.
void MemCopy32DMA3(void *src,void *dst,u32 len)
{
  MEMCHK_COPY(3,src,dst,len);

  MemCopy32CPU(src,dst,len);
}

// 8-bit fill entry point named after DMA channel 3.
// As written here no DMA hardware is touched: after validating the arguments
// the request is forwarded to the CPU implementation.
void MemSet8DMA3(u8 v,void *dst,u32 len)
{
  MEMCHK_SET(0,v,dst,len);

  MemSet8CPU(v,dst,len);
}

// 16-bit fill entry point named after DMA channel 3.
// As written here no DMA hardware is touched: after validating the arguments
// the request is forwarded to the CPU implementation.
void MemSet16DMA3(u16 v,void *dst,u32 len)
{
  MEMCHK_SET(1,v,dst,len);

  MemSet16CPU(v,dst,len);
}

// 32-bit fill entry point named after DMA channel 3.
// As written here no DMA hardware is touched: after validating the arguments
// the request is forwarded to the CPU implementation.
void MemSet32DMA3(u32 v,void *dst,u32 len)
{
  MEMCHK_SET(3,v,dst,len);

  MemSet32CPU(v,dst,len);
}

// 16-bit copy entry point named after DMA channel 2.
// As written here no DMA hardware is touched: after validating the arguments
// the request is forwarded to the CPU implementation.
void MemCopy16DMA2(void *src,void *dst,u32 len)
{
  MEMCHK_COPY(1,src,dst,len);

  MemCopy16CPU(src,dst,len);
}

// 16-bit fill entry point named after DMA channel 2.
// As written here no DMA hardware is touched: after validating the arguments
// the request is forwarded to the CPU implementation.
void MemSet16DMA2(u16 v,void *dst,u32 len)
{
  MEMCHK_SET(1,v,dst,len);

  MemSet16CPU(v,dst,len);
}

// Fill len bytes at dst with the word v through swiFastCopy in fill mode.
// The word count (len/4) is OR-ed into the flags argument together with the
// mode bits; the address of the local copy of v is passed as the source.
// NOTE(review): the "256bit" suffix suggests callers are expected to pass a
// length that is a multiple of 32 bytes - confirm against the BIOS call's
// word-count requirements.
void MemSet32swi256bit(u32 v,void *dst,u32 len)
{
  MEMCHK_SET(3,v,dst,len);

  const u32 words=len/4;
  swiFastCopy(&v,dst,COPY_MODE_WORD | COPY_MODE_FILL | words);
}

// Copy len bytes from src to dst through swiFastCopy in copy mode.
// The word count (len/4) is OR-ed into the flags argument together with the
// mode bits.
// NOTE(review): the "256bit" suffix suggests callers are expected to pass a
// length that is a multiple of 32 bytes - confirm against the BIOS call's
// word-count requirements.
void MemCopy32swi256bit(void *src,void *dst,u32 len)
{
  MEMCHK_COPY(3,src,dst,len);

  const u32 words=len/4;
  swiFastCopy(src,dst,COPY_MODE_WORD | COPY_MODE_COPY | words);
}

// ----------------------------------------------

// Optional label for the next tracked allocation. atype_set() stores this
// pointer in the new record and then resets it to "".
const char *pmalloctext="";

// One record of the allocation tracking table.
typedef struct {
  u32 adr,size;       // user pointer (0 = slot unused) and requested size in bytes
  bool locked;        // set by atype_lockall(); a locked record may not be cleared
  const char *ptext;  // label copied from pmalloctext when the record was created
} Tatype;

// Fixed capacity of the tracking table; atype_set() halts when it is full.
#define atypecount (512)
static Tatype atype[atypecount];

void atype_init(void)
{
  for(u32 idx=0;idx<atypecount;idx++){
    Tatype *patype=&atype[idx];
    patype->adr=0;
    patype->size=0;
    patype->locked=false;
    patype->ptext="";
  }
}

void atype_showallocated(void)
{
  _consolePrint("Allocated memory information.\n");
  
  for(u32 idx=0;idx<atypecount;idx++){
    Tatype *patype=&atype[idx];
    if(patype->adr!=0){
      _consolePrintf("idx=%d adr=0x%08x size=%d %s ",idx,patype->adr,patype->size,patype->ptext);
      if(patype->locked==false){
        _consolePrint("\n");
        }else{
        _consolePrint("locked.\n");
      }
    }
  }
  
  _consolePrint("------------------\n");
}

// Fatal-error exit used by the allocator: print the table of live allocations
// for diagnosis, then hand over to ShowLogHalt().
void safemalloc_halt(void)
{
  atype_showallocated(); // diagnostics first

  ShowLogHalt();
}

void atype_lockall(void)
{
  for(u32 idx=0;idx<atypecount;idx++){
    Tatype *patype=&atype[idx];
    if(patype->adr!=0) patype->locked=true;
  }
}

// Record a new allocation (adr, size) in the first unused slot of the table.
// The pending label in pmalloctext is attached to the record and then cleared.
// When no slot is free an error is printed and safemalloc_halt() is called.
void atype_set(u32 adr,u32 size)
{
  // Locate the first unused slot.
  Tatype *pfree=NULL;
  for(u32 slot=0;slot<atypecount;slot++){
    if(atype[slot].adr==0){
      pfree=&atype[slot];
      break;
    }
  }
  
  if(pfree==NULL){
    _consolePrint("Fatal error! atype array overflow.\n");
    safemalloc_halt();
    return;
  }
  
  pfree->adr=adr;
  pfree->size=size;
  pfree->ptext=pmalloctext;
  pmalloctext="";
}

// Remove the record for adr from the tracking table.
// Halts through safemalloc_halt() when the record is locked or when no record
// with that address exists.
void atype_clear(u32 adr)
{
  // Locate the first record holding this address.
  Tatype *pfound=NULL;
  for(u32 slot=0;slot<atypecount;slot++){
    if(atype[slot].adr==adr){
      pfound=&atype[slot];
      break;
    }
  }
  
  if(pfound==NULL){
    _consolePrintf("Fatal error! can not found clear atype array. (0x%08x)\n",adr);
    safemalloc_halt();
    return;
  }
  
  if(pfound->locked==true){
    _consolePrintf("Fatal error. This addres is locked. (0x%08x)\n",adr);
    safemalloc_halt();
    return;
  }
  
  pfound->adr=0;
  pfound->size=0;
}

// Return the recorded size of the allocation at adr.
// When the address is not in the table an error is printed,
// safemalloc_halt() is called, and 0 is returned.
u32 atype_getsize(u32 adr)
{
  const Tatype *const pend=atype+atypecount;
  
  for(const Tatype *pslot=atype;pslot!=pend;pslot++){
    if(pslot->adr!=adr) continue;
    return(pslot->size);
  }
  
  _consolePrintf("Fatal error! not found atype adr. (0x%08x)\n",adr);
  safemalloc_halt();
  return(0);
}

void atype_checkmemoryleak(void)
{
  bool haltflag=false;
  
  for(u32 idx=0;idx<atypecount;idx++){
    Tatype *patype=&atype[idx];
    if((patype->locked==false)&&(patype->adr!=0)){
      haltflag=true;
      u32 size=patype->size;
      u8 *pbuf=(u8*)patype->adr;
      _consolePrint("Memory leak detected.\n");
      _consolePrintf("adr=0x%08x, size=%d\n %s",pbuf,size,patype->ptext);
      for(u32 idx=0;idx<8;idx++){
        _consolePrintf("%02x,",pbuf[idx]);
      }
      for(u32 idx=0;idx<8;idx++){
        if((0x20<=(u8)pbuf[idx])&&((u8)pbuf[idx]<0xff)){
          _consolePrintf("%c",pbuf[idx]);
          }else{
          _consolePrintf("_");
        }
      }
      _consolePrint("\n");
    }
  }
  
  if(haltflag==true){
    _consolePrint("Halt.\n");
    ShowLogHalt();
  }
}

void atype_checkoverrange(void)
{
  bool haltflag=false;
  
  for(u32 idx=0;idx<atypecount;idx++){
    Tatype *patype=&atype[idx];
    if(patype->adr!=0){
      u32 size=patype->size;
      u8 *pbuf=(u8*)patype->adr;
      
      if((pbuf[-8]!=0xa8)||(pbuf[-7]!=0xa7)||(pbuf[-6]!=0xa6)||(pbuf[-5]!=0xa5)||(pbuf[-4]!=0xa4)||(pbuf[-3]!=0xa3)||(pbuf[-2]!=0xa2)||(pbuf[-1]!=0xa1)||
         (pbuf[size+0]!=0xb0)||(pbuf[size+1]!=0xb1)||(pbuf[size+2]!=0xb2)||(pbuf[size+3]!=0xb3)||(pbuf[size+4]!=0xb4)||(pbuf[size+5]!=0xb5)||(pbuf[size+6]!=0xb6)||(pbuf[size+7]!=0xb7)){
        haltflag=true;
        _consolePrint("Memory check error. Ignore writing code?\n");
        _consolePrintf("adr=0x%08x, size=%d\n %s",pbuf,size,patype->ptext);
        for(u32 idx=8;idx>0;idx--){
          _consolePrintf("%02x,",pbuf[-idx]);
        }
        _consolePrintf("\n");
        for(u32 idx=0;idx<8;idx++){
          _consolePrintf("%02x,",pbuf[idx]);
        }
        _consolePrintf("\n");
        for(u32 idx=0;idx<8;idx++){
          _consolePrintf("%02x,",pbuf[size+idx]);
        }
        _consolePrint("\n");
      }
    }
  }
  
  if(haltflag==true){
    _consolePrint("Halt.\n");
    ShowLogHalt();
  }
}

// Optional out-of-memory callback used by safemalloc(): called when the probe
// allocation fails. Returning true makes safemalloc() retry; returning false,
// or leaving this pointer NULL, makes it report the failure and halt.
bool (*safemalloc_RequestFreeArea)(void)=NULL;

void *safemalloc(int size)
{
  atype_checkoverrange();
  
//  return(malloc(size));
  
  if(size<=0) return(NULL);
  
  void *ptr;
  u32 adr;
  
  while(1){
    ptr=malloc(size+(32*1024)); // 32kb͕Kc
    if(ptr!=NULL) break;
    if(safemalloc_RequestFreeArea==NULL){
      _consolePrintf("malloc(%dbyte) fail. safemalloc_RequestFreeArea==NULL\n",size);
      safemalloc_halt();
    }
    if(safemalloc_RequestFreeArea()==false){
      _consolePrintf("malloc(%dbyte) fail. safemalloc_RequestFreeArea result is false.\n",size);
      safemalloc_halt();
    }
  }
  
  if(ptr==NULL){
    _consolePrintf("malloc(%d) fail NULL\n",size);
    safemalloc_halt();
    return(NULL);
  }
  
  adr=(u32)ptr;
  
  if((adr&3)!=0){ // 4byteACgĂȂ
    _consolePrintf("malloc(%d) fail not 4bytealian\n",size);
    safemalloc_halt();
    return(NULL);
  }
  
  if(adr<0x02000000){ // 擪AhXCO
    _consolePrintf("malloc(%d) fail adr<MemArea\n",size);
    safemalloc_halt();
    return(NULL);
  }
  
  if((0x02000000+(4*1024*1024))<=(adr+size)){ // I[AhXCゾ
    _consolePrintf("malloc(%d) fail MemArea<adr+size\n",size);
    safemalloc_halt();
    return(NULL);
  }
  
  // 64kb]mۂ蒼
  
  void *checkedptr=ptr;
  
  free(ptr);
  ptr=malloc(size+(8*2)); // 擪OƏI[ɌR[h
  
  if(ptr==NULL){
    _consolePrintf("malloc(%d) fail reallocerror-1.%x,%x==NULL\n",size,(u32)checkedptr,(u32)ptr);
  }
  
  ptr=(void*)((u32)ptr+8);
  
  adr=(u32)ptr;
  
/*
  if(checkedptr!=ptr){ // `FbNς݂̎擾|C^ƈ
    free(ptr);
    _consolePrintf("malloc(%d) fail reallocerror0.%x!=%x\n",size,(u32)checkedptr,(u32)ptr);
    safemalloc_halt();
    return(NULL);
  }
*/
  
  if((adr&3)!=0){ // 4byteACgĂȂ
    free(ptr);
    _consolePrintf("malloc(%d) fail reallocerror1.%x!=%x\n",size,(u32)checkedptr,(u32)ptr);
    safemalloc_halt();
    return(NULL);
  }
  
  if(adr<0x02000000){ // 擪AhXCO
    free(ptr);
    _consolePrintf("malloc(%d) fail reallocerror2.%x!=%x\n",size,(u32)checkedptr,(u32)ptr);
    safemalloc_halt();
    return(NULL);
  }
  
  if((0x02000000+(4*1024*1024))<=(adr+size)){ // I[AhXCゾ
    free(ptr);
    _consolePrintf("malloc(%d) fail reallocerror3.%x!=%x\n",size,(u32)checkedptr,(u32)ptr);
    safemalloc_halt();
    return(NULL);
  }
  
//  _consolePrintf("malloc(%d)=0x%x\n",size,ptr);
  
  atype_set((u32)ptr,size);
  
  {
    u8 *pbuf=(u8*)ptr;
    pbuf[-8]=0xa8;
    pbuf[-7]=0xa7;
    pbuf[-6]=0xa6;
    pbuf[-5]=0xa5;
    pbuf[-4]=0xa4;
    pbuf[-3]=0xa3;
    pbuf[-2]=0xa2;
    pbuf[-1]=0xa1;
    pbuf[size+0]=0xb0;
    pbuf[size+1]=0xb1;
    pbuf[size+2]=0xb2;
    pbuf[size+3]=0xb3;
    pbuf[size+4]=0xb4;
    pbuf[size+5]=0xb5;
    pbuf[size+6]=0xb6;
    pbuf[size+7]=0xb7;
  }
  
//  _consolePrintf("[%x]",(u32)ptr);
  return(ptr);

}

void *safemalloc_nocheck(int size)
{
  atype_checkoverrange();
  
  if(size<=0) return(NULL);
  
  void *ptr;
  u32 adr;
  
  ptr=malloc(size+(64*1024)); // 64kb͕Kc
  
  if(ptr==NULL) return(NULL);
  
  adr=(u32)ptr;
  
  if((adr&3)!=0){ // 4byteACgĂȂ
    _consolePrintf("malloc(%d) fail not 4bytealian\n",size);
    safemalloc_halt();
    return(NULL);
  }
  
  if(adr<0x02000000){ // 擪AhXCO
    _consolePrintf("malloc(%d) fail adr<MemArea\n",size);
    safemalloc_halt();
    return(NULL);
  }
  
  if((0x02000000+(4*1024*1024))<=(adr+size)){ // I[AhXCゾ
    _consolePrintf("malloc(%d) fail MemArea<adr+size\n",size);
    safemalloc_halt();
    return(NULL);
  }
  
  // 64kb]mۂ蒼
  
  void *checkedptr=ptr;
  
  free(ptr);
  ptr=malloc(size+(8*2)); // 擪OƏI[ɌR[h
  
  if(ptr==NULL){
    _consolePrintf("malloc(%d) fail reallocerror-1.%x,%x==NULL\n",size,(u32)checkedptr,(u32)ptr);
  }
  
  ptr=(void*)((u32)ptr+8);
  
  adr=(u32)ptr;
  
/*
  if(checkedptr!=ptr){ // `FbNς݂̎擾|C^ƈ
    free(ptr);
    _consolePrintf("malloc(%d) fail reallocerror0.%x!=%x\n",size,(u32)checkedptr,(u32)ptr);
    safemalloc_halt();
    return(NULL);
  }
*/
  
  if((adr&3)!=0){ // 4byteACgĂȂ
    free(ptr);
    _consolePrintf("malloc(%d) fail reallocerror1.%x!=%x\n",size,(u32)checkedptr,(u32)ptr);
    safemalloc_halt();
    return(NULL);
  }
  
  if(adr<0x02000000){ // 擪AhXCO
    free(ptr);
    _consolePrintf("malloc(%d) fail reallocerror2.%x!=%x\n",size,(u32)checkedptr,(u32)ptr);
    safemalloc_halt();
    return(NULL);
  }
  
  if((0x02000000+(4*1024*1024))<=(adr+size)){ // I[AhXCゾ
    free(ptr);
    _consolePrintf("malloc(%d) fail reallocerror3.%x!=%x\n",size,(u32)checkedptr,(u32)ptr);
    safemalloc_halt();
    return(NULL);
  }
  
  atype_set((u32)ptr,size);
  
  {
    u8 *pbuf=(u8*)ptr;
    pbuf[-8]=0xa8;
    pbuf[-7]=0xa7;
    pbuf[-6]=0xa6;
    pbuf[-5]=0xa5;
    pbuf[-4]=0xa4;
    pbuf[-3]=0xa3;
    pbuf[-2]=0xa2;
    pbuf[-1]=0xa1;
    pbuf[size+0]=0xb0;
    pbuf[size+1]=0xb1;
    pbuf[size+2]=0xb2;
    pbuf[size+3]=0xb3;
    pbuf[size+4]=0xb4;
    pbuf[size+5]=0xb5;
    pbuf[size+6]=0xb6;
    pbuf[size+7]=0xb7;
  }
  
//  _consolePrintf("[%x]",(u32)ptr);
  return(ptr);

}

void safefree(void *ptr)
{
  atype_checkoverrange();
  
//  _consolePrintf("free(0x%x)\n",ptr);
  
  if(ptr==NULL){
    _consolePrint("safefree Request NullPointer.\n");
    safemalloc_halt();
    return;
  }
  
  {
    u32 size=atype_getsize((u32)ptr);
    u8 *pbuf=(u8*)ptr;
    
    if((pbuf[-8]!=0xa8)||(pbuf[-7]!=0xa7)||(pbuf[-6]!=0xa6)||(pbuf[-5]!=0xa5)||(pbuf[-4]!=0xa4)||(pbuf[-3]!=0xa3)||(pbuf[-2]!=0xa2)||(pbuf[-1]!=0xa1)||
       (pbuf[size+0]!=0xb0)||(pbuf[size+1]!=0xb1)||(pbuf[size+2]!=0xb2)||(pbuf[size+3]!=0xb3)||(pbuf[size+4]!=0xb4)||(pbuf[size+5]!=0xb5)||(pbuf[size+6]!=0xb6)||(pbuf[size+7]!=0xb7)){
      _consolePrint("Memory check error. Ignore writing code?\n");
      _consolePrintf("adr=0x%08x, size=%d\n",pbuf,size);
      for(u32 idx=8;idx>0;idx--){
        _consolePrintf("%02x,",pbuf[-idx]);
      }
      _consolePrint("\n");
      for(u32 idx=0;idx<24;idx++){
        _consolePrintf("%02x,",pbuf[size+idx-8]);
      }
      _consolePrint("\n");
      safemalloc_halt();
    }
    
  }
  
  atype_clear((u32)ptr);
  
  ptr=(void*)((u32)ptr-8);
  free(ptr);
}

// Dry-run version of the allocator's probe: report whether a request of size
// bytes would currently pass the probe stage.
//
// A block of size+64KB is allocated and released immediately; the function
// returns true only if that allocation succeeded, was 4-byte aligned, and the
// range [adr, adr+size) lies inside the 4MB region starting at 0x02000000.
// Returns false for size<=0.
//
// Fix (consistency): the bounds are now tested the same way as in safemalloc()
// and safemalloc_nocheck() - start address against the lower bound, end
// address against the upper bound. The previous code had "+size" on the
// opposite sides of the two comparisons.
// NOTE(review): if the looser test was intentional, revert the two conditions.
bool testmalloc(int size)
{
  if(size<=0) return(false);
  
  void *ptr;
  u32 adr;
  
  ptr=malloc(size+(64*1024)); // probe allocation with 64KB of headroom
  
  if(ptr==NULL) return(false);
  
  // Only the address is needed; release the probe block right away.
  adr=(u32)ptr;
  free(ptr);
  
  if((adr&3)!=0){ // not 4-byte aligned
    return(false);
  }
  
  if(adr<0x02000000){ // start address below the memory region
    return(false);
  }
  
  if((0x02000000+(4*1024*1024))<=(adr+size)){ // end address beyond the memory region
    return(false);
  }
  
  return(true);
}

// Step size in bytes (1024) used by PrintFreeMem() and GetFreeMem() when
// probing for the largest request that testmalloc() accepts.
#define PrintFreeMem_Seg (8*128)

// Estimate the free memory by probing testmalloc() with sizes that grow in
// PrintFreeMem_Seg steps (up to 4MB) and print the largest accepted size.
void PrintFreeMem(void)
{
  u32 FreeMemSize=0;
  s32 probe=1*PrintFreeMem_Seg;
  
  while(probe<4096*1024){
    if(testmalloc(probe)==false) break; // first rejected size ends the search
    FreeMemSize=probe;
    probe+=PrintFreeMem_Seg;
  }
  
  _consolePrintf("FreeMem=%dbyte    \n",FreeMemSize);
}

// Estimate the free memory by probing testmalloc() with sizes that grow in
// PrintFreeMem_Seg steps (up to 4MB). Returns the largest accepted size in
// bytes, or 0 when even the first step is rejected.
u32 GetFreeMem(void)
{
  u32 FreeMemSize=0;
  s32 probe=1*PrintFreeMem_Seg;
  
  while(probe<4096*1024){
    if(testmalloc(probe)==false) break; // first rejected size ends the search
    FreeMemSize=probe;
    probe+=PrintFreeMem_Seg;
  }
  
  return(FreeMemSize);
}

// Print the amount of free memory.
//
// Currently this simply forwards to PrintFreeMem(). The more thorough
// measurement below was already disabled by an unconditional return; it is
// now kept under "#if 0" so the compiler no longer sees unreachable code and
// a variable-length array. Runtime behaviour is unchanged.
void PrintAccuracyFreeMem(void)
{
  PrintFreeMem();
  
#if 0
  // Disabled: allocate blocks of decreasing size (4MB down in 16KB steps),
  // sum up the sizes that succeeded, print the total, then free everything.
  const u32 maxsize=4*1024*1024;
  const u32 segsize=16*1024;
  const u32 count=maxsize/segsize;
  u32 ptrs[count];
  
  u32 FreeMemSize=0;
  
  for(u32 idx=0;idx<count;idx++){
    u32 size=maxsize-(segsize*idx);
    ptrs[idx]=(u32)malloc(size);
    if(ptrs[idx]!=0) FreeMemSize+=size;
  }
  
  _consolePrintf("AccuracyFreeMem=%dbyte    \n",FreeMemSize);
  
  for(u32 idx=0;idx<count;idx++){
    if(ptrs[idx]!=0){
      free((void*)ptrs[idx]); ptrs[idx]=0;
    }
  }
#endif
}

