/*

Pentium-4/1.7, Windows 2000, VS 6.0, 80 GB HD, 7200 rpm        Athlon/0.7, Linux 2.2.17, gcc 2.95.3, 60 GB HD, 5400 rpm
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                               222.9x     normal decode (from Cache)                                              156.8x
                               631.1x     decode with --scale 0 (from Cache)                                      493.0x
3600.0 s :    1.1204 ms =  3213228.3x     FastCached, notoptimized                3600.0 s :    1.0078 ms =  3571982.0x
3600.0 s :    0.4342 ms =  8290771.0x     FastCached, Addition optimized          3600.0 s :    0.1972 ms = 18253801.0x
3599.7 s : 1417.5366 ms =     2539.4x     FAT32/ext2,  fseek/fread                1464.3 s :  181.8882 ms =     8050.7x
3599.7 s : 1242.3945 ms =     2897.4x     FAT32/ext2,  lseek/read                 1464.3 s :  109.3096 ms =    13396.1x
3599.7 s :  180.8211 ms =    19907.4x     FAT32/ext2,  fread                      1464.3 s :  101.8035 ms =    14383.8x
3599.7 s :  159.8485 ms =    22519.3x     FAT32/ext2,  read                       1464.3 s :  101.4027 ms =    14440.6x
3599.7 s : 1431.0977 ms =     2515.3x     NTFS/reiser, fseek/fread                1464.3 s :  183.1137 ms =     7996.8x
3599.7 s : 1251.7135 ms =     2875.8x     NTFS/reiser, lseek/read                 1464.3 s :  127.6082 ms =    11475.1x
3599.7 s :  186.2180 ms =    19330.4x     NTFS/reiser, fread                      1464.3 s :  103.0833 ms =    14205.2x
3599.7 s :  163.7307 ms =    21985.3x     NTFS/reiser, read                       1464.3 s :  102.6841 ms =    14260.4x
3599.7 s :22066.5936 ms =      163.1x     NTFS/reiser, uncached, fseek/fread    1464.3 s : 1671.0040 ms =      876.3x
3599.7 s :23131.4043 ms =      155.6x     NTFS/reiser, uncached, lseek/read     1464.3 s : 1519.3631 ms =      963.8x
                                          NTFS/reiser, uncached, fread          1464.3 s : 1515.6563 ms =      966.1x
3599.7 s : 9520.1631 ms =      378.1x     NTFS/reiser, uncached, read           1464.3 s : 1516.3300 ms =      965.7x
                                          FAT32, uncached, fseek/fread
                                          FAT32, uncached, lseek/read
                                          FAT32, uncached, fread
                                          FAT32, uncached, read

*/

#include "mppdec.h"

#include <string.h>


/*
 *  rdtscll(var) stores a 64-bit tick count in 'var'; main() uses it to time
 *  the individual tests.
 *
 *  With _WIN32 defined the macro is built here on top of __rdtscll(), which
 *  executes the RDTSC instruction via inline assembly. Otherwise rdtscll() is
 *  presumably supplied by <asm/msr.h> (nothing else in this file defines it).
 */
#ifdef _WIN32
Int64_t  __rdtscll ( void )
{
    /* NOTE(review): there is no return statement. This presumably relies on
     * RDTSC leaving its result in EDX:EAX, where the 32-bit compiler also
     * returns 64-bit integers -- confirm before building for another target. */
    __asm { rdtsc };
}
# define rdtscll(__var)     (__var = __rdtscll() )
#else
# include <asm/msr.h>
#endif


/*
 *  Per-platform benchmark configuration ("Testdatei" = test file).
 *
 *  TESTDATEI1, TESTDATEI2  files opened by main(); the trailing comments name
 *                          the file system each one is expected to live on
 *  TESTDATEIFRAMES         iteration count handed to test3() .. test6()
 *  FLUSHCMD                shell command run through system() by flushing()
 *  RAMSIZE                 size in MByte of the buffer allocated by flushing()
 */
#ifdef _WIN32
# define TESTDATEI1       "D:\\AUDIO\\MIKE OLDFIELD\\AMAROK -- [01] Amarok.mpc"         // FAT32
# define TESTDATEI2       "C:\\AUDIO\\AMAROK.mpc"                                       // NTFS
# define TESTDATEIFRAMES  137800
# define FLUSHCMD         "copy /B \"D:\\AUDIO\\Tangerine Dream\\*\" nul 1> nul"        // should read RAM size of data
# define RAMSIZE          512                                                           // RAM size in MByte
#else
# define TESTDATEI1       "CD.mpc"                                                      // ext2
# define TESTDATEI2       "/Archive/CD.mpc"                                             // reiserFS
# define TESTDATEIFRAMES  56056
# define FLUSHCMD         "cat /Archive/Audio/Sting/Nada*.pac > /dev/null"              // should read RAM size of data
# define RAMSIZE          128                                                           // RAM size in MByte
#endif

/*
 *  Helper for flushing(): allocate 'len' bytes and dirty them in 65 passes.
 *  Pass i (0..64) writes one byte every 4096 bytes across the first i/64 of
 *  the buffer and shows the pass number on stderr (overwriting the previous
 *  two characters). The loop stops early as soon as a single pass took more
 *  than 8 seconds.
 *
 *  The buffer is addressed in bytes so that the writes stay inside the
 *  allocation regardless of sizeof(long), and through a volatile pointer so
 *  that the otherwise dead stores are not optimized away. If the allocation
 *  fails the pass is skipped silently.
 */
static void
flushing_touch ( size_t len )
{
    const size_t             stride = 4096;
    const unsigned int       passes = 64;
    unsigned char*           buf;
    volatile unsigned char*  p;
    unsigned int             i;
    size_t                   j;
    size_t                   limit;
    time_t                   t1;
    time_t                   t2;

    buf = malloc ( len );
    if ( buf == NULL )
        return;
    p = buf;

    for ( i = 0; i <= passes; i++ ) {
        fprintf ( stderr, "\b\b%2u", i );
        limit = len / passes * i;               /* bytes covered by this pass */
        time (&t1);
        for ( j = 0; j < limit; j += stride )
            p[j] = (unsigned char) i;
        time (&t2);
        if ( t2 - t1 > 8 )
            break;
    }
    free ( buf );
}


/*
 *  Try to push previously read file data out of the operating system's file
 *  cache: dirty RAMSIZE MByte of memory, run FLUSHCMD twice (one '.' is
 *  printed per run), then dirty RAMSIZE MByte again. Progress goes to stderr.
 *  The exit status of FLUSHCMD is ignored; this is a best-effort operation.
 */
static void
flushing ( void )
{
    size_t  len = (size_t) 1048576 * RAMSIZE;
    int     i;

    fprintf ( stderr, "Flushing system cache ...   ");
    flushing_touch ( len );

    for ( i = 0; i < 2; i++ ) {
        fprintf ( stderr, "." );
        system  ( FLUSHCMD );
    }

    fprintf ( stderr, "  ");
    flushing_touch ( len );
    fprintf ( stderr, "\n\n");
}



/*
 *  mask[n] has the n least significant bits set (n = 0 .. 32).
 *  Bitstream_read() uses it to cut its result down to the requested width.
 */
unsigned int  mask [33] = {
    0x00000000,
    0x00000001, 0x00000003, 0x00000007, 0x0000000F, 0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF,
    0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF, 0x00001FFF, 0x00003FFF, 0x00007FFF, 0x0000FFFF,
    0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF, 0x001FFFFF, 0x003FFFFF, 0x007FFFFF, 0x00FFFFFF,
    0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF, 0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF
};

#define MEMSIZE   8192
#define MEMSIZE2  (MEMSIZE/2)
#define MEMMASK   (MEMSIZE-1)


unsigned short  tab [137812];
unsigned long   Speicher [MEMSIZE];
unsigned long   dword;
unsigned int    Zaehler;
unsigned int    pos;


/*
 *  Consume 'bits' bits from the bit buffer and return them right-aligned.
 *
 *  Bits are taken from the most significant end of the current word 'dword',
 *  starting after the 'pos' bits already used. When the request reaches or
 *  crosses the end of the 32-bit word, the next word of Speicher (index
 *  wrapped with MEMMASK) becomes the current one and supplies the low part
 *  of the result.
 */
static Uint32_t
Bitstream_read ( Int bits )
{
    unsigned int  result = dword;

    pos += bits;

    if ( pos >= 32 ) {
        /* current word is used up: advance to the next one */
        Zaehler = (Zaehler + 1) & MEMMASK;
        dword   = Speicher [Zaehler];
        pos    -= 32;
        if ( pos != 0 ) {
            /* request straddles two words: append the top bits of the new one */
            result <<= pos;
            result  |= dword >> (32 - pos);
        }
    }
    else {
        /* everything requested lies inside the current word */
        result >>= 32 - pos;
    }

    return result & mask [bits];
}


/*
 *  Reference summation: add up tab[0 .. len-1] one element at a time.
 *  Every element that is zero has its index printed to stdout (one per line)
 *  and contributes nothing to the sum.
 */
unsigned long
test1 ( unsigned short* tab, unsigned int len )
{
    unsigned long  total = 0;
    unsigned int   idx   = 0;

    while ( idx < len ) {
        unsigned short  value = tab [idx];

        if ( value == 0 )
            printf ("%u\n", idx );
        else
            total += value;
        idx++;
    }
    return total;
}


/*
 *  "Addition optimized" summation: returns the same sum as test1() but
 *  fetches two 16-bit elements per step as one 32-bit word.
 *
 *  sum1 accumulates the whole words, sum2 accumulates their upper halves.
 *  Afterwards sum1 - (sum2 << 16) is the sum of the lower halves, so
 *  sum2 + (sum1 - (sum2 << 16)) is the sum of all elements (which half holds
 *  which element does not matter). An odd trailing element is added on its own.
 *
 *  Each pair is fetched with memcpy() into an unsigned int instead of reading
 *  the array through an 'unsigned long*': that cast broke the aliasing rules,
 *  could be misaligned, and read past the end of the array wherever
 *  unsigned long is wider than two shorts.
 */
unsigned long
test2 ( unsigned short* tab, unsigned int len )
{
    /* compile-time check: one unsigned int must hold exactly two elements */
    typedef char  test2_word_is_two_shorts [sizeof(unsigned int) == 2 * sizeof(unsigned short)  ?  1  :  -1];

    unsigned long  sum1  = 0;
    unsigned long  sum2  = 0;
    unsigned int   pairs = len >> 1;
    unsigned int   word;
    unsigned int   k;

    for ( k = 0; k < pairs; k++ ) {
        memcpy ( &word, tab + 2*k, sizeof word );
        sum1 += word;
        sum2 += word >> 16;
    }

    sum2 += (sum1 - (sum2 << 16));

    if (len & 1)
        sum2 += tab[len-1];

    return sum2;
}


/*
 *  Position the bit reader at absolute bit offset 'fpos' using stdio:
 *  seek to the 4-byte word containing that bit, read 2 * sizeof(int) bytes
 *  to the start of Speicher, make Speicher[0] the current word and set the
 *  bit position inside it. Return values of fseek/fread are not checked.
 */
static void
Helper1 ( FILE* fp, unsigned long fpos )
{
    unsigned long  byte_offset = (fpos >> 5) * 4;

    fseek ( fp, byte_offset, SEEK_SET );
    fread ( Speicher, sizeof(int), 2, fp );

    Zaehler = 0;
    dword   = Speicher [0];
    pos     = fpos & 31;
}


/*
 *  fseek/fread variant: starting at bit offset 200, perform 'len' steps.
 *  Each step repositions the reader with Helper1(), reads a 20-bit value and
 *  advances the offset by 20 plus that value. Returns the final bit offset.
 */
unsigned long
test3 ( FILE* fp, unsigned int len )
{
    unsigned long  bitpos    = 200;
    unsigned int   remaining = len;

    while ( remaining-- > 0 ) {
        Helper1 ( fp, bitpos );
        bitpos += 20 + Bitstream_read (20);
    }
    return bitpos;
}


/*
 *  Position the bit reader at absolute bit offset 'fpos' using the file
 *  descriptor directly: lseek to the 4-byte word containing that bit, read
 *  2 * sizeof(int) bytes to the start of Speicher, make Speicher[0] the
 *  current word and set the bit position inside it. Return values of
 *  lseek/read are not checked.
 */
static void
Helper2 ( int fd, unsigned long fpos )
{
    unsigned long  byte_offset = (fpos >> 5) * 4;

    lseek ( fd, byte_offset, SEEK_SET );
    read  ( fd, Speicher, sizeof(int)*2 );

    Zaehler = 0;
    dword   = Speicher [0];
    pos     = fpos & 31;
}


/*
 *  lseek/read variant of test3(): same walk over the file (start at bit
 *  offset 200, 'len' steps of 20 bits plus the value read), but every step
 *  repositions through Helper2() on the descriptor underlying 'fp'.
 *  Returns the final bit offset.
 */
unsigned long
test4 ( FILE* fp, unsigned int len )
{
    int            fd        = fileno(fp);
    unsigned long  bitpos    = 200;
    unsigned int   remaining = len;

    while ( remaining-- > 0 ) {
        Helper2 ( fd, bitpos );
        bitpos += 20 + Bitstream_read (20);
    }
    return bitpos;
}


/* Half of Speicher (element offset 0 or MEMSIZE2) that Helper3()/Helper4()
 * last saw the read position in; reset to 0 by test5()/test6(). */
static unsigned int  RING;


static void
Helper3 ( FILE* fp, unsigned long fpos )
{
    unsigned int  NEWRING = (fpos>>5) & MEMSIZE2;

    if ( RING != NEWRING ) {
        fread ( Speicher+RING, 1, sizeof(Speicher)/2, fp );
        RING = NEWRING;
    }

    dword = Speicher [Zaehler = (fpos>>5) & MEMMASK];
    pos   = fpos & 31;
}


/*
 *  Sequential fread variant: rewind the file, fill all of Speicher once,
 *  then walk the stream like test3() (start at bit offset 200, 'len' steps
 *  of 20 bits plus the value read) with Helper3() refilling the buffer.
 *  Returns the final bit offset.
 */
unsigned long
test5 ( FILE* fp, unsigned int len )
{
    unsigned long  bitpos    = 200;
    unsigned int   remaining = len;

    rewind (fp);
    fread ( Speicher, 1, sizeof(Speicher), fp );
    RING = 0;

    while ( remaining-- > 0 ) {
        Helper3 ( fp, bitpos );
        bitpos += 20 + Bitstream_read (20);
    }

    return bitpos;
}


static void
Helper4 ( int fd, unsigned long fpos )
{
    unsigned int  NEWRING = (fpos>>5) & MEMSIZE2;

    if ( RING != NEWRING ) {
        read ( fd, Speicher+RING, sizeof(Speicher)/2 );
        RING = NEWRING;
    }

    dword = Speicher [Zaehler = (fpos>>5) & MEMMASK];
    pos   = fpos & 31;
}


/*
 *  Sequential read() variant: seek the descriptor underlying 'fp' to the
 *  start, fill all of Speicher once, then walk the stream like test3()
 *  (start at bit offset 200, 'len' steps of 20 bits plus the value read)
 *  with Helper4() refilling the buffer. Returns the final bit offset.
 */
unsigned long
test6 ( FILE* fp, unsigned int len )
{
    int            fd        = fileno(fp);
    unsigned long  bitpos    = 200;
    unsigned int   remaining = len;

    lseek (fd, 0l, SEEK_SET );
    read ( fd, Speicher, sizeof(Speicher) );
    RING = 0;

    while ( remaining-- > 0 ) {
        Helper4 ( fd, bitpos );
        bitpos += 20 + Bitstream_read (20);
    }

    return bitpos;
}


int
main ( void )
{
    Int64_t   t1;
    Int64_t   t2;
    int       i;
    double    TIME1;
    double    TIME2;
    FILE*     fp;

    fp = fopen ( TESTDATEI1, "rb" );
    if ( fp == NULL ) {
        fprintf ( stderr, "\nCan't open '%s'\n\n", TESTDATEI1 );
        return 1;
    }

    for ( i = 0; i < sizeof(tab)/sizeof(*tab); i++ )
        tab[i] = sqrt ( i+1 );
    memset ( Speicher, 0, sizeof Speicher );

    if ( test1 ( tab, sizeof(tab)/sizeof(*tab) ) != test2 ( tab, sizeof(tab)/sizeof(*tab) )) {
        printf ("1=%lu 2=%lu\n", test1 ( tab, sizeof(tab)/sizeof(*tab) ), test2 ( tab, sizeof(tab)/sizeof(*tab) ) );
    }

    if ( test3 ( fp, TESTDATEIFRAMES ) != test4 ( fp, TESTDATEIFRAMES )) {
        printf ("3=%lu 4=%lu\n", test3 ( fp, TESTDATEIFRAMES ), test4 ( fp, TESTDATEIFRAMES ) );
    }

    if ( test3 ( fp, TESTDATEIFRAMES ) != test5 ( fp, TESTDATEIFRAMES )) {
        printf ("3=%lu 5=%lu\n", test3 ( fp, TESTDATEIFRAMES ), test5 ( fp, TESTDATEIFRAMES ) );
    }

    if ( test3 ( fp, TESTDATEIFRAMES ) != test6 ( fp, TESTDATEIFRAMES )) {
        printf ("3=%lu 6=%lu\n", test3 ( fp, TESTDATEIFRAMES ), test6 ( fp, TESTDATEIFRAMES ) );
    }

    //--------------------------------------------------------------
    test1 ( tab, sizeof(tab)/sizeof(*tab) );
    rdtscll (t1);
    for ( i = 0; i < 5000; i++ )
        test1 ( tab, sizeof(tab)/sizeof(*tab) );
    rdtscll (t2);
    TIME1 = sizeof(tab)/sizeof(*tab)*1152. / 44100.;
    TIME2 = (t2-t1) / 700.e6 / 5000;
    printf ("%6.1f s : %9.4f ms = %10.1fx\n", TIME1, 1.e3*TIME2, TIME1/TIME2 );

    //---------------------------------------------------------------
    test2 ( tab, sizeof(tab)/sizeof(*tab) );
    rdtscll (t1);
    for ( i = 0; i < 25000; i++ )
        test2 ( tab, sizeof(tab)/sizeof(*tab) );
    rdtscll (t2);
    TIME1 = sizeof(tab)/sizeof(*tab)*1152. / 44100.;
    TIME2 = (t2-t1) / 700.e6 / 25000;
    printf ("%6.1f s : %9.4f ms = %10.1fx\n", TIME1, 1.e3*TIME2, TIME1/TIME2 );

    //---------------------------------------------------------------
    test3 ( fp, TESTDATEIFRAMES );
    rdtscll (t1);
    for ( i = 0; i < 25; i++ )
        test3 ( fp, TESTDATEIFRAMES );
    rdtscll (t2);
    TIME1 = TESTDATEIFRAMES*1152. / 44100.;
    TIME2 = (t2-t1) / 700.e6 / 25;
    printf ("%6.1f s : %9.4f ms = %10.1fx\n", TIME1, 1.e3*TIME2, TIME1/TIME2 );

    //---------------------------------------------------------------
    test4 ( fp, TESTDATEIFRAMES );
    rdtscll (t1);
    for ( i = 0; i < 50; i++ )
        test4 ( fp, TESTDATEIFRAMES );
    rdtscll (t2);
    TIME1 = TESTDATEIFRAMES*1152. / 44100.;
    TIME2 = (t2-t1) / 700.e6 / 50;
    printf ("%6.1f s : %9.4f ms = %10.1fx\n", TIME1, 1.e3*TIME2, TIME1/TIME2 );

    //---------------------------------------------------------------
    test5 ( fp, TESTDATEIFRAMES );
    rdtscll (t1);
    for ( i = 0; i < 50; i++ )
        test5 ( fp, TESTDATEIFRAMES );
    rdtscll (t2);
    TIME1 = TESTDATEIFRAMES*1152. / 44100.;
    TIME2 = (t2-t1) / 700.e6 / 50;
    printf ("%6.1f s : %9.4f ms = %10.1fx\n", TIME1, 1.e3*TIME2, TIME1/TIME2 );

    //---------------------------------------------------------------
    test6 ( fp, TESTDATEIFRAMES );
    rdtscll (t1);
    for ( i = 0; i < 50; i++ )
        test6 ( fp, TESTDATEIFRAMES );
    rdtscll (t2);
    TIME1 = TESTDATEIFRAMES*1152. / 44100.;
    TIME2 = (t2-t1) / 700.e6 / 50;
    printf ("%6.1f s : %9.4f ms = %10.1fx\n", TIME1, 1.e3*TIME2, TIME1/TIME2 );

    fclose (fp);
    fp = fopen ( TESTDATEI2, "rb" );
    if ( fp == NULL ) {
        fprintf ( stderr, "\nCan't open '%s'\n\n", TESTDATEI2 );
        return 1;
    }

    //---------------------------------------------------------------
    test3 ( fp, TESTDATEIFRAMES );
    rdtscll (t1);
    for ( i = 0; i < 25; i++ )
        test3 ( fp, TESTDATEIFRAMES );
    rdtscll (t2);
    TIME1 = TESTDATEIFRAMES*1152. / 44100.;
    TIME2 = (t2-t1) / 700.e6 / 25;
    printf ("%6.1f s : %9.4f ms = %10.1fx\n", TIME1, 1.e3*TIME2, TIME1/TIME2 );

    //---------------------------------------------------------------
    test4 ( fp, TESTDATEIFRAMES );
    rdtscll (t1);
    for ( i = 0; i < 50; i++ )
        test4 ( fp, TESTDATEIFRAMES );
    rdtscll (t2);
    TIME1 = TESTDATEIFRAMES*1152. / 44100.;
    TIME2 = (t2-t1) / 700.e6 / 50;
    printf ("%6.1f s : %9.4f ms = %10.1fx\n", TIME1, 1.e3*TIME2, TIME1/TIME2 );

    //---------------------------------------------------------------
    test5 ( fp, TESTDATEIFRAMES );
    rdtscll (t1);
    for ( i = 0; i < 50; i++ )
        test5 ( fp, TESTDATEIFRAMES );
    rdtscll (t2);
    TIME1 = TESTDATEIFRAMES*1152. / 44100.;
    TIME2 = (t2-t1) / 700.e6 / 50;
    printf ("%6.1f s : %9.4f ms = %10.1fx\n", TIME1, 1.e3*TIME2, TIME1/TIME2 );

    //---------------------------------------------------------------
    test6 ( fp, TESTDATEIFRAMES );
    rdtscll (t1);
    for ( i = 0; i < 50; i++ )
        test6 ( fp, TESTDATEIFRAMES );
    rdtscll (t2);
    TIME1 = TESTDATEIFRAMES*1152. / 44100.;
    TIME2 = (t2-t1) / 700.e6 / 50;
    printf ("%6.1f s : %9.4f ms = %10.1fx\n", TIME1, 1.e3*TIME2, TIME1/TIME2 );

    fclose (fp);

    //------------------------------------------------------------------
    flushing ();
    fp = fopen ( TESTDATEI1, "rb" );
    if ( fp == NULL ) {
        fprintf ( stderr, "\nCan't open '%s'\n\n", TESTDATEI1 );
        return 1;
    }
    test3 ( fp, TESTDATEIFRAMES );
    test3 ( fp, TESTDATEIFRAMES );
    fclose (fp);
    fp = fopen ( TESTDATEI2, "rb" );
    if ( fp == NULL ) {
        fprintf ( stderr, "\nCan't open '%s'\n\n", TESTDATEI2 );
        return 1;
    }
    rdtscll (t1);
    test3 ( fp, TESTDATEIFRAMES );
    rdtscll (t2);
    TIME1 = TESTDATEIFRAMES*1152. / 44100.;
    TIME2 = (t2-t1) / 700.e6 / 1;
    printf ("%6.1f s : %9.4f ms = %10.1fx\n", TIME1, 1.e3*TIME2, TIME1/TIME2 );

    fclose (fp);

    //------------------------------------------------------------------
    flushing ();
    fp = fopen ( TESTDATEI1, "rb" );
    if ( fp == NULL ) {
        fprintf ( stderr, "\nCan't open '%s'\n\n", TESTDATEI2 );
        return 1;
    }
    test4 ( fp, TESTDATEIFRAMES );
    test4 ( fp, TESTDATEIFRAMES );
    fclose (fp);
    fp = fopen ( TESTDATEI2, "rb" );
    if ( fp == NULL ) {
        fprintf ( stderr, "\nCan't open '%s'\n\n", TESTDATEI2 );
        return 1;
    }
    rdtscll (t1);
    test4 ( fp, TESTDATEIFRAMES );
    rdtscll (t2);
    TIME1 = TESTDATEIFRAMES*1152. / 44100.;
    TIME2 = (t2-t1) / 700.e6 / 1;
    printf ("%6.1f s : %9.4f ms = %10.1fx\n", TIME1, 1.e3*TIME2, TIME1/TIME2 );

    fclose (fp);
    //------------------------------------------------------------------
    flushing ();
    fp = fopen ( TESTDATEI1, "rb" );
    if ( fp == NULL ) {
        fprintf ( stderr, "\nCan't open '%s'\n\n", TESTDATEI1 );
        return 1;
    }
    test5 ( fp, TESTDATEIFRAMES );
    test5 ( fp, TESTDATEIFRAMES );
    fclose (fp);
    fp = fopen ( TESTDATEI2, "rb" );
    if ( fp == NULL ) {
        fprintf ( stderr, "\nCan't open '%s'\n\n", TESTDATEI2 );
        return 1;
    }
    rdtscll (t1);
    test5 ( fp, TESTDATEIFRAMES );
    rdtscll (t2);
    TIME1 = TESTDATEIFRAMES*1152. / 44100.;
    TIME2 = (t2-t1) / 700.e6 / 1;
    printf ("%6.1f s : %9.4f ms = %10.1fx\n", TIME1, 1.e3*TIME2, TIME1/TIME2 );

    //------------------------------------------------------------------
    flushing ();
    fp = fopen ( TESTDATEI1, "rb" );
    if ( fp == NULL ) {
        fprintf ( stderr, "\nCan't open '%s'\n\n", TESTDATEI1 );
        return 1;
    }
    test6 ( fp, TESTDATEIFRAMES );
    test6 ( fp, TESTDATEIFRAMES );
    fclose (fp);
    fp = fopen ( TESTDATEI2, "rb" );
    if ( fp == NULL ) {
        fprintf ( stderr, "\nCan't open '%s'\n\n", TESTDATEI2 );
        return 1;
    }
    rdtscll (t1);
    test6 ( fp, TESTDATEIFRAMES );
    rdtscll (t2);
    TIME1 = TESTDATEIFRAMES*1152. / 44100.;
    TIME2 = (t2-t1) / 700.e6 / 1;
    printf ("%6.1f s : %9.4f ms = %10.1fx\n", TIME1, 1.e3*TIME2, TIME1/TIME2 );

    return 0;
}
