/*
(c) 2001 by Andre Reinald
fixed in 2009 by Paul Harris
unknown license
*/

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

#ifdef _WINDOWS
typedef unsigned long uint32_t;
typedef unsigned char uint8_t;
#else
#include <stdint.h>
#endif

// replaced byte with bitsOffset to avoid *8 operation in loop
static void radix (short byteOffset, const unsigned N, uint32_t *source, uint32_t *dest)
{
	// suppressed the need for index as it is reported in count
	uint32_t count[256];
	// added temp variables to simplify writing, understanding and compiler
	// optimization job most of them will be allocated as registers
	uint32_t *sp, *cp, s, c, i;
	uint8_t *bp;

	// faster than MemSet
	cp = count;
	for (i = 256; i > 0; --i, ++cp)
		*cp = 0;

	// count occurences of every byte value
	bp = ((uint8_t *) source) + byteOffset;
	for (i = N; i > 0; --i, bp += 4) {
		cp = count + *bp;
		++(*cp);
	}

	// transform count into index by summing elements and storing into same array
	s = 0;
	cp = count;
	for (i = 256; i > 0; --i, ++cp) {
		c = *cp;
		*cp = s;
		s += c;
	}

	// fill dest with the right values in the right place
	bp = ((uint8_t *) source) + byteOffset;
	sp = source;
	for (i = N; i > 0; --i, bp += 4, ++sp) {
		cp = count + *bp;
		dest[*cp] = *sp;
		++(*cp);
	}
}

static void radix_sort (uint32_t *source, unsigned N)
{
	// allocate heap memory to avoid the need of additional parameter
	uint32_t *temp = (uint32_t *) malloc (N * sizeof (uint32_t));
	assert (temp != NULL);

	radix (0, N, source, temp);
	radix (1, N, temp, source);
	radix (2, N, source, temp);
	radix (3, N, temp, source);

	free (temp);
}

static void make_random (uint32_t *data, unsigned N)
{
	for ( ; N > 0; --N, ++data)
		*data = rand () | (rand () << 16);
}

static void check_order (uint32_t *data, unsigned N)
{
	// only signal errors if any (should not be)
	for (--N ; N > 0; --N, ++data)
		assert (data[0] <= data[1]);
}

// test for big number of elements
static void test_radix (const unsigned N)
{
	int i;
	uint32_t *data = (uint32_t *) malloc (N * sizeof (uint32_t));
	assert (data != NULL);

	make_random (data, N);
	radix_sort (data, N);
	check_order (data, N);

	free (data);
}

int main ()
{
	test_radix (5000000);
	return 0;
}

