About sorting methods

Discussion about everything. New games, 3d math, development tips...
Post Reply
REAPER
Posts: 8
Joined: Sat May 08, 2004 10:52 pm
Location: Russia, Moscow

About sorting methods

Post by REAPER »

Not so long ago I finished learning different sorting methods in my university, so I decided to compare all methods.
Now I want to show it here.

Here is a helpful function:

Code: Select all

/* Exchange array[pos1] and array[pos2].
 * Uses a temporary instead of the add/subtract trick: that trick
 * invokes signed-overflow UB for large values and zeroes the element
 * whenever pos1 == pos2. */
void swap(int* array, const int pos1, const int pos2)
{
    const int tmp = array[pos1];
    array[pos1] = array[pos2];
    array[pos2] = tmp;
}
BUBBLE SORT

Code: Select all

/* Bubble sort, ascending.
 * Fixes the original out-of-bounds read: the inner loop started at
 * j == size and compared array[size]. Swapping uses a temporary
 * instead of the overflow-prone arithmetic trick. */
void bubble_sort(int* array, const int size)
{
    int i, j;

    if ( size < 2 ) return; /* nothing to sort */

    for ( i = 0; i < size; i++ )
    {
        /* smallest remaining element bubbles down toward index i */
        for ( j = size - 1; j > i; j-- )
        {
            if ( array[j] < array[j - 1] )
            {
                const int tmp = array[j];
                array[j] = array[j - 1];
                array[j - 1] = tmp;
            }
        }
    }
}
Array with 1000 elements simple bubble sort shows 15 ms,
10000 elements - 578 ms
20000 elements - 2453 ms
Uhhh!!!! VERY SLOW!!!!

SHAKER (COCKTAIL) BUBBLE SORT

Code: Select all

/* Cocktail (shaker) bubble sort: alternates a downward and an upward
 * bubble pass, shrinking the unsorted window [lb, ub] from both ends.
 * Fixes vs. the original: the passes now stop at the window boundaries
 * instead of always scanning from index 1 (the lb bookkeeping was
 * computed but never used by the loops), and swapping uses a temporary
 * rather than the overflow-prone arithmetic trick. */
void sheiker_bubble_sort(int* array, int size)
{
    int j, k = size - 1;
    int lb = 1, ub = size - 1; /* boundaries of the unsorted part */

    if ( size < 2 ) return; /* nothing to sort */

    do
    {
        /* DOWN - UP: smallest element of the window sinks to lb - 1 */
        for ( j = ub; j >= lb; j-- )
        {
            if ( array[j - 1] > array[j] )
            {
                const int tmp = array[j - 1];
                array[j - 1] = array[j];
                array[j] = tmp;
                k = j; /* remember last swap position */
            }
        }
        lb = k + 1;

        /* UP - DOWN: largest element of the window rises to ub + 1 */
        for ( j = lb; j <= ub; j++ )
        {
            if ( array[j - 1] > array[j] )
            {
                const int tmp = array[j - 1];
                array[j - 1] = array[j];
                array[j] = tmp;
                k = j;
            }
        }
        ub = k - 1;
    } while ( lb < ub );
}
Array with 1000 elements: shaker bubble sort shows 7 ms,
10000 elements - 500 ms
20000 elements - 2078 ms
TOO SLOWLY!!!

INSERTION SORT

Code: Select all

/* Insertion sort, ascending.
 * Fixes the original off-by-one: the outer loop ran to i == size and
 * read array[size], one past the end of the array. */
void insertion_sort(int* array, int size)
{
	int i, j, t;

	if (size < 2) return; /* nothing to sort */

	for (i = 1; i < size; i++)
	{
		t = array[i]; /* element being inserted into the sorted prefix */
		/* shift larger elements one slot to the right */
		for (j = i - 1; j >= 0 && array[j] > t; j--)
			array[j + 1] = array[j];

		array[j + 1] = t;
	}
}
10000 elements - 69 ms
20000 elements - 313 ms
100000 elements - 9828 ms

SHELL SORT
In this method I use Robert Sedgewick formula to create a sequence:
h(s) = 9 * 2^s - 9 * 2^(s/2) + 1,         if s is even
h(s) = 8 * 2^s - 6 * 2^((s+1)/2) + 1,     if s is odd
(1, 5, 19, 41, 109, 209, ...)
In bad case O(N^(4/3))

Code: Select all

/* Fill inc[] with the Sedgewick gap sequence (1, 5, 19, 41, 109, 209, ...)
 * and return the index of the largest gap worth using for an array of
 * the given size. Even indices use 9*2^s - 9*2^(s/2) + 1, odd indices
 * use 8*2^s - 6*2^((s+1)/2) + 1. */
int calc_step(int* inc, int size)
{
    int pow_s = 1;   /* 2^s                  */
    int pow_h = 1;   /* 2^((s+1)/2), s odd   */
    int pow_e = 1;   /* 2^(s/2),     s even  */
    int idx = -1;

    do
    {
        idx++;
        if (idx % 2 != 0)
        {
            inc[idx] = 8 * pow_s - 6 * pow_h + 1;
        }
        else
        {
            inc[idx] = 9 * pow_s - 9 * pow_e + 1;
            pow_h *= 2;
            pow_e *= 2;
        }
        pow_s *= 2;
        /* stop once 1.5 * gap reaches the array size */
    } while ((inc[idx] >> 1) + inc[idx] < size);

    return idx > 0 ? idx - 1 : 0;
}

/* Shell sort using the Sedgewick gap sequence produced by calc_step.
 * Each pass is a gapped insertion sort; gaps are applied largest first. */
void shell_sort(int* array, int size)
{
    int gaps[40];
    int level;

    if (size < 2) return; /* nothing to sort */

    /* build the gap sequence; level is the index of the largest usable gap */
    for (level = calc_step(gaps, size); level >= 0; level--)
    {
        const int gap = gaps[level];
        int i;

        for (i = gap; i < size; i++)
        {
            const int value = array[i];
            int j = i - gap;

            /* shift gap-spaced larger elements right, then drop value in */
            while (j >= 0 && array[j] > value)
            {
                array[j + gap] = array[j];
                j -= gap;
            }
            array[j + gap] = value;
        }
    }
}
100000 elements - 15 ms
200000 elements - 47 ms
300000 elements - 63 ms
1000000 elements - 218 ms
10000000 elements - 3687 ms

NOT BAD, but can be better

HEAP SORT
I used the Irrlicht heap sort implementation, so I decided not to paste the code here. I will only show my results:

100000 elements - 31 ms
200000 elements - 93 ms
300000 elements - 157 ms
1000000 elements - 500 ms

BAD, but no waste of memory

QUICK SORT
I used recursive implementation.

/* Hoare-style partition of a[lb..ub] around the middle element.
 * Reconstructed: the forum markup swallowed every "[i]" subscript in the
 * original post (e.g. "pivot > a", "t = a"). Moves the pivot to a[lb],
 * partitions the rest, then drops the pivot into its final slot a[j].
 * Returns the pivot's final index. */
int partition(int* a, int lb, int ub)
{
    int t, pivot;
    int i, j, p;

    /* take the middle element as pivot and park it at a[lb] */
    p = lb + ((ub - lb) >> 1);
    pivot = a[p];
    a[p] = a[lb];

    /* partition a[lb+1..ub] */
    i = lb + 1;
    j = ub;
    while (1)
    {
        while (i < j && pivot > a[i]) i++;
        while (j >= i && a[j] > pivot) j--;
        if (i >= j) break;
        t = a[i];
        a[i] = a[j];
        a[j] = t;
        j--; i++;
    }

    /* place the pivot at a[j]; everything right of j is >= pivot */
    a[lb] = a[j];
    a[j] = pivot;

    return j;
}

/* Quicksort over a[lb..ub] with tail-call elimination: recurse into the
 * smaller partition and loop on the larger one, keeping the recursion
 * depth at O(log n) even for adversarial inputs. */
void quickSort(int* a, int lb, int ub)
{
    while (lb < ub)
    {
        const int mid = partition(a, lb, ub);

        if (mid - lb <= ub - mid)
        {
            /* left half is smaller: recurse on it, iterate on the right */
            quickSort(a, lb, mid - 1);
            lb = mid + 1;
        }
        else
        {
            /* right half is smaller: recurse on it, iterate on the left */
            quickSort(a, mid + 1, ub);
            ub = mid - 1;
        }
    }
}

100000 elements - 15 ms
200000 elements - 31 ms
300000 elements - 47 ms
1000000 elements - 172 ms
10000000 elements - 2015 ms

GOOD!!!

LINEAR SORT

Code: Select all

#define VAL_MAX	100   // maximum value of array elements (valid inputs: 0..VAL_MAX)

/* Counting ("linear") sort for values in [0, VAL_MAX].
 * Fixes vs. original: VAL_MAX + 1 buckets so the value VAL_MAX itself
 * fits (the old code wrote one past the end for it), input values are
 * range-checked instead of writing out of bounds, and calloc replaces
 * malloc + memset (it also zero-initializes and checks the size
 * multiplication for overflow).
 * Returns p on success, 0 on allocation failure or out-of-range input. */
int* linear_sort(int* p, int n)
{
    int a, b;
    int count = 0;
    int* counts; // one bucket per representable value

    counts = calloc(VAL_MAX + 1, sizeof *counts);
    if (!counts) /* not enough memory */ return 0;

    // counting phase: reject values outside [0, VAL_MAX]
    for (a = 0; a < n; a++)
    {
        if (p[a] < 0 || p[a] > VAL_MAX)
        {
            free(counts);
            return 0;
        }
        counts[p[a]]++;
    }

    // output phase: emit each value as many times as it was counted
    for (a = 0; a <= VAL_MAX; a++)
        for (b = 0; b < counts[a]; b++)
            p[count++] = a;

    free(counts);

    return p;
}
Results:
100000 elements - 0 ms
200000 elements - 0 ms
300000 elements - 0 ms
1000000 elements - 15 ms
10000000 elements - 157 ms
20000000 elements - 344 ms
30000000 elements - 500 ms

EXCELLENT!!!! But, but, but it uses an unreal amount of memory.

--------------------------------------------------------------
| Data type | Memory size |
--------------------------------------------------------------
| char | 1 Kb |
| unsigned char | 512 Bytes |
| _int16 | 256 Kb |
| unsigned _int16 | 128 Kb |
| _int32 | 16 Gb |
| unsigned _int32 | 8 Gb |
--------------------------------------------------------------
:shock: :shock: :shock: :shock: :shock: :shock: :shock:

Operating systems like Windows 9x/NT limit the CPU address space to 4 Gb, and more than 2 Gb of that is reserved for system processes, so the maximum heap size is about 1 - 1.5 Gb.
But, in not all cases sorting data uses all _int32 values:
from - 2 147 483 648 to 2 147 483 647. So we can decrease memory usage! Memory count is: Cmem = N_VAL*sizeof(cell), where N_VAL - count of all valid values and sizeof(cell) - cell size.
For example for sorting data with values [0; 1 000 000] will cost 4 Mb of memory. This is not very large value :!:
Post Reply