All sorts of sorts

To demonstrate sorting algorithms and their run times, I decided to write a program that would have Merge, Quick, and Bubble sort go through the same data and report back the process time. It was assumed that the speeds of Quick sort and Merge sort would be many times faster than Bubble sort, but I wanted to actually see the difference.

To test the sorts, I implemented a C++ program to generate a data set of 100,000 random integers that would then be sorted by all three sorts 10 times. To ensure that the results were accurate I enforced the sorting of the same 100,000 integers each time for each algorithm.

Header File

#ifndef SORTS_H
#define SORTS_H

#include <time.h>
#include <stdio.h>
#include <stdlib.h>

// For file output
#include <iostream>
#include <fstream>

// How many integers to generate
#define DATASET 100000

// How many times to run an algorithm on the data
#define DATARUNS 10

class Sorts  
{
public:

    // Everything will happen here
    Sorts();

private:

    // Something to hold the random numbers
    int generatedData[DATASET];

    // A place for them to be sorted
    int array[DATASET];

    // Results of the algorithms
    double bubbleSortTimes[DATARUNS];
    double quickSortTimes[DATARUNS];
    double mergeSortTimes[DATARUNS];

    // Generate the initial data
    void generateData();

    // Reset data
    void dataReset();

    // Calculate time taken
    double timediff(clock_t t1, clock_t t2);

    // Perform swap action on two integers
    void swap(int& a, int& b);

    // Perform bubble sort
    void bubblesort();

    // Perform quick sort
    int qpartition(int * arr, int left, int right);
    void quickSort(int * arr, int left, int right);

    // Perform merge sort
    void doMerge(int * arr, int left, int right);
    void mergeSort(int * arr, int left, int right);

    // Display the results of the multiple runs
    void displayResults();
};

#endif // SORTS_H

CPP File

#include "sorts.h"

Sorts::Sorts()  
{
    // Fills array with random integers of size DATASET
    generateData();

    // Do bubble sort runs
    for(int i = 0; i < DATARUNS; i++)
    {
        // Capture time
        clock_t t1 = clock();

        // Perform bubble sort
        bubblesort();

        //Capture time again
        clock_t t2 = clock();

        // Add run time to result array
        bubbleSortTimes[i] = timediff(t1, t2);

        // Reset operating data to originaly generated random numbers to ensure we are
        // comparing the sorting speed on the exact same random numbers
        dataReset();
    }

    // Repeat above process for other sorts

    // Do quick sort runs
    for(int i = 0; i < DATARUNS; i++)
    {
        clock_t t1 = clock();
        quickSort(array, 0, DATASET-1);
        clock_t t2 = clock();
        quickSortTimes[i] = timediff(t1, t2);
        dataReset();
    }

    // Do merge sort runs
    for(int i = 0; i < DATARUNS; i++)
    {
        clock_t t1 = clock();
        mergeSort(array, 0, DATASET-1);
        clock_t t2 = clock();
        mergeSortTimes[i] = timediff(t1, t2);

        dataReset();
    }

    // Display all the results
    displayResults();
}

void Sorts::generateData()  
{
    // Seed rand
    srand((unsigned)time(0));

    // Generate data
    for(int i = 0; i < DATASET; i++)
        generatedData[i] = rand();

    // Copy data to array that will be manipulated
    dataReset();
}

void Sorts::dataReset()  
{
    for(int i = 0; i < DATASET; i++)
        array[i] = generatedData[i];
}

double Sorts::timediff(clock_t t1, clock_t t2)  
{
    return (double)(t2 - t1) / CLOCKS_PER_SEC;
}

void Sorts::swap(int &a, int &b)  
{
    int t = a;
    a = b;
    b = t;
}

void Sorts::bubblesort()  
{
    int i, j;
    for(i=0;i<DATASET-1;i++)
        for(j=DATASET-1;j>i;--j)
            if(array[j] < array[j-1])
                swap(array[j],array[j-1]);
}

int Sorts::qpartition(int *arr, int left, int right)  
{
    // Use mid as the pivot
    int p = arr[(left+right)/2];

    // Perform Hoare-Partitioning
    while ( left < right )
    {
        while ( arr[left] < p )
            left++;

        while ( arr[right] > p )
            right--;

        if ( arr[left] == arr[right] )
            left++;
        else if ( left < right )
            swap(arr[left], arr[right]);
    }
    return right;
}

void Sorts::quickSort(int *arr, int left, int right)  
{
    if( left < right )
    {
        // Mid point
        int p = qpartition(arr, left, right);

        // Quicksort left of piv
        quickSort(arr, left, p-1);

        // Quicksort right of piv
        quickSort(arr, p+1, right);
    }
}

void Sorts::doMerge(int *arr, int left, int right)  
{
    int p = (left + right) / 2,     // Mid point
            ti = 0,                 // Temp  indexer
            l = left,               // Left  indexer
            k = p + 1,              // Right indexer
            temp[right-left+1];     // Temp array

    // Merge the two data sets
    while ( l <= p && k <= right )
        if ( arr[l] < arr[k] )
            temp[ti++] = arr[l++];
        else
            temp[ti++] = arr[k++];

    // Merge remaining data in left arr
    while ( l <= p )
        temp[ti++] = arr[l++];

    // Merge remaining data in right arr
    while ( k <= right )
        temp[ti++] = arr[k++];

    // Copy temp array to master array
    for ( int i = left; i <= right; i++ )
        arr[i] = temp[i-left];
}

void Sorts::mergeSort(int * arr, int left, int right)  
{
    if(left < right)
    {
        // Get mid point
        int p = (left + right) / 2;

        // Merge sort left side
        mergeSort(arr, left, p);

        // Merge sort right side
        mergeSort(arr, p + 1, right);

        // Perform the merging
        doMerge(arr, left, right);
    }
}

void Sorts::displayResults()  
{
    std::ofstream outfile;
    outfile.open("output.txt");

    printf("\n\n Bubble sort results: \n\n");
    outfile << "\n\n Bubble sort results: \n\n";
    for(int i = 0; i < DATARUNS; i++)
    {
        printf("\tRun [%i] : %f  \n", i, bubbleSortTimes[i]);
        outfile << "Run [" << i << "] : " << bubbleSortTimes[i] << std::endl;
    }

    printf("\n\n Quick sort results: \n\n");
    outfile << "\n\n Quick sort results: \n\n";
    for(int i = 0; i < DATARUNS; i++)
    {
        printf("\tRun [%i] : %f  \n", i, quickSortTimes[i]);
        outfile << "Run [" << i << "] : " << quickSortTimes[i] << std::endl;
    }

    printf("\n\n Merge sort results: \n\n");
    outfile << "\n\n Merge sort results: \n\n";
    for(int i = 0; i < DATARUNS; i++)
    {
        printf("\tRun [%i] : %f  \n", i, mergeSortTimes[i]);
        outfile << "Run [" << i << "] : " << mergeSortTimes[i] << std::endl;
    }

    outfile.close();
}

Output

Bubble sort results: 

Run [0] : 0.298  
Run [1] : 0.293  
Run [2] : 0.3  
Run [3] : 0.299  
Run [4] : 0.303  
Run [5] : 0.298  
Run [6] : 0.296  
Run [7] : 0.296  
Run [8] : 0.295  
Run [9] : 0.301    

Quick sort results: 

Run [0] : 0.004  
Run [1] : 0.004  
Run [2] : 0.004  
Run [3] : 0.004  
Run [4] : 0.004  
Run [5] : 0.004  
Run [6] : 0.004  
Run [7] : 0.004  
Run [8] : 0.004  
Run [9] : 0.004    

Merge sort results: 

Run [0] : 0.001  
Run [1] : 0.002  
Run [2] : 0.001  
Run [3] : 0.002  
Run [4] : 0.001  
Run [5] : 0.001  
Run [6] : 0.001  
Run [7] : 0.001  
Run [8] : 0.001  
Run [9] : 0.001

Thoughts

This is a very simple demonstration of how data sorting, something that at first seems trivial, can be greatly improved with a clever algorithm or two.

Everything in computer science seems to have a trade-off or edge-case, so where do these sorts fall short? Mostly when there are specific conditions met within the dataset. If the dataset is smaller than a handful (actual scientific measurement), Merge and Quick sort can actually be out performed by Bubble sort. It may seem crazy, but what is known as ‘pivot selection’ can cause uneven segmenting in small data and lead to all sorts of issues when sorting data. Since this isn’t meant to be a tutorial on how these work but rather, a demonstration, here are the Wikipedia links for the two algorithms.

[More on Merge Sort: https://en.wikipedia.org/wiki/Merge_sort]

[More on Quick Sort: https://en.wikipedia.org/wiki/Quicksort]