All sorts of sorts

To demonstrate sorting algorithms and their run times, I decided to write a program that would have Merge, Quick, and Bubble sort go through the same data and report back the process time. It was assumed that the speeds of Quick sort and Merge sort would be many times faster than Bubble sort, but I wanted to actually see the difference.

To test the sorts, I implemented a C++ program to generate a data set of 100,000 random integers that would then be sorted by all three sorts 10 times. To ensure that the results were accurate I enforced the sorting of the same 100,000 integers each time for each algorithm.

Header File

#ifndef SORTS_H
#define SORTS_H

#include <time.h>
#include <stdio.h>
#include <stdlib.h>

// For file output
#include <iostream>
#include <fstream>

// How many integers to generate
#define DATASET 100000

// How many times to run an algorithm on the data
#define DATARUNS 10

class Sorts  
{
public:

    // Everything will happen here
    Sorts();

private:

    // Something to hold the random numbers
    int generatedData[DATASET];

    // A place for them to be sorted
    int array[DATASET];

    // Results of the algorithms
    double bubbleSortTimes[DATARUNS];
    double quickSortTimes[DATARUNS];
    double mergeSortTimes[DATARUNS];

    // Generate the initial data
    void generateData();

    // Reset data
    void dataReset();

    // Calculate time taken
    double timediff(clock_t t1, clock_t t2);

    // Perform swap action on two integers
    void swap(int& a, int& b);

    // Perform bubble sort
    void bubblesort();

    // Perform quick sort
    int qpartition(int * arr, int left, int right);
    void quickSort(int * arr, int left, int right);

    // Perform merge sort
    void doMerge(int * arr, int left, int right);
    void mergeSort(int * arr, int left, int right);

    // Display the results of the multiple runs
    void displayResults();
};

#endif // SORTS_H

CPP File

#include "sorts.h"

Sorts::Sorts()  
{
    // Fills array with random integers of size DATASET
    generateData();

    // Do bubble sort runs
    for(int i = 0; i < DATARUNS; i++)
    {
        // Capture time
        clock_t t1 = clock();

        // Perform bubble sort
        bubblesort();

        //Capture time again
        clock_t t2 = clock();

        // Add run time to result array
        bubbleSortTimes[i] = timediff(t1, t2);

        // Reset operating data to originaly generated random numbers to ensure we are
        // comparing the sorting speed on the exact same random numbers
        dataReset();
    }

    // Repeat above process for other sorts

    // Do quick sort runs
    for(int i = 0; i < DATARUNS; i++)
    {
        clock_t t1 = clock();
        quickSort(array, 0, DATASET-1);
        clock_t t2 = clock();
        quickSortTimes[i] = timediff(t1, t2);
        dataReset();
    }

    // Do merge sort runs
    for(int i = 0; i < DATARUNS; i++)
    {
        clock_t t1 = clock();
        mergeSort(array, 0, DATASET-1);
        clock_t t2 = clock();
        mergeSortTimes[i] = timediff(t1, t2);

        dataReset();
    }

    // Display all the results
    displayResults();
}

void Sorts::generateData()  
{
    // Seed rand
    srand((unsigned)time(0));

    // Generate data
    for(int i = 0; i < DATASET; i++)
        generatedData[i] = rand();

    // Copy data to array that will be manipulated
    dataReset();
}

void Sorts::dataReset()  
{
    for(int i = 0; i < DATASET; i++)
        array[i] = generatedData[i];
}

double Sorts::timediff(clock_t t1, clock_t t2)  
{
    return (double)(t2 - t1) / CLOCKS_PER_SEC;
}

void Sorts::swap(int &a, int &b)  
{
    int t = a;
    a = b;
    b = t;
}

void Sorts::bubblesort()  
{
    int i, j;
    for(i=0;i<DATASET-1;i++)
        for(j=DATASET-1;j>i;--j)
            if(array[j] < array[j-1])
                swap(array[j],array[j-1]);
}

int Sorts::qpartition(int *arr, int left, int right)  
{
    // Use mid as the pivot
    int p = arr[(left+right)/2];

    // Perform Hoare-Partitioning
    while ( left < right )
    {
        while ( arr[left] < p )
            left++;

        while ( arr[right] > p )
            right--;

        if ( arr[left] == arr[right] )
            left++;
        else if ( left < right )
            swap(arr[left], arr[right]);
    }
    return right;
}

void Sorts::quickSort(int *arr, int left, int right)  
{
    if( left < right )
    {
        // Mid point
        int p = qpartition(arr, left, right);

        // Quicksort left of piv
        quickSort(arr, left, p-1);

        // Quicksort right of piv
        quickSort(arr, p+1, right);
    }
}

void Sorts::doMerge(int *arr, int left, int right)  
{
    int p = (left + right) / 2,     // Mid point
            ti = 0,                 // Temp  indexer
            l = left,               // Left  indexer
            k = p + 1,              // Right indexer
            temp[right-left+1];     // Temp array

    // Merge the two data sets
    while ( l <= p && k <= right )
        if ( arr[l] < arr[k] )
            temp[ti++] = arr[l++];
        else
            temp[ti++] = arr[k++];

    // Merge remaining data in left arr
    while ( l <= p )
        temp[ti++] = arr[l++];

    // Merge remaining data in right arr
    while ( k <= right )
        temp[ti++] = arr[k++];

    // Copy temp array to master array
    for ( int i = left; i <= right; i++ )
        arr[i] = temp[i-left];
}

void Sorts::mergeSort(int * arr, int left, int right)  
{
    if(left < right)
    {
        // Get mid point
        int p = (left + right) / 2;

        // Merge sort left side
        mergeSort(arr, left, p);

        // Merge sort right side
        mergeSort(arr, p + 1, right);

        // Perform the merging
        doMerge(arr, left, right);
    }
}

void Sorts::displayResults()  
{
    std::ofstream outfile;
    outfile.open("output.txt");

    printf("\n\n Bubble sort results: \n\n");
    outfile << "\n\n Bubble sort results: \n\n";
    for(int i = 0; i < DATARUNS; i++)
    {
        printf("\tRun [%i] : %f  \n", i, bubbleSortTimes[i]);
        outfile << "Run [" << i << "] : " << bubbleSortTimes[i] << std::endl;
    }

    printf("\n\n Quick sort results: \n\n");
    outfile << "\n\n Quick sort results: \n\n";
    for(int i = 0; i < DATARUNS; i++)
    {
        printf("\tRun [%i] : %f  \n", i, quickSortTimes[i]);
        outfile << "Run [" << i << "] : " << quickSortTimes[i] << std::endl;
    }

    printf("\n\n Merge sort results: \n\n");
    outfile << "\n\n Merge sort results: \n\n";
    for(int i = 0; i < DATARUNS; i++)
    {
        printf("\tRun [%i] : %f  \n", i, mergeSortTimes[i]);
        outfile << "Run [" << i << "] : " << mergeSortTimes[i] << std::endl;
    }

    outfile.close();
}

Output

Bubble sort results: 

Run [0] : 0.298  
Run [1] : 0.293  
Run [2] : 0.3  
Run [3] : 0.299  
Run [4] : 0.303  
Run [5] : 0.298  
Run [6] : 0.296  
Run [7] : 0.296  
Run [8] : 0.295  
Run [9] : 0.301    

Quick sort results: 

Run [0] : 0.004  
Run [1] : 0.004  
Run [2] : 0.004  
Run [3] : 0.004  
Run [4] : 0.004  
Run [5] : 0.004  
Run [6] : 0.004  
Run [7] : 0.004  
Run [8] : 0.004  
Run [9] : 0.004    

Merge sort results: 

Run [0] : 0.001  
Run [1] : 0.002  
Run [2] : 0.001  
Run [3] : 0.002  
Run [4] : 0.001  
Run [5] : 0.001  
Run [6] : 0.001  
Run [7] : 0.001  
Run [8] : 0.001  
Run [9] : 0.001

Thoughts

This is a very simple demonstration of how data sorting, something that at first seems trivial, can be greatly improved with a clever algorithm or two.

Everything in computer science seems to have a trade-off or edge-case, so where do these sorts fall short? Mostly when there are specific conditions met within the dataset. If the dataset is smaller than a handful (actual scientific measurement), Merge and Quick sort can actually be out performed by Bubble sort. It may seem crazy, but what is known as ‘pivot selection’ can cause uneven segmenting in small data and lead to all sorts of issues when sorting data. Since this isn’t meant to be a tutorial on how these work but rather, a demonstration, here are the Wikipedia links for the two algorithms.

[More on Merge Sort: https://en.wikipedia.org/wiki/Merge_sort]

[More on Quick Sort: https://en.wikipedia.org/wiki/Quicksort]

FizzBuzz

I’ve seen the FizzBuzz test come up at a couple of programming competitions, and I hear that it is an interview favorite amongst some employers. The test is easy, and allows an employer to see your coding style. While no style is shared by all programmers it is important to see if the candidate can at-least remain consistent.

So what is the FizzBuzz test?

The test can come in a multitude of varieties, the most common that I have seen is described as:
A program that counts from 1 to 100. When a number is divisible by 3, only print ‘Fizz.’ If the number is divisible by 5, only print ‘Buzz.’ If the number is divisible by 3 and 5, print ‘FizzBuzz.’ If the number is neither of the aforementioned cases, print the number.

It may seem somewhat difficult if you’re not familiar with the modulus operator (%), so we’ll get everyone caught up to speed. If we use the code cout << 9 % 2 << endl the console will output 1.This is because when we divided the integer 9, by 2, we get a remainder of 1.

We use the modulus operator in the FizzBuzz test to tell if the 3, and/or the 5, fit evenly into a given number by ensuring there isn’t a remainder.

if (0 == 6 % 3) will evaluate to TRUE, as 6 is evenly divided by 3.

Now that the modulus operator is in our tool box we can start laying out what we need to do. Looking at the description, we can tell that the first thing that is necessary is a basic loop. The loop will run from 1 to 100, and we will need to check every single number for divisions of 3 and 5.

#include <iostream>
using namespace std;
int main()
{
    cout << "[START]" << endl;
    for(int i = 1; i <= 100; i++)
    {
            cout << i;

            if( 0 == i%3)
                    cout << "Divisible by 3!";
            if( 0 == i%5)
                    cout << "Divisible by 5!";
            cout << endl;
    }
    return 0;
}

Seems like we’re almost done already, doesn’t it ? Thats because we almost are. What needs to be done now is changing our output statements to their respective ‘Fizz’ or ‘Buzz,’ and handle the combined case of ‘FizzBuzz.’

#include <iostream>
using namespace std;
int main()
{
    cout << "[START]" << endl;
    for(int i = 1; i <= 100; i++)
    {
            // If i is evenly divisible by 3
            if( 0 == i%3)
            { 
                 /*
                       Check if it is 3 and 5, 
                       or if it is just 3.
                 */
                 if( 0 == i%5)
                     cout << "FizzBuzz";
                 else
                    cout << "Fizz";
            }
            else if( 0 == i%5)
            {
                 /*
                       We chain the previous if with 
                       an else-if to ensure we don't 
                       print something silly like 
                       'FizzBuzzBuzz.'
                 */ 
                 cout << "Buzz";
            }
            else
            {
                 /*
                       Finally, if it isn't divisible by 3, 
                       and/or 5, we print the integer.
                 */ 
                    cout << i;
            }
            // End the line for readability 
            cout << endl;
    }
    return 0;
}

The first few lines of our output from the above code will be:

1
2
Fizz
4
Buzz
Fizz
7
8
Fizz
Buzz
11
Fizz
13
14
FizzBuzz 

It really is interesting to see the different ways people choose to accomplish this FizzBuzz test. Sometimes they choose to clean-up the output to something different, and sometimes they function-out the modulus operations (I don’t really know why though.) The important thing to take out of how someone completes the FizzBuzz test is their understanding of controlling program flow.

I really enjoyed doing the FizzBuzz test when I first heard about it, and once I completed it, I re-wrote it a few times to see what other ways it could be solved.

Here is a slightly different solution, that uses an extra modulus to format the output :

#include <iostream>

using namespace std;

int main()
{
    cout << "[START]" << endl;

    unsigned f = 1;
    for(int i = 1; i <= 100; i++)
    {
        if(0 == i%3)
        {
                if(0 == i%5)
                        cout << "Fizz-Buzz, ";
                else
                        cout << "Fizz, ";
                f = 0;
        }
        else if(0 == i%5)
        {
                cout << "Buzz, ";
                f = 0;
        }

        if(0 == i % 10)
                cout << endl;
        if( f )
                cout << i << ", ";
        f = 1;
    }
    cout << "[END]" << endl;

    return 0;
} 

Console Output :

[START]
1, 2, Fizz, 4, Buzz, Fizz, 7, 8, Fizz, Buzz, 
11, Fizz, 13, 14, Fizz-Buzz, 16, 17, Fizz, 19, Buzz, 
Fizz, 22, 23, Fizz, Buzz, 26, Fizz, 28, 29, Fizz-Buzz, 
31, 32, Fizz, 34, Buzz, Fizz, 37, 38, Fizz, Buzz, 
41, Fizz, 43, 44, Fizz-Buzz, 46, 47, Fizz, 49, Buzz, 
Fizz, 52, 53, Fizz, Buzz, 56, Fizz, 58, 59, Fizz-Buzz, 
61, 62, Fizz, 64, Buzz, Fizz, 67, 68, Fizz, Buzz, 
71, Fizz, 73, 74, Fizz-Buzz, 76, 77, Fizz, 79, Buzz, 
Fizz, 82, 83, Fizz, Buzz, 86, Fizz, 88, 89, Fizz-Buzz, 
91, 92, Fizz, 94, Buzz, Fizz, 97, 98, Fizz, Buzz, 
[END]

I encourage you to look for other ways to do the FizzBuzz test, and to look for any alterations to the rules that could further demonstrate the concept of control flow!