「CS61B」Note(9) Lab13 Radix Sorts

In this lab, the task is to implement counting sort and radix sort.t. The requirements are at this link.

Counting Sort

A naive implementation naiveCountingSort()is given which can only deal with arrays of positive numbers. The task is to implement a better counting sort so that it can support negative numbers.

public class CountingSort {
/**
 * Counting sort on the given int array. Returns a sorted version of the array.
 * Does not touch original array (non-destructive method).
 * DISCLAIMER: this method does not always work, find a case where it fails
 *
 * @param arr int array that will be sorted
 * @return the sorted array
 */
public static int[] naiveCountingSort(int[] arr) {
    // find max
    int max = Integer.MIN_VALUE;
    for (int i : arr) {
        max = max > i ? max : i;
    }

    // gather all the counts for each value
    int[] counts = new int[max + 1];
    for (int i : arr) {
        counts[i]++;
    }

    // when we're dealing with ints, we can just put each value
    // count number of times into the new array
    int[] sorted = new int[arr.length];
    int k = 0;
    for (int i = 0; i < counts.length; i += 1) {
        for (int j = 0; j < counts[i]; j += 1, k += 1) {
            sorted[k] = i;
        }
    }

    // however, below is a more proper, generalized implementation of
    // counting sort that uses start position calculation
    int[] starts = new int[max + 1];
    int pos = 0;
    for (int i = 0; i < starts.length; i += 1) {
        starts[i] = pos;
        pos += counts[i];
    }

    int[] sorted2 = new int[arr.length];
    for (int i = 0; i < arr.length; i += 1) {
        int item = arr[i];
        int place = starts[item];
        sorted2[place] = item;
        starts[item] += 1;
    }

    // return the sorted array
    return sorted;
}

I thought I cut a short path here because at first, I just add the min negative number to the array to make every number positive and put it to the naive “sorter”. It works in most situations, but will cut down the length of negative array it can handle. So it won’t pass the Autograder.

/**
 * Counting sort on the given int array, must work even with negative numbers.
 * Note, this code does not need to work for ranges of numbers greater
 * than 2 billion.
 * Does not touch original array (non-destructive method).
 *
 * @param arr int array that will be sorted
 */
public static int[] betterCountingSort(int[] arr) {
    // TODO make counting sort work with arrays containing negative numbers.
    // find min negative
    int minNegative = 0;
    for (int i : arr) {
        if (i < 0) {
            minNegative = Math.max(minNegative, Math.abs(i));
        }
    }
    minNegative = -minNegative;

    // add min negative to the array to make every number positive
    int[] arrCopy = new int[arr.length];
    for (int i = 0; i < arr.length; i++) {
        arrCopy[i] = arr[i] - minNegative;
    }

    arrCopy = naiveCountingSort(arrCopy);

    // recover the sorted array by adding min negative
    for (int i = 0; i < arr.length; i++) {
        arrCopy[i] = arrCopy[i] + minNegative;
    }
    return arrCopy;
}

LSD Radix Sort

In this part of lab the task is to write an implementation of radix sort for ASCII Strings. Normally, if we just had decimal numbers, we would say that we would have a radix of 10 (R = 10) since there are 10 possible digits at each index, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]. ASCII Strings have 256 possible characters (numbered 0-255 with the radix R = 256) and are of variable length. Implement the radix sort as an LSD (least significant digit) sort that returns a sorted copy of the input list of ASCII Strings.

/**
 * Does LSD radix sort on the passed in array with the following restrictions:
 * The array can only have ASCII Strings (sequence of 1 byte characters)
 * The sorting is stable and non-destructive
 * The Strings can be variable length (all Strings are not constrained to 1 length)
 *
 * @param asciis String[] that needs to be sorted
 *
 * @return String[] the sorted array
 */
private static int maxLength = Integer.MIN_VALUE;
public static String[] sort(String[] asciis) {
    // TODO: Implement LSD Sort
    // make a copy of the original asciis to keep non-destructive
    String[] arrCopy = new String[asciis.length];
    System.arraycopy(asciis, 0, arrCopy, 0, asciis.length);

    // find the max length of all strings in the asciis
    for (String ascii : asciis) {
        maxLength = Math.max(ascii.length(), maxLength);
    }

    for (int d = 0; d < maxLength; d++) {
        sortHelperLSD(arrCopy, maxLength - d - 1);
    }
    return arrCopy;
}

/**
 * LSD helper method that performs a destructive counting sort the array of
 * Strings based off characters at a specific index.
 * @param asciis Input array of Strings
 * @param index The position to sort the Strings on.
 */
private static void sortHelperLSD(String[] asciis, int index) {
    // Optional LSD helper method for required LSD radix sort
    // make a new asciis in which pad every shorter strings with '_'
    String[] asciisPadded = new String[asciis.length];
    for (int i = 0; i < asciis.length; i += 1) {
        if (asciis[i].length() < maxLength) {
            asciisPadded[i] = pad(asciis[i], maxLength - asciis[i].length());
        } else {
            asciisPadded[i] = asciis[i];
        }
    }

    // gather all the counts for each value
    int[] counts = new int[256];
    for (String i : asciisPadded) {
        counts[i.charAt(index)]++;
    }

    // calculate start position
    int[] starts = new int[256];
    int pos = 0;
    for (int i = 0; i < starts.length; i += 1) {
        starts[i] = pos;
        pos += counts[i];
    }

    String[] sorted = new String[asciis.length];
    for (int i = 0; i < asciis.length; i += 1) {
        String item = asciisPadded[i];
        int place = starts[item.charAt(index)];
        sorted[place] = asciis[i];  // remember to use the original asciis without '_'
        starts[item.charAt(index)] += 1;
    }
    System.arraycopy(sorted, 0, asciis, 0, sorted.length);
}

/**
 * pad the string s with '_' at the back end,
 * for example pad("2", 2) will return "2__"
 * @param s the string to be padded
 * @param numberOfPad the number of '_'
 * @return padded string
 */
private static String pad(String s, int numberOfPad) {
    for (int i = 0; i < numberOfPad; i++) {
        s += '_';
    }
    return s;
}

But I misunderstood a sentence from the document, which said “2” is after “100” so “2” is considered equivalently as “2__”, where “_” is a placeholder that comes before any other character. So I thought ‘_’ is the char that has the smallest ascii code, but it’s not. 😅😅😅

Fix

Fix betterCountingSort(), just let the radix R = max - min + 1.

public static int[] betterCountingSort(int[] arr) {
    // TODO make counting sort work with arrays containing negative numbers.
    // find min negative and max positive
    int minNegative = 0;
    int maxPositive = 0;
    for (int i : arr) {
        if (i < 0) {
            minNegative = Math.max(minNegative, Math.abs(i));
        } else {
            maxPositive = Math.max(maxPositive, i);
        }
    }
    minNegative = -minNegative;

    // gather all the counts for each value
    int[] counts = new int[maxPositive - minNegative + 1];
    for (int i : arr) {
        counts[i - minNegative]++;
    }

    // calculate start position
    int[] starts = new int[maxPositive - minNegative + 1];
    int pos = 0;
    for (int i = 0; i < starts.length; i += 1) {
        starts[i] = pos;
        pos += counts[i];
    }

    int[] sorted = new int[arr.length];
    for (int i = 0; i < arr.length; i += 1) {
        int item = arr[i];
        int place = starts[item - minNegative];
        sorted[place] = item;
        starts[item - minNegative] += 1;
    }

    // return the sorted array
    return sorted;
}

To fix LSD sort, I wrote a function getCharAsciiFromIndex()to get ascii code of the char in the index position of String s. If the index is out of length, return 0 to make sure it’s before every char. And because original ascii codes start at 0, so add 1 to make it start at 1. Let 0 represents the placeholder.

private static int maxLength = Integer.MIN_VALUE;
public static String[] sort(String[] asciis) {
    // TODO: Implement LSD Sort
    // make a copy of the original asciis to keep non-destructive
    String[] arrCopy = new String[asciis.length];
    System.arraycopy(asciis, 0, arrCopy, 0, asciis.length);

    // find the max length of all strings in the asciis
    for (String ascii : asciis) {
        maxLength = Math.max(ascii.length(), maxLength);
    }

    for (int d = 0; d < maxLength; d++) {
        sortHelperLSD(arrCopy, maxLength - d - 1);
    }
    return arrCopy;
}

private static void sortHelperLSD(String[] asciis, int index) {
    // Optional LSD helper method for required LSD radix sort
    // gather all the counts for each value
    int[] counts = new int[257];
    for (String i : asciis) {
        counts[getCharAsciiFromIndex(i, index)]++;
    }

    // calculate start position
    int[] starts = new int[257];
    int pos = 0;
    for (int i = 0; i < starts.length; i += 1) {
        starts[i] = pos;
        pos += counts[i];
    }

    String[] sorted = new String[asciis.length];
    for (int i = 0; i < asciis.length; i += 1) {
        String item = asciis[i];
        int place = starts[getCharAsciiFromIndex(item, index)];
        sorted[place] = asciis[i];
        starts[getCharAsciiFromIndex(item, index)] += 1;
    }
    System.arraycopy(sorted, 0, asciis, 0, sorted.length);
}

// get ascii code of the char in the index position of String s,
// if index is out of length, return 0 to make sure it's before every char
private static int getCharAsciiFromIndex(String s, int index) {
    if (index < s.length()) {
        return s.charAt(index) + 1; // original ascii codes start at 0, so add 1
    } else { return 0; }
}

MSD Radix Sort

MSD radix sort function recursively calls itself to achieve the sorted array. It’s a destructive method that changes the passed in array, asciis.

/**
 * MSD radix sort helper function that recursively calls itself to achieve the sorted array.
 * Destructive method that changes the passed in array, asciis.
 *
 * @param asciis String[] to be sorted
 * @param start int for where to start sorting in this method (includes String at start)
 * @param end int for where to end sorting in this method (does not include String at end)
 * @param index the index of the character the method is currently sorting on
 *
 **/
private static void sortHelperMSD(String[] asciis, int start, int end, int index) {
    // Optional MSD helper method for optional MSD radix sort
    if (index >= maxLength || start >= end + 1) { return; }
    String[] asciisSelect = new String[end - start];
    System.arraycopy(asciis, start + 0, asciisSelect, 0, asciisSelect.length);

    // gather all the counts for each value
    int[] counts = new int[257];
    for (String i : asciisSelect) {
        counts[getCharAsciiFromIndex(i, index)]++;
    }

    // calculate start position
    int[] starts = new int[257];
    int pos = 0;
    for (int i = 0; i < starts.length; i += 1) {
        starts[i] = pos;
        pos += counts[i];
    }
    int[] startsCopy = new int[257];
    System.arraycopy(starts, 0, startsCopy, 0, starts.length);

    String[] sorted = new String[asciisSelect.length];
    for (int i = 0; i < asciisSelect.length; i += 1) {
        String item = asciisSelect[i];
        int place = starts[getCharAsciiFromIndex(item, index)];
        sorted[place] = asciisSelect[i];
        starts[getCharAsciiFromIndex(item, index)] += 1;
    }
    // copy the sorted part to the original asciis
    System.arraycopy(sorted, 0, asciis, start, sorted.length);

    // use recursion to sort strings in every sorted subarray of asciis
    for (int i = 0; i < counts.length; i += 1) {
        if (counts[i] != 0) {
            sortHelperMSD(asciis, start + startsCopy[i], start + startsCopy[i] + counts[i], index + 1);
        }
    }
}

public static String[] sortMSD(String[] asciis) {
    for (String ascii : asciis) {
        maxLength = Math.max(ascii.length(), maxLength);
    }
    sortHelperMSD(asciis, 0, asciis.length, 0);
    return asciis;
}