Question

Bubble sort does not sort as expected

I have a problem with a C program that is supposed to sort the environmental variables by order based on their name. However, it doesn't seem to be sorting them correctly, as the strcmp is supposed to sort the variables in order by the ASCII value of the first character (a variable beginning with D goes before a variable beginning with H).

Here is the code I used:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

void swap(char **a, char **b) {
        char *temp = *a;
        *a = *b;
        *b = temp;
}

void sort (char *envp[]) {
        char *name1, *name2, *token;
        for (int i = 0; envp[i] != NULL; i++) {
                for (int j = 0; envp[j] != NULL; j++) {
                        token = strdup(envp[i]);
                        name1 = strtok(token, "=");
                        free(token);

                        token = strdup(envp[j]);
                        name2 = strtok(token, "=");
                        free(token);

                        if (strcmp(name1, name2) > 0) {
                                swap(&envp[i], &envp[j]);
                        }
                }
        }
}

int main(int argc, char *argv[], char *envp[]) {
        printf("Before sorting:\n");
        for (int i = 0; envp[i] != NULL; i++) {
                printf("%s\n", envp[i]);
        }

        sort(envp);

        printf("After sorting:\n");
        for (int i = 0; envp[i] != NULL; i++) {
                printf("%s\n", envp[i]);
        }

        return 0;
}

Here is the "after sorting" result:

LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=00:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=00;36:*.au=00;36:*.flac=00;36:*.m4a=00;36:*.mid=00;36:*.midi=00;36:*.mka=00;36:*.mp3=00;36:*.mpc=00;36:*.ogg=00;36:*.ra=00;36:*.wav=00;36:*.oga=00;36:*.opus=00;36:*.spx=00;36:*.xspf=00;36:
SSH_AUTH_SOCK=/tmp/ssh-sjl03UGnNi/agent.17850
PWD=/home/sgl24
LOGNAME=sgl24
XDG_SESSION_TYPE=tty
MOTD_SHOWN=pam
HOME=/home/sgl24
LANG=C.UTF-8
SHELL=/bin/bash
SSH_CONNECTION=35.235.244.34 36427 10.128.0.7 22
LESSCLOSE=/usr/bin/lesspipe %s %s
XDG_SESSION_CLASS=user
TERM=xterm-256color
LESSOPEN=| /usr/bin/lesspipe %s
USER=sgl24
SHLVL=1
XDG_SESSION_ID=94
XDG_RUNTIME_DIR=/run/user/1001
SSH_CLIENT=35.235.244.34 36427 22
XDG_DATA_DIRS=/usr/local/share:/usr/share:/var/lib/snapd/desktop
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin
DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1001/bus
SSH_TTY=/dev/pts/1
_=./a.out

Can you figure out how to make the program sort the variables in the correct order?

 3  107  3
1 Jan 1970

Solution

 4

One problem is here:

token = strdup(envp[i]);
name1 = strtok(token, "=");   // now name points into the token string,
                              // more precisely at the beginning of token
free(token);                  // here you free toke, so the memory
                              // points is no longer valid

...but after you use name1 which now points also to invalid memory.

Read the documentation of strtok closely.

You probably want this:

void sort(char* envp[]) {
  char *name1, *name2, *token1, *token2;
  for (int i = 0; envp[i] != NULL; i++) {
    for (int j = 0; envp[j] != NULL; j++) {
      token1 = strdup(envp[i]);
      name1 = strtok(token1, "=");

      token2 = strdup(envp[j]);
      name2 = strtok(token2, "=");

      if (strcmp(name1, name2) > 0) {
        swap(&envp[i], &envp[j]);
      }

      free(token1);  // free token pointers only once you're done with them
      free(token2);
    }
  }
}

Or better declare variables as you use them:

void sort(char* envp[]) {
  for (int i = 0; envp[i] != NULL; i++) {
    for (int j = 0; envp[j] != NULL; j++) {
      char *token1 = strdup(envp[i]);
      char *name1 = strtok(token1, "=");

      char *token2 = strdup(envp[j]);
      char *name2 = strtok(token2, "=");

      if (strcmp(name1, name2) > 0) {
        swap(&envp[i], &envp[j]);
      }

      free(token1);
      free(token2);
    }
  }
}

Please be aware that this code has still problems:

  1. on certain platforms you may not be allowed to modify envp directly. It works on my platform (Windows) but it might not work on your platform.
  2. The code is not very efficient because there are a lot of strdups and frees, you'd be better off writing your own kind of strcmp which considers = as string terminator, then you don't need all the strdup/free.
2024-07-02
Jabberwocky

Solution

 2

Using qsort() is simplest:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

extern char ** environ;

int pstrcmp( const void * a, const void * b )
{
    return strcmp( *(const char **)a, *(const char **)b );
}

size_t count_strings( char ** strings )
{
    char ** p = strings;
    while (*p) ++p;
    return p - strings;
}

int main(void)
{
    size_t environ_size = count_strings( environ );
    qsort( environ, environ_size, sizeof(char *), pstrcmp );

    printf( "%zu environment variables:\n", environ_size );
    for (char ** env = environ;  *env;  ++env)
    {
        char * p = strchr( *env, '=' );
        printf( "  %.*s\n", (int)(p - *env), *env );
    }

    return 0;
}

BTW, it is unclear whether environ is writable (but I think it typically is, just like argv[]).

BTW, on Windows you need to use _environ (with the leading underscore).

2024-07-02
D&#250;thomhas

Solution

 1

There are some small errors in the sort routine.

In the following, the changes needed to getting to work.

The main issue was that token was freed but name1 was used AFTER that token was freed, so name1 was pointing to a freed heap block, and its value was no more correct.

It is better to keep a couple of token pointers and free them after that name1 and name2 have been used.

Then there is some optimization with the loop indexes.

// better to know how many loops to do
void sort (char *envp[], size_t n) {
    for (int i = 0; i < n - 1 && envp[i] != NULL; i++) {
        // the inner loop index upper limits depend upon the outer loop
        for (int j = 0; j < n - i - 1 && envp[j] != NULL; j++) {

            // you have to compare elements j and j + 1

            // you have to use two distinct token pointers: you cannot free token1 and use the name1 pointer derived from it after the free!
            char * token1 = strdup(envp[j]);
            char * name1 = token1 ? strtok(token1, "=") : NULL;

            char * token2 = strdup(envp[j + 1]);
            char * name2 = token2 ? strtok(token2, "=") : NULL;

            if (name1 && name2)
            {
                if (strcmp(name1, name2) > 0) {
                        swap(&envp[j], &envp[j + 1]);
                }

            }

            // free AFTER you used name1 and name2!
            if (token1)
            {
                free(token1);
            }
            if (token2)
            {
                free(token2);
            }
        }
    }
}
2024-07-02
michele sponchiado

Solution

 1

The (serious) issues revolving around prematurely freeing aside, this ...

        char *name1, *name2, *token;
        for (int i = 0; envp[i] != NULL; i++) {
                for (int j = 0; envp[j] != NULL; j++) {
                        token = strdup(envp[i]);
                        name1 = strtok(token, "=");
                        free(token);

                        token = strdup(envp[j]);
                        name2 = strtok(token, "=");
                        free(token);

                        if (strcmp(name1, name2) > 0) {
                                swap(&envp[i], &envp[j]);
                        }
                }
        }

... is not a correct sort, neither Bubble sort nor any other.

Note that both i and j range over the whole array. Assume that the premature frees were fixed, and consider this very simple input: char *example = { "B=", "A=" };. A run would go something like this:

  • at i == 0, j == 0: compare example[0] with example[0], necessarily finding them equal. No swap.

  • at i == 0, j == 1, compare example[0] with example[1], finding the former greater than the latter. It swaps, reordering the array to { "A=", "B=" }. But it's not done at that point.

  • at i == 1, j == 0, compare example[1] with example[0], finding the former greater than the latter. It swaps again, reordering the array back to { "B=", "A=" }.

  • at i == 1, j == 1: compare example[1] with example[1], necessarily finding them equal. No swap.

The final result is { "B=", "A=" }, the original, wrong order.

To sort correctly, input[i] and input[j] should be swapped only when both of the following are true:

  • input[i] compares greater than input[j], AND
  • i compares less than j.

Or to put it another way, elements should be swapped when the relative order of their values differs from the relative order of their indices.

Furthermore, note that your approach performs a bunch of unneeded comparisons: it compares each element to itself, and it compares each pair of indices twice. The main issue and those ancillary ones would all be solved by running the j loop over only the tail of the array following index i. That would be more like this:

    for (int i = 0; envp[i] != NULL; i++) {
        for (int j = i + 1; envp[j] != NULL; j++) {  // NOTE change to lower bound
            char *name1 = strdup(envp[i]);
            strtok(name1, "="); // The first token starts at the beginning of the string
            char *name2 = strdup(envp[i]);
            strtok(name2, "=");

            if (strcmp(name1, name2) > 0) {
                swap(&envp[i], &envp[j]);
            }

            free(name2);
            free(name1);
        }
    }

That's still not a Bubble sort, though. It's a Selection sort.

Moreover, I would avoid all that memory allocation and deallocation. It's expensive, too easy to mess up, and in this case, unnecessary. Compare to this:

    for (int i = 0; envp[i] != NULL; i++) {
        size_t namelen1 = strcspn(envp[i], "=");

        for (int j = i + 1; envp[j] != NULL; j++) {
            size_t namelen2 = strcspn(envp[j], "=");
            size_t min_length = (namelen1 <= namelen2) ? namelen1 : namelen2;
            int comparison = strncmp(envp[i], envp[j], min_length);

            if (comparison > 0 || (comparison == 0 && namelen2 < namelen1)) {
                swap(&envp[i], &envp[j]);
                namelen1 = namelen2;
            } 
        }
    }
2024-07-02
John Bollinger

Solution

 0

Your sort is badly implemented, bubble sorting requires you to make pass after pass by comparing one array element with the next You have two loops comparing each element with all others. What you have written is not bubble sort.

This is bubble sort:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

void bubble_sort (const char *array[])
{
    /* if array has 0 or 1 element, there's no need to sort,
     * just return, this step avoids the case of checking
     * repeatedly in the inner for below, as one array of 0 or
     * one elements will make the inner loop test to be extended
     * with array[left] && array[right] and repeat the test at each
     * loop pass. */
    if (!array[0] || !array[1]) return;

    /* array has at least two elements */
    int any_exchange_done;
    do {
        any_exchange_done = 0;

        /* left is the left index, right is the right one.  right is 
         * always one more than left, so we compare one array element with
         * the next */
        for (int left = 0, right = 1; array[right]; left++, right++) {
            if (strcmp(array[left], array[right]) > 0) { // exchange pointers.
                const char *temp = array[left];
                array[left] = array[right];
                array[right] = temp;
                any_exchange_done = 1; // we did a change!
            }
        }
    } while (any_exchange_done);
    // no changes in last pass
}

int main(int argc, const char *argv[], const char *envp[])
{
    printf("Before sorting:\n");
    for (int i = 0; envp[i] != NULL; i++) {
        printf("%s\n", envp[i]);
    }

    bubble_sort(envp);

    printf("After sorting:\n");
    for (int i = 0; envp[i] != NULL; i++) {
        printf("%s\n", envp[i]);
    }

    return 0;
}

Note: as you have been advised, not all C environments allow you to change envp array elements, so the best way to be portable is to make a copy of the array before calling sort(). (mine allows :) )

2024-07-04
Luis Colorado