Skip to main content
  1. /classes/
  2. Classes, Spring 2026/
  3. CS 4310 Spring 2026: Course Site/

Lecture Notes: 03-02 Data Races

·872 words·5 mins·

Sum101

#include <stdio.h>
#include <assert.h>

// Exclusive upper bound of the range being summed: one billion.
const long TOP = 1000000000;

// Sequential baseline: visit every integer in [0, TOP), add up the ones
// that are multiples of 101, and print the total.
int
main(int _ac, char* _av[])
{
    printf("Summing numbers divisible by 101 from 0 to %ld.\n", TOP - 1);

    long total = 0;
    long nn = 0;
    while (nn < TOP) {
        // Only multiples of 101 contribute to the total.
        if (nn % 101 == 0) {
            total = total + nn;
        }
        ++nn;
    }

    printf("Sum = %ld\n", total);
    return 0;
}

Parallel version

#include <stdio.h>
#include <assert.h>

#include <sys/mman.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

// Parallel attempt: split [0, TOP) into 10 equal slices, fork one child per
// slice, and have every child add its multiples of 101 into a single `long`
// that lives in a MAP_SHARED anonymous mapping.
//
// NOTE: the `*sum += ii` below is deliberately left unsynchronized. It is a
// read-modify-write on memory shared by all ten children, so updates can be
// lost and the printed total may not match the sequential program. That data
// race is what this version is meant to show.
int
main(int _ac, char* _av[])
{
    // A billion.
    const long TOP = 1000000000;
    const long NPP = TOP / 10;

    printf("Summing numbers divisible by 101 from 0 to %ld.\n", TOP - 1);
    // Flush before forking. If stdout is not line-buffered (e.g. redirected
    // to a file), the pending text would otherwise be copied into every
    // child and written again when the child calls exit().
    fflush(stdout);

    long* sum = mmap(0, sizeof(long), PROT_READ|PROT_WRITE,
                     MAP_SHARED|MAP_ANONYMOUS, -1, 0);
    // mmap reports failure with MAP_FAILED, not a null pointer.
    assert(sum != MAP_FAILED);

    pid_t kids[10];

    for (int pp = 0; pp < 10; ++pp) {
        kids[pp] = fork();
        // fork() returns -1 on failure; unchecked, that -1 would be treated
        // as "we are the parent" and later handed to waitpid().
        assert(kids[pp] != -1);

        if (kids[pp] == 0) {
            // Child: handle the slice [i0, iN). Kept as long so the bounds
            // are never narrowed from NPP's type.
            long i0 = NPP*pp;
            long iN = i0 + NPP;

            for (long ii = i0; ii < iN; ++ii) {
                if (ii % 101 == 0) {
                    *sum += ii;
                }
            }

            munmap(sum, sizeof(long));
            exit(0);
        }
        // Parent: nothing to do here, just go on to fork the next child.
    }

    // Wait for every child before reading the shared total.
    for (int pp = 0; pp < 10; ++pp) {
        waitpid(kids[pp], 0, 0);
    }

    printf("Sum = %ld\n", *sum);

    munmap(sum, sizeof(long));
    return 0;
}

Add a lock:

#include <stdio.h>
#include <assert.h>
#include <sys/mman.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

#include <semaphore.h>

// Map `size` bytes of readable and writable anonymous memory with
// MAP_SHARED and hand back mmap()'s result unchanged. No error checking is
// done here.
void*
malloc_shared(size_t size)
{
    const int prot = PROT_READ | PROT_WRITE;
    const int flags = MAP_SHARED | MAP_ANONYMOUS;

    void* region = mmap(0, size, prot, flags, -1, 0);
    return region;
}

// Unmap the `size`-byte region starting at `ptr`. The result of munmap()
// is not reported to the caller.
void
free_shared(void* ptr, size_t size)
{
    int rc = munmap(ptr, size);
    (void) rc;
}

// Locked version of the parallel sum: the same ten-way fork split, but each
// update of the shared total is bracketed by sem_wait()/sem_post() on a
// semaphore that also lives in shared memory, so only one child at a time
// performs the read-modify-write on *sum.
int
main(int _ac, char* _av[])
{
    // A billion.
    const long TOP = 1000000000;
    const long NPP = TOP / 10;

    printf("Summing numbers divisible by 101 from 0 to %ld.\n", TOP - 1);
    // Flush before forking. If stdout is not line-buffered (e.g. redirected
    // to a file), the pending text would otherwise be copied into every
    // child and written again when the child calls exit().
    fflush(stdout);

    long* sum = malloc_shared(sizeof(long));
    // malloc_shared returns mmap's result, which is MAP_FAILED on error.
    assert(sum != MAP_FAILED);

    sem_t* lock = malloc_shared(sizeof(sem_t));
    assert(lock != MAP_FAILED);

    // Second argument non-zero: the semaphore is shared between processes.
    // Third argument: initial value 1, so it behaves as a lock.
    int rv = sem_init(lock, 1, 1);
    assert(rv == 0);
    // Semaphores?

    pid_t kids[10];

    for (int pp = 0; pp < 10; ++pp) {
        kids[pp] = fork();
        // fork() returns -1 on failure; unchecked, that -1 would be treated
        // as "we are the parent" and later handed to waitpid().
        assert(kids[pp] != -1);

        if (kids[pp] == 0) {
            // Child: handle the slice [i0, iN). Kept as long so the bounds
            // are never narrowed from NPP's type.
            long i0 = NPP*pp;
            long iN = i0 + NPP;

            for (long ii = i0; ii < iN; ++ii) {
                // try wait here
                if (ii % 101 == 0) {
                    sem_wait(lock);
                    *sum += ii;
                    sem_post(lock);
                }
                // and post here
            }

            free_shared(sum, sizeof(long));
            free_shared(lock, sizeof(sem_t));
            exit(0);
        }
        // Parent: nothing to do here, just go on to fork the next child.
    }

    // Wait for every child before reading the shared total.
    for (int pp = 0; pp < 10; ++pp) {
        waitpid(kids[pp], 0, 0);
    }

    printf("Sum = %ld\n", *sum);

    // All children have exited, so nobody can still be using the semaphore.
    sem_destroy(lock);

    free_shared(sum, sizeof(long));
    free_shared(lock, sizeof(sem_t));
    return 0;
}

Real work

#include <stdio.h>
#include <assert.h>
#include <sys/mman.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

#include <semaphore.h>

// Map `size` bytes of readable and writable anonymous memory with
// MAP_SHARED and hand back mmap()'s result unchanged. No error checking is
// done here.
void*
malloc_shared(size_t size)
{
    const int prot = PROT_READ | PROT_WRITE;
    const int flags = MAP_SHARED | MAP_ANONYMOUS;

    void* region = mmap(0, size, prot, flags, -1, 0);
    return region;
}

// Unmap the `size`-byte region starting at `ptr`. The result of munmap()
// is not reported to the caller.
void
free_shared(void* ptr, size_t size)
{
    int rc = munmap(ptr, size);
    (void) rc;
}

// Lock-ordering demonstration with two process-shared semaphores.
//
// The parent takes `aa` and then `bb`; the child takes `bb` and then `aa`.
// The sleep(1) between the two waits gives each side time to grab its first
// semaphore, so normally both end up blocked on the one the other holds and
// the program hangs. The opposite acquisition order is intentional and is
// left as is.
int
main(int _ac, char* _av[])
{
    // One mapping holds both semaphores; remember its size for the unmap.
    const size_t locks_size = 2 * sizeof(sem_t);

    sem_t* locks = malloc_shared(locks_size);
    // malloc_shared returns mmap's result, which is MAP_FAILED on error.
    assert(locks != MAP_FAILED);

    sem_t* aa = &(locks[0]);
    sem_t* bb = &(locks[1]);

    // Second argument non-zero: shared between processes. Initial value 1.
    sem_init(aa, 1, 1);
    sem_init(bb, 1, 1);

    pid_t cpid = fork();
    // fork() returns -1 on failure; unchecked, that would run the parent
    // branch with no child and pass -1 to waitpid().
    assert(cpid != -1);

    if (cpid != 0) {
        printf("In parent\n");

        // Parent order: aa first, then bb.
        sem_wait(aa);
        sleep(1);
        sem_wait(bb);

        printf("Doing real work in parent...\n");

        sem_post(bb);
        sem_post(aa);

        waitpid(cpid, 0, 0);
    }
    else {
        printf("In child\n");

        // Child order: bb first, then aa (the reverse of the parent).
        sem_wait(bb);
        sleep(1);
        sem_wait(aa);

        printf("Doing real work in child...\n");

        sem_post(bb);
        sem_post(aa);
    }

    // Unmap with the same size that was mapped.
    free_shared(locks, locks_size);
    return 0;
}

Virtual Memory and Fork: A Review
#

  • Draw the virtual memory diagram.
  • Allocate some shared memory.
  • Fork.
  • Point out that shared memory is shared, and non-shared writable memory soon isn’t.

Introducing Threads
#

// create.c
#include <stdio.h>
#include <pthread.h>
#include <assert.h>

#define NN 10

// Thread entry point. `thread_arg` points at an int: the function prints
// that int, adds the printed value to the int in place, and returns the
// same pointer it was given.
void*
thread_main(void* thread_arg)
{
    int* slot = (int*) thread_arg;
    int seen = *slot;

    printf("thread %d: We're in a thread.\n", seen);

    *slot = *slot + seen;
    return slot;
}

// Start NN threads, each handed a pointer to its own element of nums[],
// then join them in creation order and print the int each thread's return
// pointer refers to.
int
main(int _argc, char* _argv[])
{
    int nums[NN];
    int rv;
    pthread_t threads[NN];

    printf("main: Starting %d threads.\n", NN);

    for (int ii = 0; ii < NN; ++ii) {
        // Every thread gets a distinct element, initialized to its index.
        nums[ii] = ii;

        rv = pthread_create(&(threads[ii]), 0, thread_main, &(nums[ii]));
        assert(rv == 0);
    }

    printf("main: Started %d threads.\n", NN);

    for (int ii = 0; ii < NN; ++ii) {
        void* ret;
        rv = pthread_join(threads[ii], &ret);
        // Check the join the same way the create is checked: if it failed,
        // `ret` was never set and must not be dereferenced below.
        assert(rv == 0);

        // thread_main returns the pointer it was given, i.e. &nums[ii].
        int yy = *((int*) ret);
        printf("main: Joined thread %d, rv = %d.\n", ii, yy);
    }

    printf("main: All threads joined.\n");

    return 0;
}
  • show create.c
  • Discuss how threads change the virtual memory story.

Threads vs. Processes
#

  • We can spawn multiple processes with fork()
  • We can execute multiple threads within a single process.

Key difference: With threads, all memory is shared by default.

  • Advantage: Allocating shared memory post-spawn.
  • Disadvantage: 100% data races

History
#

Early days
#

  • Before multi-processor systems, parallelism didn’t matter.
  • Concurrency was still useful though:
    • Running multiple programs at once.
    • Having multiple logical tasks happening within one program.
  • On Unix style systems, processes were commonly used for concurrency.
  • On early Windows / Mac systems, concurrency within a program was represented by cooperative threading:
    • One thread could run at a time.
    • To let other threads run, explicitly call yield()
    • Some systems had an implicit yield when a thread blocked on I/O.
  • By the 90’s, systems had some sort of pre-emptive threading. This still didn’t work in parallel, but it would automatically schedule work between threads without explicit yield() calls.

Multiprocessors
#

  • Multiprocessor servers became widely available in the mid 90’s.
  • Windows and Solaris had decent parallel thread support.
  • Linux didn’t get fully functional threads until around 2002, so fork() was heavily optimized instead.
  • Result: Threads are much more efficient than processes on Windows.
  • Threads under Linux evolved from fork(), so the performance difference is small.
  • Multi-core desktop processors showed up around 2005, and suddenly parallelism became necessary for performance.
Nat Tuck
Author
Nat Tuck