Skip to main content
  1. /classes/
  2. Classes, Spring 2026/
  3. CS 2470 Spring 2026: Course Site/

Lecture Notes: 04-17 Data Races

·814 words·4 mins·

Sum101

#include <stdio.h>
#include <assert.h>

// One billion (10^9): the exclusive upper bound of the range being summed.
const long TOP = 1000L * 1000L * 1000L;

// Sequential baseline: walks every integer in [0, TOP) on a single thread,
// adds up the ones divisible by 101, and prints the total.
// The command-line arguments are unused.
int
main(int _ac, char* _av[])
{
    printf("Summing numbers divisible by 101 from 0 to %ld.\n", TOP - 1);

    long total = 0;
    long nn = 0;
    while (nn < TOP) {
        // Only multiples of 101 contribute to the total.
        if (nn % 101 == 0) {
            total = total + nn;
        }
        nn++;
    }

    printf("Sum = %ld\n", total);
    return 0;
}

Parallel version with threads (data race):

#include <stdio.h>
#include <pthread.h>

// The bound is spelled as a macro because, in standard C, a const-qualified
// variable is not a constant expression: initializing NPP from TOP at file
// scope only compiles where the compiler accepts it as an extension.
#define TOP_VALUE 1000000000L

// Exclusive upper bound of the range being summed (one billion).
const long TOP = TOP_VALUE;
// How many consecutive numbers each of the 10 threads is responsible for.
const long NPP = TOP_VALUE / 10;

// Running total, written by every thread.
long sum = 0;

// Thread entry point: adds every multiple of 101 found in this thread's
// slice of the range to the global `sum`.
//
// arg: the slice index, smuggled through the void* as an integer (it is
//      never dereferenced). Slice pp covers [NPP * pp, NPP * pp + NPP).
// Returns 0 (a null pointer); the only output is the update to `sum`.
//
// NOTE: `sum` is a plain global that every thread running this function
// modifies with no synchronization. That is the data race this version of
// the program exists to demonstrate.
void*
thread_main(void* arg)
{
    long pp = (long) arg;
    long i0 = NPP * pp;     // first number in this thread's slice
    long iN = i0 + NPP;     // one past the last number in the slice

    for (long ii = i0; ii < iN; ++ii) {
        if (ii % 101 == 0) {
            sum += ii;      // unsynchronized read-modify-write of shared state
        }
    }

    return 0;
}

// Splits the range [0, TOP) across 10 threads running thread_main, waits
// for all of them, and prints the shared total.
// The command-line arguments are unused.
// Returns 0 on success, 1 if a thread could not be created or joined.
int
main(int _ac, char* _av[])
{
    printf("Summing numbers divisible by 101 from 0 to %ld.\n", TOP - 1);

    pthread_t threads[10];
    const long nthreads = (long) (sizeof(threads) / sizeof(threads[0]));

    for (long pp = 0; pp < nthreads; ++pp) {
        // The slice index travels to the thread inside the void* argument.
        int rv = pthread_create(&threads[pp], NULL, thread_main, (void*) pp);
        if (rv != 0) {
            // threads[pp] is not valid after a failed create, so joining it
            // later would be undefined; stop here instead.
            fprintf(stderr, "pthread_create failed for thread %ld (error %d)\n", pp, rv);
            return 1;
        }
    }

    for (long pp = 0; pp < nthreads; ++pp) {
        int rv = pthread_join(threads[pp], NULL);
        if (rv != 0) {
            fprintf(stderr, "pthread_join failed for thread %ld (error %d)\n", pp, rv);
            return 1;
        }
    }

    printf("Sum = %ld\n", sum);
    return 0;
}

Add a lock:

#include <stdio.h>
#include <pthread.h>
#include <semaphore.h>

// The bound is spelled as a macro because, in standard C, a const-qualified
// variable is not a constant expression: initializing NPP from TOP at file
// scope only compiles where the compiler accepts it as an extension.
#define TOP_VALUE 1000000000L

// Exclusive upper bound of the range being summed (one billion).
const long TOP = TOP_VALUE;
// How many consecutive numbers each of the 10 threads is responsible for.
const long NPP = TOP_VALUE / 10;

// Running total, written by every thread while holding `lock`.
long sum = 0;
// Semaphore used as a mutex around updates to `sum`; main initializes it to 1.
sem_t lock;

// Thread entry point: adds every multiple of 101 in this thread's slice of
// the range to the global `sum`, taking `lock` around each individual update.
//
// arg: the slice index, carried in the void* as an integer.
// Returns a null pointer; the result is left in `sum`.
void*
thread_main(void* arg)
{
    const long slice = (long) arg;
    const long first = slice * NPP;
    const long limit = first + NPP;     // one past the end of the slice

    long cur = first;
    while (cur < limit) {
        if (cur % 101 == 0) {
            // Critical section: one thread at a time may touch `sum`.
            sem_wait(&lock);
            sum = sum + cur;
            sem_post(&lock);
        }
        cur++;
    }

    return NULL;
}

// Initializes the semaphore that guards `sum`, splits [0, TOP) across 10
// threads running thread_main, waits for all of them, and prints the total.
// The command-line arguments are unused.
// Returns 0 on success, 1 if the semaphore or a thread could not be set up.
int
main(int _ac, char* _av[])
{
    printf("Summing numbers divisible by 101 from 0 to %ld.\n", TOP - 1);

    // Initial value 1 makes the semaphore behave as an unlocked mutex;
    // the 0 means it is shared between threads of this process only.
    if (sem_init(&lock, 0, 1) != 0) {
        perror("sem_init");
        return 1;
    }

    pthread_t threads[10];
    const long nthreads = (long) (sizeof(threads) / sizeof(threads[0]));

    for (long pp = 0; pp < nthreads; ++pp) {
        int rv = pthread_create(&threads[pp], NULL, thread_main, (void*) pp);
        if (rv != 0) {
            // threads[pp] is not valid after a failed create, so joining it
            // later would be undefined; stop here instead.
            fprintf(stderr, "pthread_create failed for thread %ld (error %d)\n", pp, rv);
            return 1;
        }
    }

    for (long pp = 0; pp < nthreads; ++pp) {
        int rv = pthread_join(threads[pp], NULL);
        if (rv != 0) {
            fprintf(stderr, "pthread_join failed for thread %ld (error %d)\n", pp, rv);
            return 1;
        }
    }

    printf("Sum = %ld\n", sum);

    sem_destroy(&lock);
    return 0;
}

Local sum (threads return local sum):

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

// The bound is spelled as a macro because, in standard C, a const-qualified
// variable is not a constant expression: initializing NPP from TOP at file
// scope only compiles where the compiler accepts it as an extension.
#define TOP_VALUE 1000000000L

// Exclusive upper bound of the range being summed (one billion).
const long TOP = TOP_VALUE;
// How many consecutive numbers each of the 10 threads is responsible for.
const long NPP = TOP_VALUE / 10;

// Thread entry point: sums every multiple of 101 in this thread's slice of
// the range and hands the result back through the thread's return value.
//
// arg: the slice index, carried in the void* as an integer. Slice pp covers
//      [NPP * pp, NPP * pp + NPP).
// Returns a pointer to a heap-allocated long holding the slice's sum.
// Ownership passes to whoever joins the thread; that caller must free() it.
// If the allocation fails, the whole process exits with status 1.
void*
thread_main(void* arg)
{
    long pp = (long) arg;
    long i0 = NPP * pp;
    long iN = i0 + NPP;

    long* local_sum = malloc(sizeof *local_sum);
    if (local_sum == NULL) {
        // Without a result slot there is nothing useful to return.
        perror("malloc");
        exit(1);
    }

    // Accumulate in a plain local and store once at the end, so the loop
    // never writes to heap memory.
    long acc = 0;
    for (long ii = i0; ii < iN; ++ii) {
        if (ii % 101 == 0) {
            acc += ii;
        }
    }

    *local_sum = acc;
    return local_sum;
}

// Splits [0, TOP) across 10 threads running thread_main, then joins each
// one, adds its returned partial sum to the total, and frees the result.
// The command-line arguments are unused.
// Returns 0 on success, 1 if a thread could not be created or joined, or
// if a thread handed back no result.
int
main(int _ac, char* _av[])
{
    printf("Summing numbers divisible by 101 from 0 to %ld.\n", TOP - 1);

    pthread_t threads[10];
    const long nthreads = (long) (sizeof(threads) / sizeof(threads[0]));

    for (long pp = 0; pp < nthreads; ++pp) {
        int rv = pthread_create(&threads[pp], NULL, thread_main, (void*) pp);
        if (rv != 0) {
            // threads[pp] is not valid after a failed create, so joining it
            // later would be undefined; stop here instead.
            fprintf(stderr, "pthread_create failed for thread %ld (error %d)\n", pp, rv);
            return 1;
        }
    }

    long sum = 0;
    for (long pp = 0; pp < nthreads; ++pp) {
        void* ret = NULL;
        int rv = pthread_join(threads[pp], &ret);
        if (rv != 0) {
            fprintf(stderr, "pthread_join failed for thread %ld (error %d)\n", pp, rv);
            return 1;
        }
        if (ret == NULL) {
            fprintf(stderr, "thread %ld returned no result\n", pp);
            return 1;
        }

        // The thread allocated its result; main owns it after the join.
        sum += *((long*) ret);
        free(ret);
    }

    printf("Sum = %ld\n", sum);
    return 0;
}

Deadlock example:

#include <stdio.h>
#include <pthread.h>
#include <semaphore.h>
#include <unistd.h>

// The two semaphores the threads contend for; main initializes each to 1.
sem_t aa, bb;

// Thread A: takes semaphore `aa`, then `bb`, does its "work", and releases
// both. `arg` is unused; always returns 0 (a null pointer).
//
// thread_b takes the same two semaphores in the opposite order (bb, then
// aa). Each thread can therefore end up holding one semaphore while waiting
// forever for the other, which is the deadlock this example demonstrates.
void*
thread_a(void* arg)
{
    printf("In thread A\n");

    sem_wait(&aa);
    sleep(1);           // keep holding aa for a second before asking for bb
    sem_wait(&bb);

    printf("Doing real work in thread A...\n");

    // Release in the reverse of the acquisition order.
    sem_post(&bb);
    sem_post(&aa);

    return 0;
}

// Thread B: takes semaphore `bb`, then `aa`, does its "work", and releases
// both. `arg` is unused; always returns 0 (a null pointer).
//
// thread_a takes the same two semaphores in the opposite order (aa, then
// bb). Each thread can therefore end up holding one semaphore while waiting
// forever for the other, which is the deadlock this example demonstrates.
void*
thread_b(void* arg)
{
    printf("In thread B\n");

    sem_wait(&bb);
    sleep(1);           // keep holding bb for a second before asking for aa
    sem_wait(&aa);

    printf("Doing real work in thread B...\n");

    // Released in the same order they were acquired (bb first, then aa).
    sem_post(&bb);
    sem_post(&aa);

    return 0;
}

// Sets both semaphores to 1, starts thread_a and then thread_b, waits for
// each in that order, and finally destroys the semaphores.
// The command-line arguments are unused.
int
main(int _ac, char* _av[])
{
    sem_t* sems[2] = { &aa, &bb };
    void* (*entries[2])(void*) = { thread_a, thread_b };
    pthread_t tids[2];

    // An initial value of 1 means the first sem_wait on each succeeds at once.
    for (int k = 0; k < 2; ++k) {
        sem_init(sems[k], 0, 1);
    }

    for (int k = 0; k < 2; ++k) {
        pthread_create(&tids[k], 0, entries[k], 0);
    }

    for (int k = 0; k < 2; ++k) {
        pthread_join(tids[k], 0);
    }

    for (int k = 0; k < 2; ++k) {
        sem_destroy(sems[k]);
    }

    return 0;
}

Virtual Memory and Threads
#

  • With threads, all memory is shared by default.
  • No need for mmap or special shared memory allocation.
  • Advantage: Allocating shared memory post-spawn is trivial.
  • Disadvantage: every piece of shared writable data is a potential data race unless access to it is synchronized.

Threads vs. Processes
#

  • We can spawn multiple processes with fork()
  • We can execute multiple threads within a single process.

Key difference: With threads, all memory is shared by default.

  • Advantage: Allocating shared memory post-spawn is trivial.
  • Disadvantage: every piece of shared writable data is a potential data race.

History
#

Early days
#

  • Before multi-processor systems, parallelism didn’t matter.
  • Concurrency was still useful though:
    • Running multiple programs at once.
    • Having multiple logical tasks happening within one program.
  • On Unix style systems, processes were commonly used for concurrency.
  • On early Windows / Mac systems, concurrency within a program was represented by cooperative threading:
    • One thread could run at a time.
    • To let other threads run, explicitly call yield()
    • Some systems had an implicit yield when a thread blocked on I/O.
  • By the 90’s, systems had some sort of pre-emptive threading. This still didn’t work in parallel, but it would automatically schedule work between threads without explicit yield() calls.

Multiprocessors
#

  • Multiprocessor servers became widely available in the mid 90’s.
  • Windows and Solaris had decent parallel thread support.
  • Linux didn’t get fully functional threads until around 2002, so fork() was heavily optimized instead.
  • Result: Threads are much more efficient than processes on Windows.
  • Threads under Linux evolved from fork(), so the performance difference is small.
  • Multi-core desktop processors showed up around 2005, and suddenly parallelism became necessary for performance.
Nat Tuck
Author
Nat Tuck