Lecture Notes: 04-17 Data Races

Sum101

#include <stdio.h>
#include <assert.h>

// Exclusive upper bound of the range being summed: one billion.
// The program visits every integer from 0 up to TOP - 1.
const long TOP = 1000000000;

// Serial baseline: visit every integer in [0, TOP), add up the ones that are
// multiples of 101, and print the total.
int
main(int _ac, char* _av[])
{
    printf("Summing numbers divisible by 101 from 0 to %ld.\n", TOP - 1);

    long total = 0;
    long nn = 0;

    // Deliberately tests every candidate; this is the loop the later
    // versions split across threads.
    while (nn < TOP) {
        if (nn % 101 == 0) {
            total += nn;
        }
        ++nn;
    }

    printf("Sum = %ld\n", total);
    return 0;
}

Parallel version with threads (data race):

#include <stdio.h>
#include <pthread.h>

// Single source for the range size.  Both constants below are derived from
// this literal because ISO C does not treat a `const long` object as a
// constant expression, so writing `NPP = TOP / 10` at file scope only
// compiles where the compiler accepts it as an extension.
#define TOP_VALUE 1000000000L

// Exclusive upper bound of the range being summed: one billion.
const long TOP = TOP_VALUE;
// Numbers per thread: the range is cut into 10 equal slices.
const long NPP = TOP_VALUE / 10;

// Shared accumulator.  Every worker adds to it with no synchronization,
// which is the data race this version demonstrates.
long sum = 0;

// Worker for one slice of the range.  `arg` is not a real pointer: it carries
// the slice index as an integer.  Adds every multiple of 101 in
// [NPP * index, NPP * (index + 1)) to the global `sum`.
//
// The update of `sum` is intentionally unsynchronized: this is the racy
// version of the program.
void*
thread_main(void* arg)
{
    long slice = (long) arg;
    long first = slice * NPP;
    long limit = first + NPP;

    for (long nn = first; nn < limit; ++nn) {
        if (nn % 101 != 0) {
            continue;
        }
        sum += nn;
    }

    return NULL;
}

// Splits [0, TOP) across 10 worker threads running thread_main, waits for all
// of them, then prints the global `sum`.
//
// Returns 0 on success, 1 if a thread could not be created or joined.
int
main(int _ac, char* _av[])
{
    // Must match the divisor used to compute NPP.
    enum { NTHREADS = 10 };

    printf("Summing numbers divisible by 101 from 0 to %ld.\n", TOP - 1);

    pthread_t threads[NTHREADS];

    for (long pp = 0; pp < NTHREADS; ++pp) {
        // The slice index travels to the worker inside the void* argument.
        int rv = pthread_create(&threads[pp], NULL, thread_main, (void*) pp);
        if (rv != 0) {
            // Joining a pthread_t that was never filled in is undefined,
            // so stop here instead of carrying on.
            fprintf(stderr, "pthread_create failed (error %d)\n", rv);
            return 1;
        }
    }

    for (int pp = 0; pp < NTHREADS; ++pp) {
        int rv = pthread_join(threads[pp], NULL);
        if (rv != 0) {
            fprintf(stderr, "pthread_join failed (error %d)\n", rv);
            return 1;
        }
    }

    printf("Sum = %ld\n", sum);
    return 0;
}

Add a lock:

#include <stdio.h>
#include <pthread.h>
#include <semaphore.h>

// Single source for the range size.  Both constants below are derived from
// this literal because ISO C does not treat a `const long` object as a
// constant expression, so writing `NPP = TOP / 10` at file scope only
// compiles where the compiler accepts it as an extension.
#define TOP_VALUE 1000000000L

// Exclusive upper bound of the range being summed: one billion.
const long TOP = TOP_VALUE;
// Numbers per thread: the range is cut into 10 equal slices.
const long NPP = TOP_VALUE / 10;

// Shared accumulator; workers only touch it while holding `lock`.
long sum = 0;
// Semaphore used as a mutex around `sum`; main initialises it to 1.
sem_t lock;

// Worker for one slice of the range.  `arg` is not a real pointer: it carries
// the slice index as an integer.  Adds every multiple of 101 in
// [NPP * index, NPP * (index + 1)) to the global `sum`, taking `lock` around
// each individual addition.
void*
thread_main(void* arg)
{
    long slice = (long) arg;
    long first = slice * NPP;
    long limit = first + NPP;

    for (long nn = first; nn < limit; ++nn) {
        if (nn % 101 != 0) {
            continue;
        }

        // Critical section: one thread at a time may update `sum`.
        sem_wait(&lock);
        sum += nn;
        sem_post(&lock);
    }

    return NULL;
}

// Initialises `lock` as a binary semaphore, splits [0, TOP) across 10 worker
// threads running thread_main, waits for all of them, then prints the global
// `sum` and destroys the semaphore.
//
// Returns 0 on success, 1 if the semaphore or a thread could not be set up.
int
main(int _ac, char* _av[])
{
    // Must match the divisor used to compute NPP.
    enum { NTHREADS = 10 };

    printf("Summing numbers divisible by 101 from 0 to %ld.\n", TOP - 1);

    // Initial value 1 makes the semaphore behave like a mutex.  If this
    // fails the workers would run without mutual exclusion, so bail out.
    if (sem_init(&lock, 0, 1) != 0) {
        perror("sem_init");
        return 1;
    }

    pthread_t threads[NTHREADS];

    for (long pp = 0; pp < NTHREADS; ++pp) {
        // The slice index travels to the worker inside the void* argument.
        int rv = pthread_create(&threads[pp], NULL, thread_main, (void*) pp);
        if (rv != 0) {
            // Joining a pthread_t that was never filled in is undefined,
            // so stop here instead of carrying on.
            fprintf(stderr, "pthread_create failed (error %d)\n", rv);
            return 1;
        }
    }

    for (int pp = 0; pp < NTHREADS; ++pp) {
        int rv = pthread_join(threads[pp], NULL);
        if (rv != 0) {
            fprintf(stderr, "pthread_join failed (error %d)\n", rv);
            return 1;
        }
    }

    // Every worker has been joined, so nothing can be waiting on the lock.
    sem_destroy(&lock);

    printf("Sum = %ld\n", sum);
    return 0;
}

Local sum (threads return local sum):

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

// Single source for the range size.  Both constants below are derived from
// this literal because ISO C does not treat a `const long` object as a
// constant expression, so writing `NPP = TOP / 10` at file scope only
// compiles where the compiler accepts it as an extension.
#define TOP_VALUE 1000000000L

// Exclusive upper bound of the range being summed: one billion.
const long TOP = TOP_VALUE;
// Numbers per thread: the range is cut into 10 equal slices.
const long NPP = TOP_VALUE / 10;

// Worker for one slice of the range.  `arg` is not a real pointer: it carries
// the slice index as an integer.  Sums every multiple of 101 in
// [NPP * index, NPP * (index + 1)) privately, with no shared state.
//
// Returns a heap-allocated long holding the slice total.  The thread that
// joins this worker receives the pointer and is responsible for free()ing it.
// Terminates the whole process if the result cannot be allocated.
void*
thread_main(void* arg)
{
    long pp = (long) arg;
    long i0 = NPP * pp;
    long iN = i0 + NPP;

    // Allocate the result slot up front so an out-of-memory condition is
    // reported before any work is done, not after.
    long* local_sum = malloc(sizeof *local_sum);
    if (local_sum == NULL) {
        perror("malloc");
        exit(1);
    }

    // Accumulate in a plain local and publish once at the end.
    long acc = 0;
    for (long ii = i0; ii < iN; ++ii) {
        if (ii % 101 == 0) {
            acc += ii;
        }
    }

    *local_sum = acc;
    return local_sum;
}

// Splits [0, TOP) across 10 worker threads running thread_main, then joins
// each one, adds the partial total it returned, and frees that result.
// Prints the grand total.
//
// Returns 0 on success, 1 if a thread could not be created or joined.
int
main(int _ac, char* _av[])
{
    // Must match the divisor used to compute NPP.
    enum { NTHREADS = 10 };

    printf("Summing numbers divisible by 101 from 0 to %ld.\n", TOP - 1);

    pthread_t threads[NTHREADS];

    for (long pp = 0; pp < NTHREADS; ++pp) {
        // The slice index travels to the worker inside the void* argument.
        int rv = pthread_create(&threads[pp], NULL, thread_main, (void*) pp);
        if (rv != 0) {
            // Joining a pthread_t that was never filled in is undefined,
            // so stop here instead of carrying on.
            fprintf(stderr, "pthread_create failed (error %d)\n", rv);
            return 1;
        }
    }

    long sum = 0;
    for (int pp = 0; pp < NTHREADS; ++pp) {
        void* ret = NULL;
        int rv = pthread_join(threads[pp], &ret);
        if (rv != 0) {
            // On failure `ret` was never filled in; do not dereference it.
            fprintf(stderr, "pthread_join failed (error %d)\n", rv);
            return 1;
        }

        // Each worker hands back a heap-allocated long that we now own.
        long* part = ret;
        sum += *part;
        free(part);
    }

    printf("Sum = %ld\n", sum);
    return 0;
}

Deadlock example:

#include <stdio.h>
#include <pthread.h>
#include <semaphore.h>
#include <unistd.h>

// Two semaphores used as locks; main initialises both to 1.
// thread_a takes aa then bb, while thread_b takes bb then aa.
sem_t aa;
sem_t bb;

// First half of the deadlock demonstration.  Acquires `aa`, then `bb`, prints
// a message, and releases both.  `arg` is unused.  Always returns 0.
void*
thread_a(void* arg)
{
    printf("In thread A\n");

    sem_wait(&aa);
    // Keep holding aa for a second before asking for bb.  thread_b acquires
    // the same two semaphores in the opposite order, so this pause gives it
    // time to grab bb first.
    sleep(1);
    sem_wait(&bb);

    printf("Doing real work in thread A...\n");

    // Release in the reverse of the acquisition order.
    sem_post(&bb);
    sem_post(&aa);

    return 0;
}

// Second half of the deadlock demonstration.  Acquires `bb`, then `aa`
// (the opposite order from thread_a), prints a message, and releases both.
// `arg` is unused.  Always returns 0.
void*
thread_b(void* arg)
{
    printf("In thread B\n");

    sem_wait(&bb);
    // Keep holding bb for a second before asking for aa.  thread_a is doing
    // the mirror image of this with aa, so each thread can end up holding
    // the semaphore the other one is waiting for.
    sleep(1);
    sem_wait(&aa);

    printf("Doing real work in thread B...\n");

    sem_post(&bb);
    sem_post(&aa);

    return 0;
}

// Driver for the deadlock demonstration.  Initialises `aa` and `bb` to 1,
// starts thread_a and thread_b, then joins both and destroys the semaphores.
//
// The two threads take the semaphores in opposite orders with a sleep in
// between, so the joins below are expected to block forever; that is the
// point of the example.
//
// Returns 1 if a semaphore or thread could not be set up; 0 if both threads
// somehow finish.
int
main(int _ac, char* _av[])
{
    // Initial value 1 makes each semaphore behave like a mutex.
    if (sem_init(&aa, 0, 1) != 0 || sem_init(&bb, 0, 1) != 0) {
        perror("sem_init");
        return 1;
    }

    pthread_t ta, tb;
    int rv;

    rv = pthread_create(&ta, NULL, thread_a, NULL);
    if (rv != 0) {
        fprintf(stderr, "pthread_create failed (error %d)\n", rv);
        return 1;
    }

    rv = pthread_create(&tb, NULL, thread_b, NULL);
    if (rv != 0) {
        fprintf(stderr, "pthread_create failed (error %d)\n", rv);
        return 1;
    }

    pthread_join(ta, NULL);
    pthread_join(tb, NULL);

    sem_destroy(&aa);
    sem_destroy(&bb);
    return 0;
}

Virtual Memory and Threads
#

With threads, all memory is shared by default.
No need for mmap or special shared memory allocation.
Advantage: Allocating shared memory post-spawn is trivial.
Disadvantage: every piece of shared writable data is a potential data race.

Threads vs. Processes
#

We can spawn multiple processes with fork().
We can execute multiple threads within a single process.

Key difference: With threads, all memory is shared by default.

Advantage: Allocating shared memory post-spawn is trivial.
Disadvantage: every piece of shared writable data is a potential data race.

History
#

Early days
#

Before multi-processor systems, parallelism didn’t matter.
Concurrency was still useful though:
- Running multiple programs at once.
- Having multiple logical tasks happening within one program.
On Unix style systems, processes were commonly used for concurrency.
On early Windows / Mac systems, concurrency within a program was represented by cooperative threading:
- One thread could run at a time.
- To let other threads run, explicitly call yield()
- Some systems had an implicit yield when a thread blocked on I/O.
By the ’90s, most systems had some form of pre-emptive threading. Threads still didn’t run in parallel, but the system would automatically schedule work between them without explicit yield() calls.

Multiprocessors
#

Multiprocessor servers became widely available in the mid 90’s.
Windows and Solaris had decent parallel thread support.
Linux didn’t get fully functional threads until around 2002, so fork() was heavily optimized instead.
Result: Threads are much more efficient than processes on Windows.
Threads under Linux evolved from fork(), so the performance difference is small.
Multi-core desktop processors showed up around 2005, and suddenly parallelism became necessary for performance.

Author

Nat Tuck

Virtual Memory and Threads #

Threads vs. Processes #

History #

Early days #

Multiprocessors #

Virtual Memory and Threads
#

Threads vs. Processes
#

History
#

Early days
#

Multiprocessors
#