Sum101
#include <stdio.h>
#include <assert.h>
// Exclusive upper bound of the summation range: one billion (1000 cubed).
const long TOP = 1000L * 1000L * 1000L;
// Sequential baseline: tests every integer in [0, TOP) in turn, adds up the
// ones divisible by 101, and prints the total.
int
main(int _ac, char* _av[])
{
    printf("Summing numbers divisible by 101 from 0 to %ld.\n", TOP - 1);

    long total = 0;
    long nn = 0;
    while (nn < TOP) {
        // Non-multiples contribute nothing to the total.
        if (nn % 101 == 0) {
            total += nn;
        }
        ++nn;
    }

    printf("Sum = %ld\n", total);
    return 0;
}
Parallel version with threads (data race):
#include <stdio.h>
#include <pthread.h>
// Exclusive upper bound of the summation range: one billion.
// The literal lives in a macro so that both objects below are initialized
// from integer constant expressions, as standard C requires at file scope
// (a const-qualified variable such as TOP is not a constant expression in C).
#define TOP_VALUE 1000000000L
const long TOP = TOP_VALUE;
// Count of integers handed to each of the 10 worker threads.
const long NPP = TOP_VALUE / 10;
// Global accumulator that every worker thread adds into.
long sum = 0;
// Worker entry point for the deliberately racy version of the sum.
// arg carries the worker index pp as an integer cast to void*; the worker
// scans the half-open range [NPP * pp, NPP * pp + NPP).
// Always returns 0 (a null pointer); the result is left in the global sum.
void*
thread_main(void* arg)
{
long pp = (long) arg;
long i0 = NPP * pp;
long iN = i0 + NPP;
for (long ii = i0; ii < iN; ++ii) {
if (ii % 101 == 0) {
// Unsynchronized read-modify-write of a global that the other workers
// update at the same time: this is the data race the example demonstrates.
sum += ii;
}
}
return 0;
}
// Splits [0, TOP) across 10 worker threads, waits for all of them, and prints
// the shared total. Returns 0 on success, or 1 if a worker could not be
// started or joined.
int
main(int _ac, char* _av[])
{
    // Must match the divisor used to compute NPP.
    enum { NTHREADS = 10 };

    printf("Summing numbers divisible by 101 from 0 to %ld.\n", TOP - 1);

    pthread_t threads[NTHREADS];
    for (long pp = 0; pp < NTHREADS; ++pp) {
        // pthread_create reports failure through its return value (not errno).
        // Joining a handle that was never filled in would be undefined, so
        // stop here instead of carrying on.
        int rv = pthread_create(&threads[pp], 0, thread_main, (void*) pp);
        if (rv != 0) {
            fprintf(stderr, "pthread_create failed for worker %ld (error %d)\n", pp, rv);
            return 1;
        }
    }

    for (int pp = 0; pp < NTHREADS; ++pp) {
        int rv = pthread_join(threads[pp], 0);
        if (rv != 0) {
            fprintf(stderr, "pthread_join failed for worker %d (error %d)\n", pp, rv);
            return 1;
        }
    }

    printf("Sum = %ld\n", sum);
    return 0;
}
Add a lock:
#include <stdio.h>
#include <pthread.h>
#include <semaphore.h>
// Exclusive upper bound of the summation range: one billion.
// The literal lives in a macro so that both objects below are initialized
// from integer constant expressions, as standard C requires at file scope
// (a const-qualified variable such as TOP is not a constant expression in C).
#define TOP_VALUE 1000000000L
const long TOP = TOP_VALUE;
// Count of integers handed to each of the 10 worker threads.
const long NPP = TOP_VALUE / 10;
// Global accumulator that every worker thread adds into.
long sum = 0;
// Semaphore protecting sum; main initializes it to 1, so at most one
// worker at a time is between sem_wait and sem_post.
sem_t lock;
// Worker entry point for the locked version of the sum.
// arg carries the worker index as an integer cast to void*. The worker scans
// its NPP-sized slice of the range and adds each multiple of 101 to the
// global sum, holding the semaphore around every individual addition.
// Always returns 0 (a null pointer).
void*
thread_main(void* arg)
{
    const long worker = (long) arg;
    const long first = NPP * worker;
    const long limit = first + NPP;

    for (long nn = first; nn < limit; ++nn) {
        // Skip anything that is not a multiple of 101 without touching the lock.
        if (nn % 101 != 0) {
            continue;
        }

        sem_wait(&lock);
        sum += nn;
        sem_post(&lock);
    }

    return 0;
}
// Initializes the semaphore, splits [0, TOP) across 10 worker threads, waits
// for all of them, and prints the shared total. Returns 0 on success, or 1 if
// the semaphore or a worker could not be set up or joined.
int
main(int _ac, char* _av[])
{
    // Must match the divisor used to compute NPP.
    enum { NTHREADS = 10 };

    printf("Summing numbers divisible by 101 from 0 to %ld.\n", TOP - 1);

    // Initial value 1 makes the semaphore behave as a mutex. sem_init returns
    // -1 and sets errno on failure; an uninitialized semaphore must not be used.
    if (sem_init(&lock, 0, 1) != 0) {
        perror("sem_init");
        return 1;
    }

    pthread_t threads[NTHREADS];
    for (long pp = 0; pp < NTHREADS; ++pp) {
        // pthread_create reports failure through its return value (not errno).
        int rv = pthread_create(&threads[pp], 0, thread_main, (void*) pp);
        if (rv != 0) {
            fprintf(stderr, "pthread_create failed for worker %ld (error %d)\n", pp, rv);
            return 1;
        }
    }

    for (int pp = 0; pp < NTHREADS; ++pp) {
        int rv = pthread_join(threads[pp], 0);
        if (rv != 0) {
            fprintf(stderr, "pthread_join failed for worker %d (error %d)\n", pp, rv);
            return 1;
        }
    }

    printf("Sum = %ld\n", sum);
    sem_destroy(&lock);
    return 0;
}
Local sum (threads return local sum):
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
// Exclusive upper bound of the summation range: one billion.
// The literal lives in a macro so that both objects below are initialized
// from integer constant expressions, as standard C requires at file scope
// (a const-qualified variable such as TOP is not a constant expression in C).
#define TOP_VALUE 1000000000L
const long TOP = TOP_VALUE;
// Count of integers handed to each of the 10 worker threads.
const long NPP = TOP_VALUE / 10;
// Worker entry point for the local-sum version.
// arg carries the worker index pp as an integer cast to void*; the worker
// scans the half-open range [NPP * pp, NPP * pp + NPP) and adds up the
// multiples of 101 without touching any shared state.
// Returns a heap-allocated long holding the partial sum; the caller owns it
// and must free() it. Returns NULL if the allocation fails.
void*
thread_main(void* arg)
{
    long pp = (long) arg;
    long i0 = NPP * pp;
    long iN = i0 + NPP;

    // Allocate the result slot up front so an out-of-memory condition is
    // reported before any work is done, instead of dereferencing NULL.
    long* local_sum = malloc(sizeof *local_sum);
    if (local_sum == NULL) {
        return NULL;
    }

    // Accumulate in a plain local and store once at the end.
    long acc = 0;
    for (long ii = i0; ii < iN; ++ii) {
        if (ii % 101 == 0) {
            acc += ii;
        }
    }

    *local_sum = acc;
    return local_sum;
}
// Splits [0, TOP) across 10 worker threads, collects the partial sum each
// worker returns, and prints the grand total. Returns 0 on success, or 1 if a
// worker could not be started or joined, or returned no result.
int
main(int _ac, char* _av[])
{
    // Must match the divisor used to compute NPP.
    enum { NTHREADS = 10 };

    printf("Summing numbers divisible by 101 from 0 to %ld.\n", TOP - 1);

    pthread_t threads[NTHREADS];
    for (long pp = 0; pp < NTHREADS; ++pp) {
        // pthread_create reports failure through its return value (not errno).
        // Joining a handle that was never filled in would be undefined.
        int rv = pthread_create(&threads[pp], 0, thread_main, (void*) pp);
        if (rv != 0) {
            fprintf(stderr, "pthread_create failed for worker %ld (error %d)\n", pp, rv);
            return 1;
        }
    }

    long sum = 0;
    for (int pp = 0; pp < NTHREADS; ++pp) {
        void* ret = 0;
        int rv = pthread_join(threads[pp], &ret);
        if (rv != 0) {
            fprintf(stderr, "pthread_join failed for worker %d (error %d)\n", pp, rv);
            return 1;
        }
        // A null result means the worker produced no partial sum; do not
        // dereference it.
        if (ret == 0) {
            fprintf(stderr, "worker %d returned no result\n", pp);
            return 1;
        }
        sum += *((long*) ret);
        // The worker allocated the result; ownership passed to us on join.
        free(ret);
    }

    printf("Sum = %ld\n", sum);
    return 0;
}
Deadlock example:
#include <stdio.h>
#include <pthread.h>
#include <semaphore.h>
#include <unistd.h>
// The two semaphores the worker threads acquire in opposite orders.
// main initializes both to 1.
sem_t aa, bb;
// Thread A of the deadlock example: takes aa first, then bb.
// arg is unused. Returns 0 (a null pointer) if it ever gets both semaphores.
void*
thread_a(void* arg)
{
printf("In thread A\n");
sem_wait(&aa);
// Hold aa for a second before asking for bb. thread_b takes the semaphores
// in the opposite order, so each thread can end up holding one semaphore
// while waiting forever for the other.
sleep(1);
sem_wait(&bb);
printf("Doing real work in thread A...\n");
sem_post(&bb);
sem_post(&aa);
return 0;
}
// Thread B of the deadlock example: takes bb first, then aa.
// arg is unused. Returns 0 (a null pointer) if it ever gets both semaphores.
void*
thread_b(void* arg)
{
printf("In thread B\n");
sem_wait(&bb);
// Hold bb for a second before asking for aa. thread_a takes the semaphores
// in the opposite order, so each thread can end up holding one semaphore
// while waiting forever for the other.
sleep(1);
sem_wait(&aa);
printf("Doing real work in thread B...\n");
sem_post(&bb);
sem_post(&aa);
return 0;
}
// Driver for the deadlock example: initializes both semaphores to 1, starts
// thread_a and thread_b, and waits for them. Returns 0 if both threads
// finish, or 1 if a semaphore or thread could not be set up.
int
main(int _ac, char* _av[])
{
    // sem_init returns -1 and sets errno on failure; an uninitialized
    // semaphore must not be waited on.
    if (sem_init(&aa, 0, 1) != 0 || sem_init(&bb, 0, 1) != 0) {
        perror("sem_init");
        return 1;
    }

    pthread_t ta, tb;
    // pthread_create reports failure through its return value (not errno).
    // Joining a handle that was never filled in would be undefined.
    int rv = pthread_create(&ta, 0, thread_a, 0);
    if (rv != 0) {
        fprintf(stderr, "pthread_create failed for thread A (error %d)\n", rv);
        return 1;
    }
    rv = pthread_create(&tb, 0, thread_b, 0);
    if (rv != 0) {
        fprintf(stderr, "pthread_create failed for thread B (error %d)\n", rv);
        return 1;
    }

    // If the two threads deadlock, these joins never return and the cleanup
    // below is not reached.
    pthread_join(ta, 0);
    pthread_join(tb, 0);

    sem_destroy(&aa);
    sem_destroy(&bb);
    return 0;
}
Virtual Memory and Threads #
- With threads, all memory is shared by default.
- No need for mmap or special shared memory allocation.
- Advantage: Allocating shared memory post-spawn is trivial.
- Disadvantage: Every shared writable object is a potential data race unless access to it is synchronized.
Threads vs. Processes #
- We can spawn multiple processes with fork()
- We can execute multiple threads within a single process.
Key difference: With threads, all memory is shared by default.
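- Advantage: Allocating shared memory after the threads are spawned is trivial.
- Disadvantage: Every shared writable object is a potential data race unless access to it is synchronized.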
History #
Early days #
- Before multi-processor systems, parallelism didn’t matter.
- Concurrency was still useful though:
- Running multiple programs at once.
- Having multiple logical tasks happening within one program.
- On Unix style systems, processes were commonly used for concurrency.
- On early Windows / Mac systems, concurrency within a program was represented
by cooperative threading:
- One thread could run at a time.
- To let other threads run, explicitly call yield()
- Some systems had an implicit yield when a thread blocked on I/O.
- By the 1990s, most systems had some form of pre-emptive threading. Threads still didn’t run in parallel, but the system would automatically schedule work between them without explicit yield() calls.
Multiprocessors #
- Multiprocessor servers became widely available in the mid-1990s.
- Windows and Solaris had decent parallel thread support.
- Linux didn’t get fully functional threads until around 2002, so fork() was heavily optimized instead.
- Result: Threads are much more efficient than processes on Windows.
- Threads under Linux evolved from fork(), so the performance difference is small.
- Multi-core desktop processors showed up around 2005, and suddenly parallelism became necessary for performance.