Sum101
#include <stdio.h>
#include <assert.h>
// Exclusive upper bound of the range that gets scanned: one billion.
const long TOP = 1000000000;

// Sequential baseline: walks every integer in [0, TOP) in a single
// process, adds up the ones that are multiples of 101, and prints the
// total. The command-line arguments are not used.
int
main(int _ac, char* _av[])
{
    printf("Summing numbers divisible by 101 from 0 to %ld.\n", TOP - 1);

    long total = 0;
    long candidate = 0;
    while (candidate < TOP) {
        // Non-multiples contribute nothing to the total.
        total += (candidate % 101 == 0) ? candidate : 0;
        candidate++;
    }

    printf("Sum = %ld\n", total);
    return 0;
}
Parallel version
#include <stdio.h>
#include <assert.h>
#include <sys/mman.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
// Parallel version: splits [0, TOP) into 10 equal slices, forks one child
// per slice, and has every child add the multiples of 101 in its slice
// into a single long that lives in a shared anonymous mapping. The parent
// waits for the children and prints the shared total.
//
// Returns 0 on success, 1 if the mapping or any fork() fails.
// The command-line arguments are not used.
int
main(int _ac, char* _av[])
{
    // A billion.
    const long TOP = 1000000000;
    // Numbers per process.
    const long NPP = TOP / 10;

    printf("Summing numbers divisible by 101 from 0 to %ld.\n", TOP - 1);
    // Each child inherits a copy of the stdio buffer and flushes it in
    // exit(); flush now so the banner is not repeated when stdout is a
    // pipe or a file.
    fflush(stdout);

    // MAP_SHARED keeps the page common to parent and children after
    // fork(). Anonymous mappings start zero-filled, so *sum begins at 0.
    long* sum = mmap(0, sizeof(long), PROT_READ|PROT_WRITE,
                     MAP_SHARED|MAP_ANONYMOUS, -1, 0);
    if (sum == MAP_FAILED) {
        perror("mmap");
        return 1;
    }

    pid_t kids[10];
    int started = 0;
    for (int pp = 0; pp < 10; ++pp) {
        kids[pp] = fork();
        if (kids[pp] < 0) {
            // Stop spawning; the children already started are reaped below.
            perror("fork");
            break;
        }
        if (kids[pp] == 0) {
            // Child: handle the half-open slice [i0, iN). The bounds are
            // long, like TOP and NPP, so they cannot be truncated.
            long i0 = NPP*pp;
            long iN = i0 + NPP;
            for (long ii = i0; ii < iN; ++ii) {
                if (ii % 101 == 0) {
                    // NOTE: this read-modify-write is not atomic and
                    // nothing serialises the ten processes, so concurrent
                    // updates can be lost and the final sum can be wrong.
                    *sum += ii;
                }
            }
            munmap(sum, sizeof(long));
            exit(0);
        }
        ++started;
    }

    for (int pp = 0; pp < started; ++pp) {
        waitpid(kids[pp], 0, 0);
    }

    if (started < 10) {
        // Not every slice was processed, so the total would be meaningless.
        munmap(sum, sizeof(long));
        return 1;
    }

    printf("Sum = %ld\n", *sum);
    munmap(sum, sizeof(long));
    return 0;
}
Add a lock:
#include <stdio.h>
#include <assert.h>
#include <sys/mman.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <semaphore.h>
// Maps `size` bytes of readable, writable, anonymous memory with
// MAP_SHARED, so the region stays common to this process and any children
// it forks afterwards.
//
// Returns whatever mmap() returns: the start of the region, or MAP_FAILED
// (not NULL) when the mapping cannot be created.
void*
malloc_shared(size_t size)
{
    const int prot = PROT_READ | PROT_WRITE;
    const int flags = MAP_SHARED | MAP_ANONYMOUS;
    void* region = mmap(0, size, prot, flags, -1, 0);
    return region;
}
// Releases a region obtained from malloc_shared(). `size` is the length
// handed to munmap(); munmap()'s status is not reported to the caller.
void
free_shared(void* ptr, size_t size)
{
    (void) munmap(ptr, size);
}
// Locked parallel version: same 10-way split as the unlocked program, but
// every update of the shared total is bracketed by sem_wait()/sem_post()
// on a semaphore that also lives in shared memory, so only one process
// modifies *sum at a time.
//
// Returns 0 on success, 1 if shared memory, the semaphore, or any fork()
// cannot be set up. The command-line arguments are not used.
int
main(int _ac, char* _av[])
{
    // A billion.
    const long TOP = 1000000000;
    // Numbers per process.
    const long NPP = TOP / 10;

    printf("Summing numbers divisible by 101 from 0 to %ld.\n", TOP - 1);
    // Children inherit a copy of the stdio buffer and flush it in exit();
    // flush now so the banner is not repeated when stdout is redirected.
    fflush(stdout);

    // malloc_shared() hands back mmap()'s result, so failure is
    // MAP_FAILED rather than NULL.
    long* sum = malloc_shared(sizeof(long));
    if (sum == MAP_FAILED) {
        perror("malloc_shared");
        return 1;
    }
    sem_t* lock = malloc_shared(sizeof(sem_t));
    if (lock == MAP_FAILED) {
        perror("malloc_shared");
        free_shared(sum, sizeof(long));
        return 1;
    }

    // Second argument 1: the semaphore is shared between processes.
    // Third argument 1: it starts unlocked, so it acts as a mutex.
    if (sem_init(lock, 1, 1) != 0) {
        perror("sem_init");
        free_shared(sum, sizeof(long));
        free_shared(lock, sizeof(sem_t));
        return 1;
    }
    // Semaphores?

    pid_t kids[10];
    int started = 0;
    for (int pp = 0; pp < 10; ++pp) {
        kids[pp] = fork();
        if (kids[pp] < 0) {
            // Stop spawning; the children already started are reaped below.
            perror("fork");
            break;
        }
        if (kids[pp] == 0) {
            // Child: handle the half-open slice [i0, iN). The bounds are
            // long, like TOP and NPP, so they cannot be truncated.
            long i0 = NPP*pp;
            long iN = i0 + NPP;
            for (long ii = i0; ii < iN; ++ii) {
                // try wait here
                if (ii % 101 == 0) {
                    sem_wait(lock);
                    *sum += ii;
                    sem_post(lock);
                }
                // and post here
            }
            free_shared(sum, sizeof(long));
            free_shared(lock, sizeof(sem_t));
            exit(0);
        }
        ++started;
    }

    for (int pp = 0; pp < started; ++pp) {
        waitpid(kids[pp], 0, 0);
    }

    // Every started child has exited, so nobody can still be blocked on
    // the semaphore when it is destroyed.
    int rv = 0;
    if (started == 10) {
        printf("Sum = %ld\n", *sum);
    }
    else {
        // Not every slice was processed, so the total would be meaningless.
        rv = 1;
    }

    sem_destroy(lock);
    free_shared(sum, sizeof(long));
    free_shared(lock, sizeof(sem_t));
    return rv;
}
Real work
#include <stdio.h>
#include <assert.h>
#include <sys/mman.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <semaphore.h>
// Allocates `size` bytes that remain shared across fork(): an anonymous
// (not file-backed) MAP_SHARED mapping with read and write permission.
//
// The result is passed through from mmap() unchanged, so a failed
// allocation is reported as MAP_FAILED rather than NULL.
void*
malloc_shared(size_t size)
{
    return mmap(
        0,                           // no preferred address
        size,
        PROT_READ | PROT_WRITE,
        MAP_SHARED | MAP_ANONYMOUS,
        -1,                          // no backing file descriptor
        0                            // offset
    );
}
// Counterpart of malloc_shared(): unmaps `size` bytes starting at `ptr`.
// The outcome of munmap() is dropped; callers get no error indication.
void
free_shared(void* ptr, size_t size)
{
    int status = munmap(ptr, size);
    (void) status;
}
// Two processes, two process-shared semaphores (aa and bb), each used as
// a mutex. The parent acquires aa and then bb; the child acquires bb and
// then aa. The sleep(1) between the two acquisitions gives the other
// process time to take its first semaphore, so each side ends up holding
// one semaphore while blocking on the other, and neither reaches its
// "real work" line.
// NOTE(review): this looks like a deliberate deadlock demonstration, so
// the acquisition order is left as written; confirm before "fixing" it.
//
// Returns 0 on success, 1 if shared memory, a semaphore, or fork() cannot
// be set up. The command-line arguments are not used.
int
main(int _ac, char* _av[])
{
    // One size for both the allocation and the release, so the two agree.
    const size_t locks_size = 2 * sizeof(sem_t);

    // malloc_shared() hands back mmap()'s result, so failure is
    // MAP_FAILED rather than NULL.
    sem_t* locks = malloc_shared(locks_size);
    if (locks == MAP_FAILED) {
        perror("malloc_shared");
        return 1;
    }
    sem_t* aa = &(locks[0]);
    sem_t* bb = &(locks[1]);

    // Second argument 1: shared between processes. Third argument 1:
    // each semaphore starts unlocked.
    if (sem_init(aa, 1, 1) != 0 || sem_init(bb, 1, 1) != 0) {
        perror("sem_init");
        free_shared(locks, locks_size);
        return 1;
    }

    pid_t cpid = fork();
    if (cpid < 0) {
        perror("fork");
        free_shared(locks, locks_size);
        return 1;
    }

    if (cpid) {
        printf("In parent\n");
        sem_wait(aa);
        sleep(1);
        sem_wait(bb);
        printf("Doing real work in parent...");
        sem_post(bb);
        sem_post(aa);
        waitpid(cpid, 0, 0);
    }
    else {
        printf("In child\n");
        sem_wait(bb);
        sleep(1);
        sem_wait(aa);
        printf("Doing real work in child...\n");
        sem_post(bb);
        sem_post(aa);
    }

    // Both the parent and the child drop their own mapping here.
    free_shared(locks, locks_size);
    return 0;
}
Virtual Memory and Fork: A Review #
- Draw the virtual memory diagram.
- Allocate some shared memory.
- Fork.
- Point out that shared memory is shared, and non-shared writable memory soon isn’t.
Introducing Threads #
// create.c
#include <stdio.h>
#include <pthread.h>
#include <assert.h>
#define NN 10
// Thread entry point. `thread_arg` points at an int: the function prints
// that value, adds the value to the int in place (doubling it), and
// returns the same pointer so the joiner can read the result.
void*
thread_main(void* thread_arg)
{
    int* slot = (int*) thread_arg;
    const int id = *slot;

    printf("thread %d: We're in a thread.\n", id);

    *slot += id;
    return slot;
}
// Starts NN threads running thread_main(), each with a pointer to its own
// slot of nums[] (initialised to the thread's index), then joins them in
// creation order and prints the int each thread's return value points at.
// thread_main() returns its argument after doubling the int, so the value
// printed for thread ii is 2 * ii.
//
// The command-line arguments are not used.
int
main(int _argc, char* _argv[])
{
    int nums[NN];
    int rv;
    pthread_t threads[NN];

    printf("main: Starting %d threads.\n", NN);

    for (int ii = 0; ii < NN; ++ii) {
        // Each thread gets its own element, so no two threads write the
        // same int.
        nums[ii] = ii;
        rv = pthread_create(&(threads[ii]), 0, thread_main, &(nums[ii]));
        assert(rv == 0);
    }

    printf("main: Started %d threads.\n", NN);

    for (int ii = 0; ii < NN; ++ii) {
        void* ret = 0;
        rv = pthread_join(threads[ii], &ret);
        // ret is only meaningful when the join succeeded; check before
        // dereferencing it, the same way pthread_create() is checked.
        assert(rv == 0);
        int yy = *((int*) ret);
        printf("main: Joined thread %d, rv = %d.\n", ii, yy);
    }

    printf("main: All threads joined.\n");
    return 0;
}
- show create.c
- Discuss how threads change the virtual memory story.
Threads vs. Processes #
- We can spawn multiple processes with fork()
- We can execute multiple threads within a single process.
Key difference: With threads, all memory is shared by default.
- Advantage: Allocating shared memory post-spawn.
- Disadvantage: Every access to shared data is a potential data race.
History #
Early days #
- Before multi-processor systems, parallelism didn’t matter.
- Concurrency was still useful though:
- Running multiple programs at once.
- Having multiple logical tasks happening within one program.
- On Unix style systems, processes were commonly used for concurrency.
- On early Windows / Mac systems, concurrency within a program was represented
by cooperative threading:
- One thread could run at a time.
- To let other threads run, explicitly call yield()
- Some systems had an implicit yield when a thread blocked on I/O.
- By the 90’s, systems had some sort of pre-emptive threading. This still didn’t work in parallel, but it would automatically schedule work between threads without explicit yield() calls.
Multiprocessors #
- Multiprocessor servers became widely available in the mid 90’s.
- Windows and Solaris had decent parallel thread support.
- Linux didn’t get fully functional threads until around 2002, so fork() was heavily optimized instead.
- Result: Threads are much more efficient than processes on Windows.
- Threads under Linux evolved from fork(), so the performance difference is small.
- Multi-core desktop processors showed up around 2005, and suddenly parallelism became necessary for performance.