Lecture Notes: 23 Project One
··2 mins
Strategies for Project 1 #
Let’s look at the starter code #
Three programs:
- ivec_main.c
- list_main.c
- frag_main.c
We’re supposed to write an allocator that makes the first two programs go fast. The third program just checks for worst-case memory usage.
So let’s inspect what these two programs we want to optimize are actually doing:
// opt-malloc
#include "xmalloc.h"
#include <stdlib.h>
#include <stdio.h>
// Instrumented stand-in for the allocator: logs each requested size, then
// delegates to the system malloc().
//
// bytes: number of bytes requested.
// Returns whatever malloc() returns for that request.
void*
xmalloc(size_t bytes)
{
    // How big are the allocations?
    // %zu is the conversion for size_t; %ld expects a long.
    printf("size = %zu\n", bytes);
    // Just delegate to sysmalloc for now.
    return malloc(bytes);
}
// Release ptr by forwarding it unchanged to the system free().
void
xfree(void* ptr)
{
free(ptr);
}
// Resize the allocation at prev to bytes by forwarding to the system
// realloc(); returns realloc()'s result unchanged.
void*
xrealloc(void* prev, size_t bytes)
{
return realloc(prev, bytes);
}
Now let’s run both programs and see what we get for sizes.
$ ./collatz-list-opt 100
$ ./collatz-ivec-opt 100
The list program only makes allocations of a single size, so let’s try optimizing for that.
The simplest possible allocator with an optimization for that one size:
#include "xmalloc.h"
#include <sys/mman.h>
#include <string.h>
// One cell of the small-allocation free list.
// small_alloc() returns the address of `next` as the user pointer, so the
// bytes from `next` onward double as the caller's payload while a cell is in use.
typedef struct small_block {
// Header word: small_alloc() stores 24 here and xfree() reads it to choose
// between the small path and munmap().
size_t size;
// Link to the next free cell while this cell sits on the free list.
struct small_block* next;
// Never accessed by name in this file; presumably pads the cell to 24 bytes
// on an LP64 target — TODO confirm.
size_t _unused;
} small_block;
// Head of the free list of small cells; 0 means the list is empty and
// small_alloc() must refill it.
// __thread is GCC's thread-local storage class, see:
// https://gcc.gnu.org/onlinedocs/gcc/Thread-Local.html
static __thread small_block* smalls = 0;
void
small_free(void* ptr)
{
small_block* block = ptr - 8;
block->next = smalls;
smalls = block;
}
// Pop one small cell off the free list, refilling the list from a fresh
// chunk when it is empty.
//
// Returns the address of the cell's `next` field (the caller's payload),
// or NULL if a refill chunk could not be obtained.
void*
small_alloc(void)
{
    enum {
        REFILL_REQUEST = 4088,   // size asked of xmalloc(), which adds its own header
        CELLS_PER_REFILL = 170,  // cells pushed onto the free list per chunk
        SMALL_CELL_SIZE = 24     // header value xfree() tests to pick small_free()
    };

    if (smalls == 0) {
        small_block* page = xmalloc(REFILL_REQUEST);
        if (page == NULL) {
            return NULL;
        }
        // small_free() treats its argument as a payload pointer and steps
        // back over the size header, so each cell really starts one header
        // before &page[ii].
        for (int ii = 0; ii < CELLS_PER_REFILL; ++ii) {
            small_free(&(page[ii]));
        }
    }

    small_block* block = smalls;
    smalls = smalls->next;
    block->size = SMALL_CELL_SIZE;
    return &(block->next);
}
// Allocate room for `bytes` bytes of payload.
//
// Every allocation is preceded by a size_t header word that xfree() reads to
// decide how to release it. Requests that fit in a small cell (header
// included) come from the free list; anything larger gets its own anonymous
// private mapping, with the total mapped length stored in the header.
//
// Returns the payload pointer, or NULL if adding the header would overflow
// or the mapping cannot be created.
void*
xmalloc(size_t bytes)
{
    // Adding the header must not wrap around to a tiny size.
    if (bytes > (size_t)-1 - sizeof(size_t)) {
        return NULL;
    }

    // Space for header
    bytes += sizeof(size_t);

    // small allocations
    if (bytes <= 24) {
        return small_alloc();
    }

    // big allocations
    size_t* big = mmap(0, bytes, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    if (big == MAP_FAILED) {
        // mmap reports failure as MAP_FAILED, not NULL; writing the header
        // through it would fault.
        return NULL;
    }
    *big = bytes;
    return (void*)(big + 1);
}
// Release a pointer previously returned by xmalloc()/xrealloc().
//
// The size_t word just before ptr tells which path allocated it: values up
// to 24 mark a small cell, which goes back on the free list; larger values
// are the length of a private mapping, which is unmapped.
// Passing NULL is a no-op, matching free().
void
xfree(void* ptr)
{
    if (ptr == NULL) {
        return;
    }

    size_t* header = ((size_t*)ptr) - 1;
    size_t sz = *header;
    if (sz <= 24) {
        small_free(ptr);
    }
    else {
        // The mapping starts at the header, not at the payload.
        munmap(header, sz);
    }
}
// Resize an allocation by allocating a new block, copying, and freeing the
// old one.
//
// prev:  pointer from xmalloc()/xrealloc(), or NULL (then this behaves like
//        xmalloc(bytes)).
// bytes: new payload size.
// Returns the new payload pointer, or NULL if the new block could not be
// allocated, in which case prev is left untouched.
void*
xrealloc(void* prev, size_t bytes)
{
    void* next = xmalloc(bytes);
    if (prev == NULL) {
        return next;
    }
    if (next == NULL) {
        return NULL;
    }

    // The header before prev holds the old block's size including the header
    // itself. Copy only what the old block actually holds: copying `bytes`
    // would read past its end whenever the allocation grows.
    size_t old_total = *(((size_t*)prev) - 1);
    size_t old_payload = old_total - sizeof(size_t);
    size_t ncopy = (old_payload < bytes) ? old_payload : bytes;

    memcpy(next, prev, ncopy);
    xfree(prev);
    return next;
}
Now let’s profile it.
To set up profiling:
- Add -pg to CFLAGS
- ./collatz-list-opt 100000
- gprof ./collatz-list-opt
- Look at the flat profile; ignore the rest.
What other optimizations have people considered?