/* Complete B-tree implementation
*
* Author: Remigiusz 'lRem' Modrzejewski <http://lrem.net/>
* Distributed under the terms of GNU GPL version 2, as found on:
* http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
*/
#include <cstdio>
#include <cerrno>
#include <cassert>
#include <queue>
#include <map>
using namespace std;
#define DATAFILE "data"
#define TREEFILE "index"
#define MISCFILE "misc"
#define NODESIZE 64
#define RECORD_SIZE 15
#define T 31
#define HELP "Possible actions: (h)elp, (a)dd, (s)earch, (d)elete, (e)dit, \
(p)rint all, (q)uit\n"
struct record/*{{{*/
{
int numbers_count,
numbers[RECORD_SIZE];
void dump (void);
};
/*}}}*/
class node/*{{{*/
{
// Internals {{{
int n;
bool leaf;
int keys[NODESIZE],
sons[NODESIZE],
data[NODESIZE];
// }}}
public:
int offset,
father;
//{{{Methods - this is actual B-Tree implementation
void create(); // This is _not_ a constructor
pair<node*, int> search (int key); // Returns the data offset
void split(int index); // Parent is a field in node
void merge(node &source);
void insert(int key, int data);
void overflow(int key);
void remove(int key);
void search_dump (int key);
int dump (void);
void debug (void);
int validate (int bound);
//}}}
} *tree, dummy;
/*}}}*/
class cache/*{{{*/
{
// Internals {{{
struct entry
{
node data;
bool dirty;
};
FILE *index,
*data,
*misc;
int next_new_data,
next_new;
queue<int> free_data;
queue<int> free_nodes;
map<int, entry> storage;
int next_free();
//}}}
public:
// Methods - everything that touches files {{{
cache();
~cache();
void flush (void);
node& allocate (void);
node& read (int offset, int parent);
void write (int offset);
void free (node &n);
int allocate_record (void);
record& read_record (int offset);
void write_record (int offset, record &r);
void free_record (int offset);
//}}}
} disk;
/*}}}*/
int main (void)/*{{{*/
{
char choice = 'h';
int key, data;
record tmp;
while(choice != 'q')
{
switch(choice)
{
case 'h':/*{{{*/
printf(HELP);
break;/*}}}*/
case 'a':/*{{{*/
printf("Key: ");
scanf("%d", &key);
printf("Numbers count: ");
scanf("%d", &tmp.numbers_count);
for(int i = 0; i < tmp.numbers_count; i++)
{
printf("Number %d: ", i);
scanf("%d", &tmp.numbers[i]);
}
data = disk.allocate_record();
disk.write_record(data, tmp);
tree->insert(key, data);
break;/*}}}*/
case 'e':/*{{{*/
printf("Key: ");
scanf("%d", &key);
if(tree->search(key).second == -1)
break;
tree->remove(key);
scanf("%d", &key);
printf("Numbers count: ");
scanf("%d", &tmp.numbers_count);
for(int i = 0; i < tmp.numbers_count; i++)
{
printf("Number %d: ", i);
scanf("%d", &tmp.numbers[i]);
}
data = disk.allocate_record();
disk.write_record(data, tmp);
tree->insert(key, data);
break;/*}}}*/
case 'p':/*{{{*/
printf("Total records: %d\t",tree->dump());
break;/*}}}*/
case 's':/*{{{*/
printf("Key: ");
scanf("%d", &key);
tree->search_dump(key);
break;/*}}}*/
case 'd':/*{{{*/
printf("Key: ");
scanf("%d", &key);
tree->remove(key);
break;/*}}}*/
default:/*{{{*/
printf("Unknown choice\n");/*}}}*/
}
#ifdef VERBOSE
printf("VALIDATION BEGIN{{{\n");
tree->validate(-1);
printf("VALIDATION END}}}\n");
#else
#ifdef DEBUG
tree->validate(-1);
#endif
#endif
disk.flush();
printf("> ");
do
scanf("%c", &choice);
while(isspace(choice));
}
return 0;
}
/*}}}*/
//{{{ Global functions
void swap (int &a, int &b)
{
int tmp = a;
a = b;
b = tmp;
}
//}}}
//{{{ Methods of node
void node::create()/*{{{*/
{
leaf = true;
father = -1;
n = 0;
disk.write(offset);
}
/*}}}*/
pair<node*, int> node::search (int key)/*{{{*/
{
int i = 0;
while(i < n && keys[i] < key)
i++;
if(i <= n && keys[i] == key)
return make_pair(this, i);
if(leaf)
return make_pair(this, -1);
node &son = disk.read(sons[i], offset);
return son.search(key);
}
/*}}}*/
void node::split(int index)/*{{{*/
{
assert(n == 2*T-1);
// Make the new one similar to the old one
node &z = disk.allocate();
z.father = father;
z.leaf = leaf;
z.n = T-1;
// Move half of the keys to the new one
for(int j = T; j--; )
{
z.keys[j] = keys[j+T];
z.data[j] = data[j+T];
}
// If not leaf, move the subtrees
if(!leaf)
for(int j = T+1; j--; )
z.sons[j] = sons[j+T];
n = T-1;
// Move the last key from old son to father
// Note that the subtree stays where it was - it will be new post-n
node &x = disk.read(father, -1);
for(int j = x.n; j > index; j--)
{
x.sons[j+1] = x.sons[j];
x.keys[j+1] = x.keys[j];
x.data[j+1] = x.data[j];
}
x.keys[index+1] = x.keys[index];
x.data[index+1] = x.data[index];
x.sons[index+1] = z.offset;
x.keys[index] = keys[T-1];
x.data[index] = data[T-1];
x.n++;
disk.write(offset);
disk.write(x.offset);
disk.write(z.offset);
}
/*}}}*/
void node::merge(node &source)/*{{{*/
{
assert(source.n == T-1 && n == T);
assert(offset != source.offset);
assert(father == source.father);
assert(leaf == source.leaf);
//Unasserted assumption - we lack rightmost son
for(int i = 0; i < n; i++)
{
keys[n+i] = source.keys[i];
data[n+i] = source.data[i];
sons[n+i] = source.sons[i];
}
n=2*T-1;
}
/*}}}*/
void node::insert(int key, int new_data)/*{{{*/
{
#ifdef DEBUG
printf("node::insert(%d, %d)\n", key, new_data);
#endif
pair<node*, int> x = search(key);
if(x.second == -1)
{
assert(x.first->leaf);
int i = x.first->n;
for(; i && x.first->keys[i-1] > key; i--)
{
x.first->keys[i] = x.first->keys[i-1];
x.first->data[i] = x.first->data[i-1];
}
x.first->keys[i] = key;
x.first->data[i] = new_data;
x.first->n++;
disk.write(x.first->offset);
if(x.first->n==2*T-1)
x.first->overflow(key);
}
else
{
disk.free_record(new_data);
}
}
/*}}}*/
void node::overflow(int key)/*{{{*/
{
if(this == tree)
{
#ifdef DEBUG
printf("node::overflow(%d) - root\n", key);
#endif
node &s = disk.allocate();
s.leaf = false;
s.n = 0;
s.father = -1;
s.sons[0] = tree->offset;
tree->father = s.offset;
tree->split(0);
disk.write(tree->offset);
disk.write(s.offset);
tree = &s;
}
else
{
#ifdef DEBUG
printf("node::overflow(%d) - nonroot\n", key);
#endif
node &parent = disk.read(father, -1);
int i = parent.n-1;
while(i>=0 && parent.keys[i] > key)
i--;
i++;
node &y = i > 0 ? disk.read(parent.sons[i-1], -1) : dummy;
node &z = i < parent.n ? disk.read(parent.sons[i+1], -1): dummy;
if((y.n != dummy.n) && (y.n < 2*(T-1)))
{
// Append a key to y
y.keys[y.n] = parent.keys[i-1];
y.data[y.n] = parent.data[i-1];
// Move the orphaned subtree where it belongs
y.sons[y.n] = sons[0];
// Move a key from this into that place
parent.keys[i-1] = keys[0];
parent.data[i-1] = data[0];
for(int j = 0; j < n; j++)
{
keys[j] = keys[j+1];
data[j] = data[j+1];
sons[j] = sons[j+1];
}
n--;
// Write changes
disk.write(offset);
disk.write(y.offset);
disk.write(parent.offset);
}
else if((z.n != dummy.n) && (z.n < 2*(T-1)))
{
// Preppend a key to z
z.n++;
for(int j = z.n; j; j--)
{
z.keys[j] = z.keys[j-1];
z.data[j] = z.data[j-1];
z.sons[j] = z.sons[j-1];
}
z.keys[0] = parent.keys[i];
z.data[0] = parent.data[i];
// Move a key from this into that place
n--;
parent.keys[i] = keys[n];
parent.data[i] = data[n];
// Move the orphaned subtree where it belongs
z.sons[0] = sons[n+1];
// Write changes
disk.write(offset);
disk.write(z.offset);
disk.write(parent.offset);
}
else
split(i);
if(parent.n == 2*T-1)
parent.overflow(key);
}
}
/*}}}*/
#define checkroot()/*{{{*/\
{\
if(n == 0)\
{\
printf("Root going down\n");\
assert(this == tree);\
node &son = disk.read(sons[0], offset);\
son.father = -1;\
tree = &son;\
disk.free(*this);\
disk.write(son.offset);\
need_write = false;\
}\
}
/*}}}*/
void node::remove(int key)/*{{{*/
{
int index = 0;
bool need_write = true;
while(index < n && keys[index] < key)
index++;
if(leaf)
{ // 1. Leaf/*{{{*/
#ifdef DEBUG
printf("node::remove 1 (%d)\n", key);
#endif
// Simply delete k from x
if(index < n && keys[index] == key)
{
n--;
while(index < n)
{
disk.free_record(data[index]);
keys[index] = keys[index+1];
data[index] = data[index+1];
index++;
}
}
}/*}}}*/
else if(index < n && keys[index] == key)
{ // 2. Internal and k is in x/*{{{*/
node &y = disk.read(sons[index], offset);
node &z = disk.read(sons[index+1], offset);
if(y.n >= T)
{ // 2a/*{{{*/
#ifdef DEBUG
printf("node::remove 2a\n");
#endif
// Swap with predecesor
node *current = &y;
while(!current->leaf)
current = &disk.read(current->sons[current->n],
offset);
swap(keys[index], current->keys[current->n - 1]);
swap(data[index], current->data[current->n - 1]);
// Note this does not hurt reads count nor asymptotical
// computing time, as _all_ nodes just iterated will be
// subject to recursion.
// Recursively delete
checkroot();
y.remove(key);
}/*}}}*/
else if(z.n >= T)
{ // 2b/*{{{*/
#ifdef DEBUG
printf("node::remove 2b\n");
#endif
// 2a turned around
node *current = &z;
while(!current->leaf)
current = &disk.read(current->sons[0], offset);
swap(keys[index], current->keys[0]);
swap(data[index], current->data[0]);
checkroot();
z.remove(key);
}/*}}}*/
else
{ // 2c - both sons have T-1 keys/*{{{*/
#ifdef DEBUG
printf("node::remove 2c\n");
#endif
// Append k to y
y.keys[y.n] = keys[index];
y.data[y.n] = data[index];
y.n++;
// Remove k from x
n--;
while(index <= n)
{
keys[index] = keys[index+1];
data[index] = data[index+1];
sons[index] = sons[index+1];
index++;
}
// Append z to y
y.merge(z);
// Get rid of z
disk.free(z);
// Recurse
checkroot();
y.remove(key);
}/*}}}*/
}/*}}}*/
else
{ // 3. Internal and k is not in x/*{{{*/
node &son = disk.read(sons[index], offset);
if(son.n >= T)
{
#ifdef DEBUG
printf("node::remove 3 void\n");
#endif
need_write = false;
checkroot();
son.remove(key);
}
else
{
node &y = index > 0 ? disk.read(sons[index-1], offset)
: dummy;
node &z = index < n ? disk.read(sons[index+1], offset)
: dummy;
if(y.n >= T)
{ // 3a - borrow from left sibling/*{{{*/
#ifdef DEBUG
printf("node::remove 3a\n");
#endif
// Preppend a key to the son
son.n++;
for(int j = son.n; j; j--)
{
son.keys[j] = son.keys[j-1];
son.data[j] = son.data[j-1];
son.sons[j] = son.sons[j-1];
}
son.keys[0] = keys[index-1];
son.data[0] = data[index-1];
// Move a key from sibling into that place
y.n--;
keys[index-1] = y.keys[y.n];
data[index-1] = y.data[y.n];
// Write changes
disk.write(son.offset);
disk.write(y.offset);
// Move the orphaned subtree where it belongs
son.sons[0] = y.sons[y.n+1];
checkroot();
son.remove(key);
}/*}}}*/
else if(z.n >= T)
{ // 3a - borrow from right sibling/*{{{*/
#ifdef DEBUG
printf("node::remove 3ar\n");
#endif
// Append a key to the son
son.keys[son.n] = keys[index];
son.data[son.n] = data[index];
son.n++;
// Move the orphaned subtree where it belongs
son.sons[son.n] = z.sons[0];
// Move a key from sibling into that place
keys[index] = z.keys[0];
data[index] = z.data[0];
for(int j = 0; j < z.n; j++)
{
z.keys[j] = z.keys[j+1];
z.data[j] = z.data[j+1];
z.sons[j] = z.sons[j+1];
}
z.n--;
// Write changes
disk.write(son.offset);
disk.write(z.offset);
checkroot();
son.remove(key);
}/*}}}*/
else if (index > 0)
{ // 3b - merge with left sibling/*{{{*/
#ifdef DEBUG
printf("node::remove 3b\n");
#endif
// Append i-1 to y
y.keys[y.n] = keys[index-1];
y.data[y.n] = data[index-1];
y.n++;
// Append son to y
y.merge(son);
// Get rid of son
disk.write(y.offset);
disk.free(son);
// Remove i-1 from x
n--;
keys[index-1] = keys[index];
data[index-1] = data[index];
while(index <= n)
{
keys[index] = keys[index+1];
sons[index] = sons[index+1];
data[index] = data[index+1];
index++;
}
// Recurse
checkroot();
y.remove(key);
}/*}}}*/
else
{ // 3b - merge with right sibling/*{{{*/
#ifdef DEBUG
printf("node::remove 3br\n");
#endif
// Append i to son
son.keys[son.n] = keys[index];
son.data[son.n] = data[index];
son.n++;
// Append z to son
son.merge(z);
// Get rid of z
disk.write(son.offset);
disk.free(z);
// Remove i from x
n--;
//We've removed the right son...
sons[index+1] = sons[index];
while(index <= n)
{
keys[index] = keys[index+1];
sons[index] = sons[index+1];
data[index] = data[index+1];
index++;
}
// Recurse
checkroot();
son.remove(key);
}/*}}}*/
}
}/*}}}*/
if(need_write)
disk.write(offset);
}
/*}}}*/
void node::search_dump (int key)/*{{{*/
{
pair<node*, int> x = search(key);
if(x.second >= 0)
{
record &r = disk.read_record(x.first->data[x.second]);
r.dump();
}
else
{
printf("NOT FOUND\n");
}
}
/*}}}*/
int node::dump (void)/*{{{*/
{
int count = 0;
for(int i = 0; i <= n; i++)
{
if(!leaf)
{
node &n = disk.read(sons[i], offset);
count += n.dump();
}
printf("%d: ", keys[i]);
record &r = disk.read_record(data[i]);
r.dump();
}
if(!leaf)
{
node &last = disk.read(sons[n], offset);
count += last.dump();
}
return count + n;
}
/*}}}*/
void node::debug (void)/*{{{*/
{
printf("offset: %d\tfather: %d\tleaf: %s\tn: %d\n",
offset, father, leaf ? "true" : "false", n);
printf("keys:\t");
for(int i = 0; i < n; i++)
printf("%d\t", keys[i]);
printf("\n");
printf("data:\t");
for(int i = 0; i < n; i++)
printf("%d\t", data[i]);
printf("\n");
if(!leaf)
{
printf("sons:\t");
for(int i = 0; i < n; i++)
printf("%d\t", sons[i]);
printf("%d", sons[n]);
printf("\n");
}
}
/*}}}*/
int node::validate (int bound)/*{{{*/
{
#ifdef VERBOSE
debug();
#endif
assert(n < 2*T);
if(leaf)
{
for(int i = 0; i < n; i++)
{
assert(bound < keys[i]);
bound = keys[i];
}
}
else
{
for(int i = 0; i < n; i++)
{
node &son = disk.read(sons[i], offset);
assert(son.offset == sons[i]);
assert(son.father == offset);
bound = son.validate(bound);
assert(bound < keys[i]);
bound = keys[i];
}
node &son = disk.read(sons[n], offset);
assert(son.offset == sons[n]);
assert(son.father == offset);
bound = son.validate(bound);
}
return bound;
}
/*}}}*/
//}}}
//{{{ Methods of cache
cache::cache()/*{{{*/
{
misc = fopen(MISCFILE, "rb+");
if(!misc)
{
assert(errno == ENOENT);
printf("Files not found, creating empty structures!\n");
misc = fopen(MISCFILE, "wb+");
data = fopen(DATAFILE, "wb+");
index = fopen(TREEFILE, "wb+");
//Tadam...
tree = &(allocate());
tree->create();
}
else
{
data = fopen(DATAFILE, "rb+");
assert(data);
index = fopen(TREEFILE, "rb+");
assert(index);
// Read metadata
int root;
fread(&root, sizeof(int), 1, misc);
fread(&next_new, sizeof(int), 1, misc);
fread(&next_new_data, sizeof(int), 1, misc);
int tmp = 0;
fread(&tmp, sizeof(int), 1, misc);
while(tmp>=0)
{
free_nodes.push(tmp);
fread(&tmp, sizeof(int), 1, misc);
}
fread(&tmp, sizeof(int), 1, misc);
while(tmp>=0)
{
free_data.push(tmp);
fread(&tmp, sizeof(int), 1, misc);
}
// Read the root
tree = &(read(root, -1));
}
}
/*}}}*/
cache::~cache()/*{{{*//*{{{*/
{
// Just in case, flush the index
flush();
fclose(index);
// Records are not cached for writing
fclose(data);
// Metadata is flushed only in here
rewind(misc);
fwrite(&tree->offset, sizeof(int), 1, misc);
fwrite(&next_new, sizeof(int), 1, misc);
fwrite(&next_new_data, sizeof(int), 1, misc);
int tmp;
while(!free_nodes.empty())
{
tmp = free_nodes.front();
free_nodes.pop();
fwrite(&tmp, sizeof(int), 1, misc);
}
tmp = -1;
fwrite(&tmp, sizeof(int), 1, misc);
while(!free_data.empty())
{
tmp = free_data.front();
free_data.pop();
fwrite(&tmp, sizeof(int), 1, misc);
}
tmp = -1;
fwrite(&tmp, sizeof(int), 1, misc);
fclose(misc);
}
/*}}}*/
void cache::flush (void)/*{{{*/
{
int writes = 0;
for(map<int, entry>::iterator it = storage.begin(); it!=storage.end();
it++)
if(it->second.dirty)
{
assert(it->second.data.offset == it->first);
fseek(index, it->second.data.offset * sizeof(node),
SEEK_SET);
fwrite(&(it->second.data), sizeof(node), 1, index);
writes++;
}
printf("Total reads: %lu\tTotal writes: %d\n",storage.size()-1, writes);
// Keep the root
entry tmp;
tmp.data = *tree;
tmp.dirty = false;
storage.clear();
tree = &(storage.insert(make_pair(tmp.data.offset, tmp)).first//Iterator
->second.data);
}
/*}}}*/
int cache::next_free (void)/*{{{*/
{
if(free_nodes.empty())
return next_new++;
else
{
int tmp = free_nodes.front();
free_nodes.pop();
return tmp;
}
}
/*}}}*/
node& cache::allocate ()/*{{{*/
{
entry tmp;
tmp.dirty = true;
tmp.data.offset = next_free();
map<int, entry>::iterator it =
storage.insert(make_pair(tmp.data.offset, tmp)).first;
return it->second.data;
}
/*}}}*//*}}}*/
node& cache::read (int offset, int parent)/*{{{*/
{
map<int, entry>::iterator it = storage.find(offset);
if(it != storage.end())
{
if(parent >= 0)
it->second.data.father = parent;
return it->second.data;
}
entry tmp;
tmp.dirty = false;
fseek(index, offset * sizeof(node), SEEK_SET);
fread(&tmp.data, sizeof(node), 1, index);
if(parent >= 0)
tmp.data.father = parent;
assert(offset == tmp.data.offset);
it = storage.insert(make_pair(tmp.data.offset, tmp)).first;
return it->second.data;
}
/*}}}*/
void cache::write (int offset)/*{{{*/
{
assert(storage.count(offset));
#ifdef DEBUG
if(offset == tree->offset)
{
printf("Writing root node, contents:\n");
tree->debug();
}
else
{
printf("Writing non-root node, contents:\n");
storage[offset].data.debug();
}
#endif
storage[offset].dirty = true;
}
/*}}}*/
void cache::free (node &n)/*{{{*/
{
free_nodes.push(n.offset);
}
/*}}}*/
int cache::allocate_record (void)/*{{{*/
{
if(free_data.empty())
return next_new_data++;
else
{
int tmp = free_data.front();
free_data.pop();
return tmp;
}
}
/*}}}*/
record& cache::read_record (int offset)/*{{{*/
{
//Unasserted assumption - there is only one record in use at any time
static record r;
fseek(data, offset * sizeof(record), SEEK_SET);
fread(&r, sizeof(record), 1, data);
return r;
}
/*}}}*/
void cache::write_record (int offset, record &r)/*{{{*/
{
fseek(data, offset * sizeof(record), SEEK_SET);
fwrite(&r, sizeof(record), 1, data);
}
/*}}}*/
void cache::free_record (int offset)/*{{{*/
{
free_data.push(offset);
}
/*}}}*/
//}}}
//{{{ Methods of record
void record::dump (void)
{
for(int i = 0; i < numbers_count; i++)
printf("%d ", numbers[i]);
printf("\n");
}
//}}}