BTree code

/* Complete B-tree implementation
*
* Author: Remigiusz 'lRem' Modrzejewski <http://lrem.net/>
* Distributed under the terms of GNU GPL version 2, as found on:  
* http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
*/

#include <cstdio>
#include <cerrno>
#include <cassert>
#include <queue>
#include <map>

using namespace std;

#define DATAFILE "data"
#define TREEFILE "index"
#define MISCFILE "misc"
#define NODESIZE 64
#define RECORD_SIZE 15
#define T 31
#define HELP "Possible actions: (h)elp, (a)dd, (s)earch, (d)elete, (e)dit, \
   (p)rint all, (q)uit\n"

struct record/*{{{*/
{
   int numbers_count,
      numbers[RECORD_SIZE];

   void dump (void);
};
/*}}}*/
class node/*{{{*/
{
   // Internals {{{
   int n;
   bool leaf;
   int keys[NODESIZE],
      sons[NODESIZE],
      data[NODESIZE];
   // }}}
   public:
   int offset,
      father;
   //{{{Methods - this is actual B-Tree implementation
   void create(); // This is _not_ a constructor
   pair<node*, int> search (int key); // Returns the data offset
   void split(int index); // Parent is a field in node
   void merge(node &source);
   void insert(int key, int data);
   void overflow(int key);
   void remove(int key);
   void search_dump (int key);
   int dump (void);
   void debug (void);
   int validate (int bound);
   //}}}
} *tree, dummy;
/*}}}*/
class cache/*{{{*/
{
   // Internals {{{
   struct entry
   {
      node data;
      bool dirty;
   };

   FILE *index,
      *data,
      *misc;
   int next_new_data,
      next_new;
   queue<int> free_data;
   queue<int> free_nodes;
   map<int, entry> storage;
   int next_free();
   //}}}
   public:
   // Methods - everything that touches files {{{
   cache();
   ~cache();
   void flush (void);
   node& allocate (void);
   node& read (int offset, int parent);
   void write (int offset);
   void free (node &n);
   int allocate_record (void);
   record& read_record (int offset);
   void write_record (int offset, record &r);
   void free_record (int offset);
   //}}}
} disk;
/*}}}*/
int main (void)/*{{{*/
{
   char choice = 'h';
   int key, data;
   record tmp;

   while(choice != 'q')
   {
      switch(choice)
      {
         case 'h':/*{{{*/
            printf(HELP);
            break;/*}}}*/
         case 'a':/*{{{*/
            printf("Key: ");
            scanf("%d", &key);
            printf("Numbers count: ");
            scanf("%d", &tmp.numbers_count);
            for(int i = 0; i < tmp.numbers_count; i++)
            {
               printf("Number %d: ", i);
               scanf("%d", &tmp.numbers[i]);
            }
            data = disk.allocate_record();
            disk.write_record(data, tmp);
            tree->insert(key, data);
            break;/*}}}*/
         case 'e':/*{{{*/
            printf("Key: ");
            scanf("%d", &key);
            if(tree->search(key).second == -1)
               break;
            tree->remove(key);
            scanf("%d", &key);
            printf("Numbers count: ");
            scanf("%d", &tmp.numbers_count);
            for(int i = 0; i < tmp.numbers_count; i++)
            {
               printf("Number %d: ", i);
               scanf("%d", &tmp.numbers[i]);
            }
            data = disk.allocate_record();
            disk.write_record(data, tmp);
            tree->insert(key, data);
            break;/*}}}*/
         case 'p':/*{{{*/
            printf("Total records: %d\t",tree->dump());
            break;/*}}}*/
         case 's':/*{{{*/
            printf("Key: ");
            scanf("%d", &key);
            tree->search_dump(key);
            break;/*}}}*/
         case 'd':/*{{{*/
            printf("Key: ");
            scanf("%d", &key);
            tree->remove(key);
            break;/*}}}*/
         default:/*{{{*/
            printf("Unknown choice\n");/*}}}*/
      }
#ifdef VERBOSE
      printf("VALIDATION BEGIN{{{\n");
      tree->validate(-1);
      printf("VALIDATION END}}}\n");
#else
#ifdef DEBUG
      tree->validate(-1);
#endif
#endif
      disk.flush();
      printf("> ");
      do
         scanf("%c", &choice);
      while(isspace(choice));
   }
   return 0;
}
/*}}}*/
//{{{ Global functions
void swap (int &a, int &b)
{
   int tmp = a;
   a = b;
   b = tmp;
}
//}}}
//{{{ Methods of node
void node::create()/*{{{*/
{
   leaf = true;
   father = -1;
   n = 0;
   disk.write(offset);
}
/*}}}*/
pair<node*, int> node::search (int key)/*{{{*/
{
   int i = 0;
   while(i < n && keys[i] < key)
      i++;
   if(i <= n && keys[i] == key)
      return make_pair(this, i);
   if(leaf)
      return make_pair(this, -1);
   node &son = disk.read(sons[i], offset);
   return son.search(key);
}
/*}}}*/
void node::split(int index)/*{{{*/
{
   assert(n == 2*T-1);
   // Make the new one similar to the old one
   node &z = disk.allocate();
   z.father = father;
   z.leaf = leaf;
   z.n = T-1;
   // Move half of the keys to the new one
   for(int j = T; j--; )
   {
      z.keys[j] = keys[j+T];
      z.data[j] = data[j+T];
   }
   // If not leaf, move the subtrees
   if(!leaf)
      for(int j = T+1; j--; )
         z.sons[j] = sons[j+T];
   n = T-1;

   // Move the last key from old son to father
   // Note that the subtree stays where it was - it will be new post-n
   node &x = disk.read(father, -1);
   for(int j = x.n; j > index; j--)
   {
      x.sons[j+1] = x.sons[j];
      x.keys[j+1] = x.keys[j];
      x.data[j+1] = x.data[j];
   }
   x.keys[index+1] = x.keys[index];
   x.data[index+1] = x.data[index];
   x.sons[index+1] = z.offset;
   x.keys[index] = keys[T-1];
   x.data[index] = data[T-1];
   x.n++;

   disk.write(offset);
   disk.write(x.offset);
   disk.write(z.offset);
}
/*}}}*/
void node::merge(node &source)/*{{{*/
{
   assert(source.n == T-1 && n == T);
   assert(offset != source.offset);
   assert(father == source.father);
   assert(leaf == source.leaf);
   //Unasserted assumption - we lack rightmost son
   for(int i = 0; i < n; i++)
   {
      keys[n+i] = source.keys[i];
      data[n+i] = source.data[i];
      sons[n+i] = source.sons[i];
   }
   n=2*T-1;
}
/*}}}*/
void node::insert(int key, int new_data)/*{{{*/
{
#ifdef DEBUG
   printf("node::insert(%d, %d)\n", key, new_data);
#endif
   pair<node*, int> x = search(key);
   if(x.second == -1)
   {
      assert(x.first->leaf);
      int i = x.first->n;
      for(; i && x.first->keys[i-1] > key; i--)
      {
         x.first->keys[i] = x.first->keys[i-1];
         x.first->data[i] = x.first->data[i-1];
      }
      x.first->keys[i] = key;
      x.first->data[i] = new_data;
      x.first->n++;
      disk.write(x.first->offset);
      if(x.first->n==2*T-1)
         x.first->overflow(key);
   }
   else
   {
      disk.free_record(new_data);
   }

}
/*}}}*/
void node::overflow(int key)/*{{{*/
{
   if(this == tree)
   {
#ifdef DEBUG
      printf("node::overflow(%d) - root\n", key);
#endif
      node &s = disk.allocate();
      s.leaf = false;
      s.n = 0;
      s.father = -1;
      s.sons[0] = tree->offset;
      tree->father = s.offset;
      tree->split(0);
      disk.write(tree->offset);
      disk.write(s.offset);
      tree = &s;
   }
   else
   {
#ifdef DEBUG
      printf("node::overflow(%d) - nonroot\n", key);
#endif
      node &parent = disk.read(father, -1);
      int i = parent.n-1;
      while(i>=0 && parent.keys[i] > key)
         i--;
      i++;
      node &y = i > 0 ? disk.read(parent.sons[i-1], -1) : dummy;
      node &z = i < parent.n ? disk.read(parent.sons[i+1], -1): dummy;
      if((y.n != dummy.n) && (y.n < 2*(T-1)))
      {
         // Append a key to y
         y.keys[y.n] = parent.keys[i-1];
         y.data[y.n] = parent.data[i-1];
         // Move the orphaned subtree where it belongs
         y.sons[y.n] = sons[0];
         // Move a key from this into that place
         parent.keys[i-1] = keys[0];
         parent.data[i-1] = data[0];
         for(int j = 0; j < n; j++)
         {
            keys[j] = keys[j+1];
            data[j] = data[j+1];
            sons[j] = sons[j+1];
         }
         n--;
         // Write changes
         disk.write(offset);
         disk.write(y.offset);
         disk.write(parent.offset);
      }
      else if((z.n != dummy.n) && (z.n < 2*(T-1)))
      {
         // Preppend a key to z
         z.n++;
         for(int j = z.n; j; j--)
         {
            z.keys[j] = z.keys[j-1];
            z.data[j] = z.data[j-1];
            z.sons[j] = z.sons[j-1];
         }
         z.keys[0] = parent.keys[i];
         z.data[0] = parent.data[i];
         // Move a key from this into that place
         n--;
         parent.keys[i] = keys[n];
         parent.data[i] = data[n];
         // Move the orphaned subtree where it belongs
         z.sons[0] = sons[n+1];
         // Write changes
         disk.write(offset);
         disk.write(z.offset);
         disk.write(parent.offset);
      }
      else
         split(i);
      if(parent.n == 2*T-1)
         parent.overflow(key);
   }
}
/*}}}*/
#define checkroot()/*{{{*/\
{\
   if(n == 0)\
   {\
      printf("Root going down\n");\
      assert(this == tree);\
      node &son = disk.read(sons[0], offset);\
      son.father = -1;\
      tree = &son;\
      disk.free(*this);\
      disk.write(son.offset);\
      need_write = false;\
   }\
}
/*}}}*/
void node::remove(int key)/*{{{*/
{
   int index = 0;
   bool need_write = true;
   while(index < n && keys[index] < key)
      index++;
   if(leaf)
   { // 1. Leaf/*{{{*/
#ifdef DEBUG
      printf("node::remove 1 (%d)\n", key);
#endif
      // Simply delete k from x
      if(index < n && keys[index] == key)
      {
         n--;
         while(index < n)
         {
            disk.free_record(data[index]);
            keys[index] = keys[index+1];
            data[index] = data[index+1];
            index++;
         }      
      }
   }/*}}}*/
   else if(index < n && keys[index] == key)
   { // 2. Internal and k is in x/*{{{*/
      node &y = disk.read(sons[index], offset);
      node &z = disk.read(sons[index+1], offset);
      if(y.n >= T)
      { // 2a/*{{{*/
#ifdef DEBUG
         printf("node::remove 2a\n");
#endif
         // Swap with predecesor
         node *current = &y;
         while(!current->leaf)
            current = &disk.read(current->sons[current->n],
                  offset);
         swap(keys[index], current->keys[current->n - 1]);
         swap(data[index], current->data[current->n - 1]);
         // Note this does not hurt reads count nor asymptotical
         // computing time, as _all_ nodes just iterated will be
         // subject to recursion.

         // Recursively delete
         checkroot();
         y.remove(key);
      }/*}}}*/
      else if(z.n >= T)
      { // 2b/*{{{*/
#ifdef DEBUG
         printf("node::remove 2b\n");
#endif
         // 2a turned around
         node *current = &z;
         while(!current->leaf)
            current = &disk.read(current->sons[0], offset);
         swap(keys[index], current->keys[0]);
         swap(data[index], current->data[0]);

         checkroot();
         z.remove(key);
      }/*}}}*/
      else
      { // 2c - both sons have T-1 keys/*{{{*/
#ifdef DEBUG
         printf("node::remove 2c\n");
#endif
         // Append k to y
         y.keys[y.n] = keys[index];
         y.data[y.n] = data[index];
         y.n++;
         // Remove k from x
         n--;
         while(index <= n)
         {
            keys[index] = keys[index+1];
            data[index] = data[index+1];
            sons[index] = sons[index+1];
            index++;
         }
         // Append z to y
         y.merge(z);
         // Get rid of z
         disk.free(z);
         // Recurse
         checkroot();
         y.remove(key);
      }/*}}}*/
   }/*}}}*/
   else
   { // 3. Internal and k is not in x/*{{{*/
      node &son = disk.read(sons[index], offset);
      if(son.n >= T)
      {
#ifdef DEBUG
         printf("node::remove 3 void\n");
#endif
         need_write = false;
         checkroot();
         son.remove(key);
      }
      else
      {
         node &y = index > 0 ? disk.read(sons[index-1], offset)
            : dummy;
         node &z = index < n ? disk.read(sons[index+1], offset)
            : dummy;
         if(y.n >= T)
         { // 3a - borrow from left sibling/*{{{*/
#ifdef DEBUG
            printf("node::remove 3a\n");
#endif
            // Preppend a key to the son
            son.n++;
            for(int j = son.n; j; j--)
            {
               son.keys[j] = son.keys[j-1];
               son.data[j] = son.data[j-1];
               son.sons[j] = son.sons[j-1];
            }
            son.keys[0] = keys[index-1];
            son.data[0] = data[index-1];
            // Move a key from sibling into that place
            y.n--;
            keys[index-1] = y.keys[y.n];
            data[index-1] = y.data[y.n];
            // Write changes
            disk.write(son.offset);
            disk.write(y.offset);
            // Move the orphaned subtree where it belongs
            son.sons[0] = y.sons[y.n+1];
            checkroot();
            son.remove(key);
         }/*}}}*/
         else if(z.n >= T)
         { // 3a - borrow from right sibling/*{{{*/
#ifdef DEBUG
            printf("node::remove 3ar\n");
#endif
            // Append a key to the son
            son.keys[son.n] = keys[index];
            son.data[son.n] = data[index];
            son.n++;
            // Move the orphaned subtree where it belongs
            son.sons[son.n] = z.sons[0];
            // Move a key from sibling into that place
            keys[index] = z.keys[0];
            data[index] = z.data[0];
            for(int j = 0; j < z.n; j++)
            {
               z.keys[j] = z.keys[j+1];
               z.data[j] = z.data[j+1];
               z.sons[j] = z.sons[j+1];
            }
            z.n--;
            // Write changes
            disk.write(son.offset);
            disk.write(z.offset);
            checkroot();
            son.remove(key);
         }/*}}}*/
         else if (index > 0)
         { // 3b - merge with left sibling/*{{{*/
#ifdef DEBUG
            printf("node::remove 3b\n");
#endif
            // Append i-1 to y
            y.keys[y.n] = keys[index-1];
            y.data[y.n] = data[index-1];
            y.n++;
            // Append son to y
            y.merge(son);
            // Get rid of son
            disk.write(y.offset);
            disk.free(son);
            // Remove i-1 from x
            n--;
            keys[index-1] = keys[index];
            data[index-1] = data[index];
            while(index <= n)
            {
               keys[index] = keys[index+1];
               sons[index] = sons[index+1];
               data[index] = data[index+1];
               index++;
            }
            // Recurse
            checkroot();
            y.remove(key);
         }/*}}}*/
         else
         { // 3b - merge with right sibling/*{{{*/
#ifdef DEBUG
            printf("node::remove 3br\n");
#endif
            // Append i to son
            son.keys[son.n] = keys[index];
            son.data[son.n] = data[index];
            son.n++;
            // Append z to son
            son.merge(z);
            // Get rid of z
            disk.write(son.offset);
            disk.free(z);
            // Remove i from x
            n--;
            //We've removed the right son...
            sons[index+1] = sons[index];
            while(index <= n)
            {
               keys[index] = keys[index+1];
               sons[index] = sons[index+1];
               data[index] = data[index+1];
               index++;
            }
            // Recurse
            checkroot();
            son.remove(key);
         }/*}}}*/
      }
   }/*}}}*/
   if(need_write)
      disk.write(offset);
}
/*}}}*/
void node::search_dump (int key)/*{{{*/
{
   pair<node*, int> x = search(key);
   if(x.second >= 0)
   {
      record &r = disk.read_record(x.first->data[x.second]);
      r.dump();
   }
   else
   {
      printf("NOT FOUND\n");
   }
}
/*}}}*/
int node::dump (void)/*{{{*/
{
   int count = 0;
   for(int i = 0; i <= n; i++)
   {
      if(!leaf)
      {
         node &n = disk.read(sons[i], offset);
         count += n.dump();
      }
      printf("%d: ", keys[i]);
      record &r = disk.read_record(data[i]);
      r.dump();
   }
   if(!leaf)
   {
      node &last = disk.read(sons[n], offset);
      count += last.dump();
   }
   return count + n;
}
/*}}}*/
void node::debug (void)/*{{{*/
{
   printf("offset: %d\tfather: %d\tleaf: %s\tn: %d\n",
         offset, father, leaf ? "true" : "false", n);
   printf("keys:\t");
   for(int i = 0; i < n; i++)
      printf("%d\t", keys[i]);
   printf("\n");
   printf("data:\t");
   for(int i = 0; i < n; i++)
      printf("%d\t", data[i]);
   printf("\n");
   if(!leaf)
   {
      printf("sons:\t");
      for(int i = 0; i < n; i++)
         printf("%d\t", sons[i]);
      printf("%d", sons[n]);
      printf("\n");
   }
}
/*}}}*/
int node::validate (int bound)/*{{{*/
{
#ifdef VERBOSE
   debug();
#endif
   assert(n < 2*T);
   if(leaf)
   {
      for(int i = 0; i < n; i++)
      {
         assert(bound < keys[i]);
         bound = keys[i];
      }
   }
   else
   {
      for(int i = 0; i < n; i++)
      {
         node &son = disk.read(sons[i], offset);
         assert(son.offset == sons[i]);
         assert(son.father == offset);
         bound = son.validate(bound);
         assert(bound < keys[i]);
         bound = keys[i];
      }
      node &son = disk.read(sons[n], offset);
      assert(son.offset == sons[n]);
      assert(son.father == offset);
      bound = son.validate(bound);
   }
   return bound;
}
/*}}}*/
//}}}
//{{{ Methods of cache
cache::cache()/*{{{*/
{
   misc = fopen(MISCFILE, "rb+");
   if(!misc)
   {
      assert(errno == ENOENT);
      printf("Files not found, creating empty structures!\n");
      misc = fopen(MISCFILE, "wb+");
      data = fopen(DATAFILE, "wb+");
      index = fopen(TREEFILE, "wb+");
      //Tadam...
      tree = &(allocate());
      tree->create();
   }
   else
   {
      data = fopen(DATAFILE, "rb+");
      assert(data);
      index = fopen(TREEFILE, "rb+");
      assert(index);
      // Read metadata
      int root;
      fread(&root, sizeof(int), 1, misc);
      fread(&next_new, sizeof(int), 1, misc);
      fread(&next_new_data, sizeof(int), 1, misc);
      int tmp = 0;
      fread(&tmp, sizeof(int), 1, misc);
      while(tmp>=0)
      {
         free_nodes.push(tmp);
         fread(&tmp, sizeof(int), 1, misc);
      }
      fread(&tmp, sizeof(int), 1, misc);
      while(tmp>=0)
      {
         free_data.push(tmp);
         fread(&tmp, sizeof(int), 1, misc);
      }
      // Read the root
      tree = &(read(root, -1));
   }
}
/*}}}*/
cache::~cache()/*{{{*//*{{{*/
{
   // Just in case, flush the index
   flush();
   fclose(index);
   // Records are not cached for writing
   fclose(data);
   // Metadata is flushed only in here
   rewind(misc);
   fwrite(&tree->offset, sizeof(int), 1, misc);
   fwrite(&next_new, sizeof(int), 1, misc);
   fwrite(&next_new_data, sizeof(int), 1, misc);
   int tmp;
   while(!free_nodes.empty())
   {
      tmp = free_nodes.front();
      free_nodes.pop();
      fwrite(&tmp, sizeof(int), 1, misc);
   }
   tmp = -1;
   fwrite(&tmp, sizeof(int), 1, misc);
   while(!free_data.empty())
   {
      tmp = free_data.front();
      free_data.pop();
      fwrite(&tmp, sizeof(int), 1, misc);
   }
   tmp = -1;
   fwrite(&tmp, sizeof(int), 1, misc);
   fclose(misc);
}
/*}}}*/
void cache::flush (void)/*{{{*/
{
   int writes = 0;
   for(map<int, entry>::iterator it = storage.begin(); it!=storage.end();
         it++)
      if(it->second.dirty)
      {
         assert(it->second.data.offset == it->first);
         fseek(index, it->second.data.offset * sizeof(node),
               SEEK_SET);
         fwrite(&(it->second.data), sizeof(node), 1, index);
         writes++;
      }
   printf("Total reads: %lu\tTotal writes: %d\n",storage.size()-1, writes);
   // Keep the root
   entry tmp;
   tmp.data = *tree;
   tmp.dirty = false;
   storage.clear();
   tree = &(storage.insert(make_pair(tmp.data.offset, tmp)).first//Iterator
         ->second.data);

}
/*}}}*/
int cache::next_free (void)/*{{{*/
{
   if(free_nodes.empty())
      return next_new++;
   else
   {
      int tmp = free_nodes.front();
      free_nodes.pop();
      return tmp;
   }
}
/*}}}*/
node& cache::allocate ()/*{{{*/
{
   entry tmp;
   tmp.dirty = true;
   tmp.data.offset = next_free();
   map<int, entry>::iterator it =
      storage.insert(make_pair(tmp.data.offset, tmp)).first;
   return it->second.data;
}
/*}}}*//*}}}*/
node& cache::read (int offset, int parent)/*{{{*/
{
   map<int, entry>::iterator it = storage.find(offset);
   if(it != storage.end())
   {
      if(parent >= 0)
         it->second.data.father = parent;
      return it->second.data;
   }
   entry tmp;
   tmp.dirty = false;
   fseek(index, offset * sizeof(node), SEEK_SET);
   fread(&tmp.data, sizeof(node), 1, index);
   if(parent >= 0)
      tmp.data.father = parent;
   assert(offset == tmp.data.offset);
   it = storage.insert(make_pair(tmp.data.offset, tmp)).first;
   return it->second.data;
}
/*}}}*/
void cache::write (int offset)/*{{{*/
{
   assert(storage.count(offset));
#ifdef DEBUG
   if(offset == tree->offset)
   {
      printf("Writing root node, contents:\n");
      tree->debug();
   }
   else
   {
      printf("Writing non-root node, contents:\n");
      storage[offset].data.debug();
   }
#endif
   storage[offset].dirty = true;
}
/*}}}*/
void cache::free (node &n)/*{{{*/
{
   free_nodes.push(n.offset);
}
/*}}}*/
int cache::allocate_record (void)/*{{{*/
{
   if(free_data.empty())
      return next_new_data++;
   else
   {
      int tmp = free_data.front();
      free_data.pop();
      return tmp;
   }
}
/*}}}*/
record& cache::read_record (int offset)/*{{{*/
{
   //Unasserted assumption - there is only one record in use at any time
   static record r;
   fseek(data, offset * sizeof(record), SEEK_SET);
   fread(&r, sizeof(record), 1, data);
   return r;
}
/*}}}*/
void cache::write_record (int offset, record &r)/*{{{*/
{
   fseek(data, offset * sizeof(record), SEEK_SET);
   fwrite(&r, sizeof(record), 1, data);
}
/*}}}*/
void cache::free_record (int offset)/*{{{*/
{
   free_data.push(offset);
}
/*}}}*/
//}}}
//{{{ Methods of record
void record::dump (void)
{
   for(int i = 0; i < numbers_count; i++)
      printf("%d ", numbers[i]);
   printf("\n");
}
//}}}