RocksDB is a low-latency embedded key value database created by Facebook engineers.
It is based on LevelDB, which was created by Google engineers Jeffrey Dean and Sanjay Ghemawat.
According to the RocksDB website, it offers the following advantages compared to LevelDB:
– much better performance for databases larger than RAM
– great performance for IO bound workloads
– fixes the frequent write-stalls in LevelDB that caused high 99th-percentile latency
# Build RocksDB from source and install the shared library and headers
# system-wide so that pyrocksdb can compile and link against it.

# install build dependencies
sudo apt-get install -y build-essential libsnappy-dev zlib1g-dev libbz2-dev libgflags-dev

mkdir -p ~/build/rocksdb
cd ~/build/rocksdb
git clone https://github.com/facebook/rocksdb.git
cd rocksdb

# It is tested with this version
git checkout 2.7.fb

# compile the code
#make shared_lib
make librocksdb.so

# "make install" does not work! - must install manually
#>make install
#make: *** No rule to make target `install'. Stop.

# globally install librocksdb.so library
sudo cp librocksdb.so /usr/local/lib

# globally install headers
sudo mkdir -p /usr/local/include/rocksdb/
sudo cp -r ./include/* /usr/local/include/

# update cache - Thanks for tip from Dmitriy Selivanov
sudo ldconfig

# alternatively, skip the global install and point the toolchain at this
# checkout (run from the rocksdb source directory).
# NOTE(review): the path component was lost in the original text; $(pwd) is
# reconstructed from the pyrocksdb installation docs - verify before use.
#export CPLUS_INCLUDE_PATH=${CPLUS_INCLUDE_PATH}:$(pwd)/include
#export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$(pwd)
#export LIBRARY_PATH=${LIBRARY_PATH}:$(pwd)
Install pyrocksdb
# install pyrocksdb - created by Stephan Hof
sudo apt-get install -y python-virtualenv python-dev

# install pyrocksdb and dependencies through virtualenv
#virtualenv pyrocks_test
#cd pyrocks_test
#. bin/activate

# Cython is needed to build the pyrocksdb extension module
sudo pip install "Cython>=0.20"

mkdir -p ~/build/rocksdb
cd ~/build/rocksdb
git clone https://github.com/stephan-hof/pyrocksdb.git
cd pyrocksdb
sudo python setup.py install

# alternatively - to continue using virtualenv
# (https is used because GitHub no longer serves the git:// protocol)
# sudo pip install git+https://github.com/stephan-hof/pyrocksdb.git
Test pyrocksdb
# test if rocksdb successfully installed
# (run from the home directory rather than from inside the source checkout)
cd ~/
python -c 'import rocksdb; print "works!"'
Testing the rocksdb performance
# Create the benchmark script. The heredoc delimiter is quoted so the shell
# does not expand anything inside the Python source.
mkdir -p ~/build/rocksdb/test
cd ~/build/rocksdb/test
tee ./rocksdb_test.py <<"_EOF_"
import rocksdb


def main():
    """Write or read sequentially numbered keys in a RocksDB database.

    Command line arguments: database path, operation ("read" or "write")
    and the number of keys. Intended to be run under `time`.
    """
    import sys
    #print "argument count",len(sys.argv)
    if len(sys.argv) < 4:
        print """enter operation and how many. example:
time python rocksdb_test.py <filename> <read|write> <count>

# write 1 million values to testdb
time python rocksdb_test.py test.db write 1000000

# read 1 million values from testdb
time python rocksdb_test.py test.db read 1000000
"""
        return

    filename, ops, count = sys.argv[1], sys.argv[2], int(sys.argv[3])
    db = get_db(filename)
    if 'write' in ops:
        # every value is the key index followed by 80 bytes of padding
        mystring = 'a' * 80
        # xrange: Python 2 range() would build the whole list in memory,
        # which is prohibitive for the 100 million key run
        for i in xrange(count):
            db.put('key' + str(i), str(i) + mystring)
    elif 'read' in ops:
        for i in xrange(count):
            db.get('key' + str(i))
    else:
        print 'unknown operation:%s use "read" or "write"' % ops
    print 'done'


def get_db(filename):
    """Open the database at `filename`, creating it if it does not exist."""
    # simplest form, without any tuning:
    #db = rocksdb.DB(filename, rocksdb.Options(create_if_missing=True))
    opts = rocksdb.Options()
    opts.create_if_missing = True
    opts.max_open_files = 300000
    opts.write_buffer_size = 67108864  # 64 MiB
    opts.max_write_buffer_number = 3
    opts.target_file_size_base = 67108864  # 64 MiB
    opts.filter_policy = rocksdb.BloomFilterPolicy(10)
    opts.block_cache = rocksdb.LRUCache(2 * (1024 ** 3))  # 2 GiB
    opts.block_cache_compressed = rocksdb.LRUCache(500 * (1024 ** 2))  # 500 MiB
    db = rocksdb.DB(filename, opts)
    return db


if __name__ == "__main__":
    main()
_EOF_
Tests and Results
# Benchmark runs; the commented lines below each command are the recorded
# output of that run.

# write 1 million values
time python rocksdb_test.py test.db write 1000000
#done
#
#real 0m4.488s
#user 0m3.237s
#sys 0m1.625s

# read 1 million values
time python rocksdb_test.py test.db read 1000000
#done
#
#real 0m3.825s
#user 0m3.652s
#sys 0m0.147s

# write 100 million values
time python rocksdb_test.py test2.db write 100000000
#done
#
#real 9m29.150s
#user 12m22.623s
#sys 5m11.072s

# read 100 million values
time python rocksdb_test.py test2.db read 100000000
References
1. RocksDB Home: http://rocksdb.org/
2. PyrocksDB: https://github.com/stephan-hof/pyrocksdb
3. PyrocksDB Documentation: http://pyrocksdb.readthedocs.org/en/latest/