--- /dev/null
+#include "vector_io.h"
+#include <vector>
+#include <iostream>
+#include <stdexcept>
+using namespace std;
+
+template<class T>
+void compare_num_data(const string&vector1_file, const string&vector2_file){
+ vector<T>vector1 = load_vector<T>(vector1_file);
+ vector<T>vector2 = load_vector<T>(vector2_file);
+
+ if(vector1.size() < vector2.size()){
+ cout << "\""<< vector1_file<< "\" has only " << vector1.size() << " elements while \""<< vector2_file<< "\" has "<<vector2.size() << ". Can only compare vectors of equal size." << endl;
+ }else if(vector2.size() < vector1.size()){
+ cout << "\""<< vector2_file<< "\" has only " << vector2.size() << " elements while \""<< vector1_file<< "\" has "<<vector1.size() << ". Can only compare vectors of equal size." << endl;
+ }else{
+ unsigned vector1_smaller_count = 0;
+ unsigned vector2_smaller_count = 0;
+
+ unsigned first_difference = (unsigned)-1;
+
+ for(unsigned i=0; i<vector1.size(); ++i){
+ if(vector1[i] < vector2[i])
+ ++vector1_smaller_count;
+ if(vector2[i] < vector1[i])
+ ++vector2_smaller_count;
+ if(vector1[i] != vector2[i] && first_difference == (unsigned)-1)
+ first_difference = i;
+ }
+
+ if(vector1_smaller_count == 0 && vector2_smaller_count == 0){
+ cout << "The vectors are the same and have "<<vector1.size()<<" elements." << endl;
+ }else{
+ cout
+ << "The vectors differ. "
+ << vector1_smaller_count <<" elements are smaller in \""<<vector1_file<<"\". "
+ << vector2_smaller_count <<" elements are smaller in \""<<vector2_file<<"\". "
+ << (vector1.size()-vector1_smaller_count-vector2_smaller_count) <<" elements are the same. "
+ << (vector1_smaller_count+vector2_smaller_count) <<" elements are different. "
+ << "The vectors have "<<vector1.size() <<" elements."
+ << "The first element that differ is at index "<< first_difference << "."
+ << endl
+ ;
+ }
+ }
+
+}
+
+void compare_string_data(const std::string&vector1_file, const std::string&vector2_file){
+ vector<string>vector1 = load_vector<string>(vector1_file);
+ vector<string>vector2 = load_vector<string>(vector2_file);
+
+ if(vector1.size() < vector2.size()){
+ cout << "\""<< vector1_file<< "\" has only " << vector1.size() << " elements while \""<< vector2_file<< "\" has "<<vector2.size() << ". Can only compare vectors of equal size." << endl;
+ }else if(vector2.size() < vector1.size()){
+ cout << "\""<< vector2_file<< "\" has only " << vector2.size() << " elements while \""<< vector1_file<< "\" has "<<vector1.size() << ". Can only compare vectors of equal size." << endl;
+ }else{
+ unsigned equal_count = 0;
+
+ for(unsigned i=0; i<vector1.size(); ++i){
+ if(vector1[i] == vector2[i])
+ ++equal_count;
+ }
+
+ if(equal_count == vector1.size()){
+ cout << "The vectors are the same and have "<<vector1.size()<<" elements." << endl;
+ }else{
+ cout
+ << "The vectors differ. "
+ << equal_count <<" elements are the same. "
+ << vector1.size()-equal_count <<" elements are different. "
+ << "The vectors have "<<vector1.size() <<" elements."
+ << endl
+ ;
+ }
+ }
+
+}
+
+int main(int argc, char*argv[]){
+ try{
+ string vector1_file;
+ string vector2_file;
+ string data_type;
+ if(argc != 4){
+ cerr << "Usage: "<< argv[0] << " type vector1 vector2\n"
+ << "\n"
+ << "Compares two vectors of elements in binary format. data_type can be one of\n"
+ << " * int8\n"
+ << " * uint8\n"
+ << " * int16\n"
+ << " * uint16\n"
+ << " * int32\n"
+ << " * uint32\n"
+ << " * int64\n"
+ << " * uint64\n"
+ << " * float32\n"
+ << " * float64\n"
+ << " * string" << endl;
+ return 1;
+ }else{
+ data_type = argv[1];
+ vector1_file = argv[2];
+ vector2_file = argv[3];
+ }
+
+ if(data_type == "int8")
+ compare_num_data<signed char>(vector1_file, vector2_file);
+ else if(data_type == "uint8")
+ compare_num_data<unsigned char>(vector1_file, vector2_file);
+ else if(data_type == "int16")
+ compare_num_data<signed short>(vector1_file, vector2_file);
+ else if(data_type == "uint16")
+ compare_num_data<unsigned short>(vector1_file, vector2_file);
+ else if(data_type == "int32")
+ compare_num_data<signed int>(vector1_file, vector2_file);
+ else if(data_type == "uint32")
+ compare_num_data<unsigned int>(vector1_file, vector2_file);
+ else if(data_type == "int64")
+ compare_num_data<signed long long>(vector1_file, vector2_file);
+ else if(data_type == "uint64")
+ compare_num_data<unsigned long long>(vector1_file, vector2_file);
+ else if(data_type == "float32")
+ compare_num_data<float>(vector1_file, vector2_file);
+ else if(data_type == "float64")
+ compare_num_data<double>(vector1_file, vector2_file);
+ else if(data_type == "string")
+ compare_string_data(vector1_file, vector2_file);
+ else
+ throw runtime_error("Unknown data type \""+data_type+"\"");
+ }catch(exception&err){
+ cerr << "Stopped on exception : " << err.what() << endl;
+ }
+}
--- /dev/null
+#!/bin/sh
+g++ -O3 -DNDEBUG -std=c++0x encode_vector.cpp -o encode_vector
+g++ -O3 -DNDEBUG -std=c++0x decode_vector.cpp -o decode_vector
+g++ -O3 -DNDEBUG -std=c++0x compare_vector.cpp -o compare_vector
--- /dev/null
+#ifndef CONSTANTS_H
+#define CONSTANTS_H
+
+const unsigned invalid_id = 4294967295u;
+const unsigned inf_weight = 2147483647u;
+
+#endif
--- /dev/null
+#include "vector_io.h"
+#include <string>
+#include <iostream>
+#include <limits>
+#include <iomanip>
+using namespace std;
+
+template<class T>
+void convert_num_data(const string&input_vector_file){
+ vector<T>v = load_vector<T>(input_vector_file);
+
+ for(auto&x:v)
+ cout << std::setprecision(std::numeric_limits<T>::digits10+1) << x << '\n';
+}
+
+string replace_all_substrings(string subject, const string& search, const string& replace) {
+ size_t pos = 0;
+ while ((pos = subject.find(search, pos)) != std::string::npos) {
+ subject.replace(pos, search.length(), replace);
+ pos += replace.length();
+ }
+ return std::move(subject);
+}
+
+void convert_string_data(const string&input_vector_file){
+ vector<string>v = load_vector<string>(input_vector_file);
+
+ for(auto&s:v)
+ cout << replace_all_substrings(replace_all_substrings(s, "\\", "\\\\"), "\n", "\\n") << '\n';
+}
+
+int main(int argc, char*argv[]){
+ try{
+ string data_type, input_vector_file;
+ if(argc != 3){
+ cerr
+ << "Usage: "<< argv[0] << " data_type input_vector_file\n"
+ << "\n"
+ << "Reads binary data from input_vector_file and writes the data to the standard output. data_type can be one of\n"
+ << " * int8\n"
+ << " * uint8\n"
+ << " * int16\n"
+ << " * uint16\n"
+ << " * int32\n"
+ << " * uint32\n"
+ << " * int64\n"
+ << " * uint64\n"
+ << " * float32\n"
+ << " * float64\n"
+ << " * string" << endl;
+ return 1;
+ }else{
+ data_type = argv[1];
+ input_vector_file = argv[2];
+ }
+
+ if(data_type == "int8")
+ convert_num_data<signed char>(input_vector_file);
+ else if(data_type == "uint8")
+ convert_num_data<unsigned char>(input_vector_file);
+ else if(data_type == "int16")
+ convert_num_data<signed short>(input_vector_file);
+ else if(data_type == "uint16")
+ convert_num_data<unsigned short>(input_vector_file);
+ else if(data_type == "int32")
+ convert_num_data<signed int>(input_vector_file);
+ else if(data_type == "uint32")
+ convert_num_data<unsigned int>(input_vector_file);
+ else if(data_type == "int64")
+ convert_num_data<signed long long>(input_vector_file);
+ else if(data_type == "uint64")
+ convert_num_data<unsigned long long>(input_vector_file);
+ else if(data_type == "float32")
+ convert_num_data<float>(input_vector_file);
+ else if(data_type == "float64")
+ convert_num_data<double>(input_vector_file);
+ else if(data_type == "string")
+ convert_string_data(input_vector_file);
+ else
+ throw runtime_error("Unknown data type \""+data_type+"\"");
+ }catch(exception&err){
+ cerr << "Stopped on exception : "<< err.what() << endl;
+ }
+}
--- /dev/null
+#include "vector_io.h"
+#include <string>
+#include <iostream>
+#include <limits>
+using namespace std;
+
+
+template<class T>
+void convert_int_data(const string&output_vector_file){
+ string line;
+ vector<T>v;
+ while(getline(cin, line)){
+ long long x = stoll(line);
+ if(x < numeric_limits<T>::min())
+ throw runtime_error("The number \""+line+"\" is too small, min is \""+to_string(numeric_limits<T>::min())+"\"");
+ if(x > numeric_limits<T>::max())
+ throw runtime_error("The number \""+line+"\" is too large, max is \""+to_string(numeric_limits<T>::max())+"\"");
+ v.push_back(x);
+ }
+ save_vector(output_vector_file, v);
+}
+
+template<class T>
+void convert_float_data(const string&output_vector_file){
+ string line;
+ vector<T>v;
+ while(getline(cin, line)){
+ v.push_back(stold(line));
+ }
+ save_vector(output_vector_file, v);
+}
+
+void convert_uint64_data(const string&output_vector_file){
+ string line;
+ vector<unsigned long long>v;
+ while(getline(cin, line)){
+ v.push_back(stoull(line));
+ }
+ save_vector(output_vector_file, v);
+}
+
+string replace_all_substrings(string subject, const string& search, const string& replace) {
+ size_t pos = 0;
+ while ((pos = subject.find(search, pos)) != std::string::npos) {
+ subject.replace(pos, search.length(), replace);
+ pos += replace.length();
+ }
+ return std::move(subject);
+}
+
+void convert_string_data(const string&output_vector_file){
+ string line;
+ vector<string>v;
+ while(getline(cin, line)){
+ v.push_back(replace_all_substrings(replace_all_substrings(line, "\\\\", "\\"), "\\n", "\n"));
+ }
+ save_vector(output_vector_file, v);
+}
+
+int main(int argc, char*argv[]){
+ try{
+ string data_type, output_vector_file;
+ if(argc != 3){
+ cerr
+ << "Usage: "<< argv[0] << " data_type output_vector_file\n"
+ << "\n"
+ << "Reads textual data from the standard input and writes it in a binary format to output_vector_file. The input data should be one data element per line. The data is only written once an end of file is encountered on the input. data_type can be one of\n"
+ << " * int8\n"
+ << " * uint8\n"
+ << " * int16\n"
+ << " * uint16\n"
+ << " * int32\n"
+ << " * uint32\n"
+ << " * int64\n"
+ << " * uint64\n"
+ << " * float32\n"
+ << " * float64\n"
+ << " * string" << endl;
+ return 1;
+ }else{
+ data_type = argv[1];
+ output_vector_file = argv[2];
+ }
+
+ if(data_type == "int8")
+ convert_int_data<signed char>(output_vector_file);
+ else if(data_type == "uint8")
+ convert_int_data<unsigned char>(output_vector_file);
+ else if(data_type == "int16")
+ convert_int_data<signed short>(output_vector_file);
+ else if(data_type == "uint16")
+ convert_int_data<unsigned short>(output_vector_file);
+ else if(data_type == "int32")
+ convert_int_data<signed int>(output_vector_file);
+ else if(data_type == "uint32")
+ convert_int_data<unsigned int>(output_vector_file);
+ else if(data_type == "int64")
+ convert_int_data<signed long long>(output_vector_file);
+ else if(data_type == "uint64")
+ convert_uint64_data(output_vector_file);
+ else if(data_type == "float32")
+ convert_float_data<float>(output_vector_file);
+ else if(data_type == "float64")
+ convert_float_data<double>(output_vector_file);
+ else if(data_type == "string")
+ convert_string_data(output_vector_file);
+ else
+ throw runtime_error("Unknown data type \""+data_type+"\"");
+ }catch(exception&err){
+ cerr << "Stopped on exception : "<< err.what() << endl;
+ }
+}
--- /dev/null
+#ifndef ID_QUEUE_H
+#define ID_QUEUE_H
+
+#include "constants.h"
+#include <vector>
+#include <algorithm>
+#include <cassert>
+
+struct IDKeyPair{
+ unsigned id;
+ unsigned key;
+};
+
+//! A priority queue where the elements are IDs from 0 to id_count-1 where id_count is a number that is set in the constructor.
+//! The elements are sorted by integer keys.
+class MinIDQueue{
+private:
+ static const unsigned tree_arity = 4;
+public:
+ MinIDQueue():heap_size(0){}
+
+ explicit MinIDQueue(unsigned id_count):
+ id_pos(id_count, invalid_id),
+ heap(id_count),
+ heap_size(0){}
+
+ //! Returns whether the queue is empty. Equivalent to checking whether size() returns 0.
+ bool empty()const{
+ return heap_size == 0;
+ }
+
+ //! Returns the number of elements in the queue.
+ unsigned size()const{
+ return heap_size;
+ }
+
+ //! Returns the id_count value passed to the constructor.
+ unsigned id_count()const{
+ return id_pos.size();
+ }
+
+ //! Checks whether an element is in the queue.
+ bool contains_id(unsigned id){
+ assert(id < id_count());
+ return id_pos[id] != invalid_id;
+ }
+
+ //! Removes all elements from the queue.
+ void clear(){
+ for(unsigned i=0; i<heap_size; ++i)
+ id_pos[heap[i].id] = invalid_id;
+ heap_size = 0;
+ }
+
+ friend void swap(MinIDQueue&l, MinIDQueue&r){
+ using std::swap;
+ swap(l.id_pos, r.id_pos);
+ swap(l.heap, r.heap);
+ swap(l.heap, r.heap);
+ swap(l.heap_size, r.heap_size);
+ }
+
+ //! Returns the current key of an element.
+ //! Undefined if the element is not part of the queue.
+ unsigned get_key(unsigned id)const{
+ assert(id < id_count());
+ assert(id_pos[id] != invalid_id);
+ return heap[id_pos[id]].key;
+ }
+
+ //! Returns the smallest element key pair without removing it from the queue.
+ IDKeyPair peek()const{
+ assert(!empty());
+ return heap.front();
+ }
+
+ //! Returns the smallest element key pair and removes it form the queue.
+ IDKeyPair pop(){
+ assert(!empty());
+ --heap_size;
+ std::swap(heap[0].key, heap[heap_size].key);
+ std::swap(heap[0].id, heap[heap_size].id);
+ id_pos[heap[0].id] = 0;
+ id_pos[heap[heap_size].id] = invalid_id;
+
+ move_down_in_tree(0);
+ return heap[heap_size];
+ }
+
+ //! Inserts a element key pair.
+ //! Undefined if the element is part of the queue.
+ void push(IDKeyPair p){
+ assert(p.id < id_count());
+ assert(!contains_id(p.id));
+
+ unsigned pos = heap_size;
+ ++heap_size;
+ heap[pos] = p;
+ id_pos[p.id] = pos;
+ move_up_in_tree(pos);
+ }
+
+ //! Updates the key of an element if the new key is smaller than the old key.
+ //! Does nothing if the new key is larger.
+ //! Undefined if the element is not part of the queue.
+ bool decrease_key(IDKeyPair p){
+ assert(p.id < id_count());
+ assert(contains_id(p.id));
+
+ unsigned pos = id_pos[p.id];
+
+ if(heap[pos].key > p.key){
+ heap[pos].key = p.key;
+ move_up_in_tree(pos);
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ //! Updates the key of an element if the new key is larger than the old key.
+ //! Does nothing if the new key is smaller.
+ //! Undefined if the element is not part of the queue.
+ bool increase_key(IDKeyPair p){
+ assert(p.id < id_count());
+ assert(contains_id(p.id));
+
+ unsigned pos = id_pos[p.id];
+
+ if(heap[pos].key < p.key){
+ heap[pos].key = p.key;
+ move_down_in_tree(pos);
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+private:
+ void move_up_in_tree(unsigned pos){
+ while(pos != 0){
+ unsigned parent = (pos-1)/tree_arity;
+ if(heap[parent].key > heap[pos].key){
+ std::swap(heap[pos], heap[parent]);
+ std::swap(id_pos[heap[pos].id], id_pos[heap[parent].id]);
+ }
+ pos = parent;
+ }
+ }
+
+ void move_down_in_tree(unsigned pos){
+ for(;;){
+ unsigned first_child = tree_arity*pos+1;
+ if(first_child >= heap_size)
+ return; // no children
+ unsigned smallest_child = first_child;
+ for(unsigned c = first_child+1; c < std::min(tree_arity*pos+tree_arity+1, heap_size); ++c){
+ if(heap[smallest_child].key > heap[c].key){
+ smallest_child = c;
+ }
+ }
+
+ if(heap[smallest_child].key >= heap[pos].key)
+ return; // no child is smaller
+
+ std::swap(heap[pos], heap[smallest_child]);
+ std::swap(id_pos[heap[pos].id], id_pos[heap[smallest_child].id]);
+ pos = smallest_child;
+ }
+ }
+
+ std::vector<unsigned>id_pos;
+ std::vector<IDKeyPair>heap;
+
+ unsigned heap_size;
+};
+
+#endif
--- /dev/null
+#ifndef TIMER_H
+#define TIMER_H
+
+#include <sys/time.h>
+
+inline
+long long get_micro_time(){
+ timeval t;
+ gettimeofday(&t, 0);
+ return t.tv_sec*1000000ll+t.tv_usec;
+}
+
+#endif
--- /dev/null
+#ifndef VECTOR_IO_H
+#define VECTPR_IO_H
+
+#include <string>
+#include <vector>
+#include <stdexcept>
+#include <fstream>
+
+template<class T>
+void save_vector(const std::string&file_name, const std::vector<T>&vec){
+ std::ofstream out(file_name, std::ios::binary);
+ if(!out)
+ throw std::runtime_error("Can not open \""+file_name+"\" for writing.");
+ out.write(reinterpret_cast<const char*>(&vec[0]), vec.size()*sizeof(T));
+}
+
+template<class T>
+std::vector<T>load_vector(const std::string&file_name){
+ std::ifstream in(file_name, std::ios::binary);
+ if(!in)
+ throw std::runtime_error("Can not open \""+file_name+"\" for reading.");
+ in.seekg(0, std::ios::end);
+ unsigned long long file_size = in.tellg();
+ if(file_size % sizeof(T) != 0)
+ throw std::runtime_error("File \""+file_name+"\" can not be a vector of the requested type because it's size is no multiple of the element type's size.");
+ in.seekg(0, std::ios::beg);
+ std::vector<T>vec(file_size / sizeof(T));
+ in.read(reinterpret_cast<char*>(&vec[0]), file_size);
+ return vec; // NVRO
+}
+
+template<>
+void save_vector<std::string>(const std::string&file_name, const std::vector<std::string>&vec){
+ std::ofstream out(file_name, std::ios::binary);
+ for(unsigned i=0; i<vec.size(); ++i){
+ const char*x = vec[i].c_str();
+ out.write(x, vec[i].length()+1);
+ }
+}
+
+template<>
+std::vector<std::string>load_vector<std::string>(const std::string&file_name){
+ std::vector<char>data = load_vector<char>(file_name);
+ std::vector<std::string>ret;
+ std::vector<char>::const_iterator
+ str_begin = data.begin(),
+ str_end = data.begin(),
+ data_end = data.end();
+
+ while(str_end != data_end){
+ if(*str_end == '\0'){
+ ret.push_back(std::string(str_begin, str_end));
+ ++str_end;
+ str_begin = str_end;
+ }else{
+ ++str_end;
+ }
+ }
+
+ ret.shrink_to_fit();
+ return ret; // NVRO
+}
+
+template<class T>
+void save_value(const std::string&file_name, const T&val){
+ save_vector(file_name, std::vector<T>{val});
+}
+
+template<class T>
+T load_value(const std::string&file_name){
+ auto v = load_vector<T>(file_name);
+ if(v.empty())
+ throw std::runtime_error(file_name+" is empty");
+ if(v.size() >= 2)
+ throw std::runtime_error(file_name+" contains more than one element");
+ return v.front();
+}
+
+#endif