Initial commit
authorTim "S.D.Eagle" Zeitz <dev.tim.zeitz@gmail.com>
Mon, 15 Oct 2018 12:48:20 +0000 (14:48 +0200)
committerTim "S.D.Eagle" Zeitz <dev.tim.zeitz@gmail.com>
Mon, 15 Oct 2018 12:48:20 +0000 (14:48 +0200)
compare_vector.cpp [new file with mode: 0644]
compile.sh [new file with mode: 0755]
constants.h [new file with mode: 0644]
decode_vector.cpp [new file with mode: 0644]
encode_vector.cpp [new file with mode: 0644]
id_queue.h [new file with mode: 0644]
timer.h [new file with mode: 0755]
vector_io.h [new file with mode: 0644]

diff --git a/compare_vector.cpp b/compare_vector.cpp
new file mode 100644 (file)
index 0000000..00cc02d
--- /dev/null
@@ -0,0 +1,134 @@
+#include "vector_io.h"
+#include <vector>
+#include <iostream>
+#include <stdexcept>
+using namespace std;
+
+template<class T>
+void compare_num_data(const string&vector1_file, const string&vector2_file){
+       vector<T>vector1 = load_vector<T>(vector1_file);
+       vector<T>vector2 = load_vector<T>(vector2_file);
+
+       if(vector1.size() < vector2.size()){
+               cout << "\""<< vector1_file<< "\" has only " << vector1.size() << " elements while \""<< vector2_file<< "\" has "<<vector2.size() << ". Can only compare vectors of equal size." << endl;
+       }else if(vector2.size() < vector1.size()){
+               cout << "\""<< vector2_file<< "\" has only " << vector2.size() << " elements while \""<< vector1_file<< "\" has "<<vector1.size() << ". Can only compare vectors of equal size." << endl;
+       }else{
+               unsigned vector1_smaller_count = 0;
+               unsigned vector2_smaller_count = 0;
+
+               unsigned first_difference = (unsigned)-1;
+
+               for(unsigned i=0; i<vector1.size(); ++i){
+                       if(vector1[i] < vector2[i])
+                               ++vector1_smaller_count;
+                       if(vector2[i] < vector1[i])
+                               ++vector2_smaller_count;
+                       if(vector1[i] != vector2[i] && first_difference == (unsigned)-1)
+                               first_difference = i;
+               }
+
+               if(vector1_smaller_count == 0 && vector2_smaller_count == 0){
+                       cout << "The vectors are the same and have "<<vector1.size()<<" elements." << endl;
+               }else{
+                       cout 
+                               << "The vectors differ. "
+                               << vector1_smaller_count <<" elements are smaller in \""<<vector1_file<<"\". " 
+                               << vector2_smaller_count <<" elements are smaller in \""<<vector2_file<<"\". "
+                               << (vector1.size()-vector1_smaller_count-vector2_smaller_count) <<" elements are the same. "
+                               << (vector1_smaller_count+vector2_smaller_count) <<" elements are different. "
+                               << "The vectors have "<<vector1.size() <<" elements."
+                               << "The first element that differ is at index "<< first_difference << "."
+                               << endl
+                       ;
+               }
+       }
+
+}
+
+void compare_string_data(const std::string&vector1_file, const std::string&vector2_file){
+       vector<string>vector1 = load_vector<string>(vector1_file);
+       vector<string>vector2 = load_vector<string>(vector2_file);
+
+       if(vector1.size() < vector2.size()){
+               cout << "\""<< vector1_file<< "\" has only " << vector1.size() << " elements while \""<< vector2_file<< "\" has "<<vector2.size() << ". Can only compare vectors of equal size." << endl;
+       }else if(vector2.size() < vector1.size()){
+               cout << "\""<< vector2_file<< "\" has only " << vector2.size() << " elements while \""<< vector1_file<< "\" has "<<vector1.size() << ". Can only compare vectors of equal size." << endl;
+       }else{
+               unsigned equal_count = 0;
+
+               for(unsigned i=0; i<vector1.size(); ++i){
+                       if(vector1[i] == vector2[i])
+                               ++equal_count;
+               }
+
+               if(equal_count == vector1.size()){
+                       cout << "The vectors are the same and have "<<vector1.size()<<" elements." << endl;
+               }else{
+                       cout 
+                               << "The vectors differ. "
+                               << equal_count <<" elements are the same. "
+                               << vector1.size()-equal_count <<" elements are different. "
+                               << "The vectors have "<<vector1.size() <<" elements."
+                               << endl
+                       ;
+               }
+       }
+
+}
+
+int main(int argc, char*argv[]){
+       try{
+               string vector1_file;
+               string vector2_file;
+               string data_type;
+               if(argc != 4){
+                       cerr << "Usage: "<< argv[0] << " type vector1 vector2\n"
+                               << "\n"
+                               << "Compares two vectors of elements in binary format. data_type can be one of\n"
+                               << " * int8\n"
+                               << " * uint8\n"
+                               << " * int16\n"
+                               << " * uint16\n"
+                               << " * int32\n"
+                               << " * uint32\n"
+                               << " * int64\n"
+                               << " * uint64\n"
+                               << " * float32\n"
+                               << " * float64\n"
+                               << " * string" << endl;
+                       return 1;
+               }else{
+                       data_type = argv[1];
+                       vector1_file = argv[2];
+                       vector2_file = argv[3];
+               }
+
+               if(data_type == "int8")
+                       compare_num_data<signed char>(vector1_file, vector2_file);
+               else if(data_type == "uint8")
+                       compare_num_data<unsigned char>(vector1_file, vector2_file);
+               else if(data_type == "int16")
+                       compare_num_data<signed short>(vector1_file, vector2_file);
+               else if(data_type == "uint16")
+                       compare_num_data<unsigned short>(vector1_file, vector2_file);
+               else if(data_type == "int32")
+                       compare_num_data<signed int>(vector1_file, vector2_file);
+               else if(data_type == "uint32")
+                       compare_num_data<unsigned int>(vector1_file, vector2_file);
+               else if(data_type == "int64")
+                       compare_num_data<signed long long>(vector1_file, vector2_file);
+               else if(data_type == "uint64")
+                       compare_num_data<unsigned long long>(vector1_file, vector2_file);
+               else if(data_type == "float32")
+                       compare_num_data<float>(vector1_file, vector2_file);
+               else if(data_type == "float64")
+                       compare_num_data<double>(vector1_file, vector2_file);
+               else if(data_type == "string")
+                       compare_string_data(vector1_file, vector2_file);
+               else
+                       throw runtime_error("Unknown data type \""+data_type+"\"");
+       }catch(exception&err){
+               cerr << "Stopped on exception : " << err.what() << endl;
+       }
+}
diff --git a/compile.sh b/compile.sh
new file mode 100755 (executable)
index 0000000..48dd289
--- /dev/null
@@ -0,0 +1,4 @@
+#!/bin/sh
+g++ -O3 -DNDEBUG -std=c++0x encode_vector.cpp -o encode_vector
+g++ -O3 -DNDEBUG -std=c++0x decode_vector.cpp -o decode_vector
+g++ -O3 -DNDEBUG -std=c++0x compare_vector.cpp -o compare_vector
diff --git a/constants.h b/constants.h
new file mode 100644 (file)
index 0000000..cd91361
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef CONSTANTS_H
+#define CONSTANTS_H
+
+const unsigned invalid_id = 4294967295u;
+const unsigned inf_weight = 2147483647u;
+
+#endif
diff --git a/decode_vector.cpp b/decode_vector.cpp
new file mode 100644 (file)
index 0000000..4348a07
--- /dev/null
@@ -0,0 +1,84 @@
+#include "vector_io.h"
+#include <string>
+#include <iostream>
+#include <limits>
+#include <iomanip>
+using namespace std;
+
+template<class T>
+void convert_num_data(const string&input_vector_file){
+       vector<T>v = load_vector<T>(input_vector_file);
+
+       for(auto&x:v)
+               cout << std::setprecision(std::numeric_limits<T>::digits10+1) << x << '\n';
+}
+
+string replace_all_substrings(string subject, const string& search, const string& replace) {
+    size_t pos = 0;
+    while ((pos = subject.find(search, pos)) != std::string::npos) {
+         subject.replace(pos, search.length(), replace);
+         pos += replace.length();
+    }
+    return std::move(subject);
+}
+
+void convert_string_data(const string&input_vector_file){
+       vector<string>v = load_vector<string>(input_vector_file);
+
+       for(auto&s:v)
+               cout << replace_all_substrings(replace_all_substrings(s, "\\", "\\\\"), "\n", "\\n") << '\n';
+}
+
+int main(int argc, char*argv[]){
+       try{
+               string data_type, input_vector_file;
+               if(argc != 3){
+                       cerr 
+                               << "Usage: "<< argv[0] << " data_type input_vector_file\n" 
+                               << "\n"
+                               << "Reads binary data from input_vector_file and writes the data to the standard output. data_type can be one of\n"
+                               << " * int8\n"
+                               << " * uint8\n"
+                               << " * int16\n"
+                               << " * uint16\n"
+                               << " * int32\n"
+                               << " * uint32\n"
+                               << " * int64\n"
+                               << " * uint64\n"
+                               << " * float32\n"
+                               << " * float64\n"
+                               << " * string" << endl;
+                       return 1;
+               }else{
+                       data_type = argv[1];
+                       input_vector_file = argv[2];
+               }
+
+               if(data_type == "int8")
+                       convert_num_data<signed char>(input_vector_file);
+               else if(data_type == "uint8")
+                       convert_num_data<unsigned char>(input_vector_file);
+               else if(data_type == "int16")
+                       convert_num_data<signed short>(input_vector_file);
+               else if(data_type == "uint16")
+                       convert_num_data<unsigned short>(input_vector_file);
+               else if(data_type == "int32")
+                       convert_num_data<signed int>(input_vector_file);
+               else if(data_type == "uint32")
+                       convert_num_data<unsigned int>(input_vector_file);
+               else if(data_type == "int64")
+                       convert_num_data<signed long long>(input_vector_file);
+               else if(data_type == "uint64")
+                       convert_num_data<unsigned long long>(input_vector_file);
+               else if(data_type == "float32")
+                       convert_num_data<float>(input_vector_file);
+               else if(data_type == "float64")
+                       convert_num_data<double>(input_vector_file);
+               else if(data_type == "string")
+                       convert_string_data(input_vector_file);
+               else
+                       throw runtime_error("Unknown data type \""+data_type+"\"");
+       }catch(exception&err){
+               cerr << "Stopped on exception : "<< err.what() << endl;
+       }
+}
diff --git a/encode_vector.cpp b/encode_vector.cpp
new file mode 100644 (file)
index 0000000..9eab195
--- /dev/null
@@ -0,0 +1,112 @@
+#include "vector_io.h"
+#include <string>
+#include <iostream>
+#include <limits>
+using namespace std;
+
+
+template<class T>
+void convert_int_data(const string&output_vector_file){
+       string line;
+       vector<T>v;
+       while(getline(cin, line)){
+               long long x = stoll(line);
+               if(x < numeric_limits<T>::min())
+                       throw runtime_error("The number \""+line+"\" is too small, min is \""+to_string(numeric_limits<T>::min())+"\"");
+               if(x > numeric_limits<T>::max())
+                       throw runtime_error("The number \""+line+"\" is too large, max is \""+to_string(numeric_limits<T>::max())+"\"");
+               v.push_back(x);
+       }
+       save_vector(output_vector_file, v);
+}
+
+template<class T>
+void convert_float_data(const string&output_vector_file){
+       string line;
+       vector<T>v;
+       while(getline(cin, line)){
+               v.push_back(stold(line));
+       }
+       save_vector(output_vector_file, v);
+}
+
+void convert_uint64_data(const string&output_vector_file){
+       string line;
+       vector<unsigned long long>v;
+       while(getline(cin, line)){
+               v.push_back(stoull(line));
+       }
+       save_vector(output_vector_file, v);
+}
+
+string replace_all_substrings(string subject, const string& search, const string& replace) {
+    size_t pos = 0;
+    while ((pos = subject.find(search, pos)) != std::string::npos) {
+         subject.replace(pos, search.length(), replace);
+         pos += replace.length();
+    }
+    return std::move(subject);
+}
+
+void convert_string_data(const string&output_vector_file){
+       string line;
+       vector<string>v;
+       while(getline(cin, line)){
+               v.push_back(replace_all_substrings(replace_all_substrings(line, "\\\\", "\\"), "\\n", "\n"));
+       }
+       save_vector(output_vector_file, v);
+}
+
+int main(int argc, char*argv[]){
+       try{
+               string data_type, output_vector_file;
+               if(argc != 3){
+                       cerr 
+                               << "Usage: "<< argv[0] << " data_type output_vector_file\n" 
+                               << "\n"
+                               << "Reads textual data from the standard input and writes it in a binary format to output_vector_file. The input data should be one data element per line. The data is only written once an end of file is encountered on the input. data_type can be one of\n"
+                               << " * int8\n"
+                               << " * uint8\n"
+                               << " * int16\n"
+                               << " * uint16\n"
+                               << " * int32\n"
+                               << " * uint32\n"
+                               << " * int64\n"
+                               << " * uint64\n"
+                               << " * float32\n"
+                               << " * float64\n"
+                               << " * string" << endl;
+                       return 1;
+               }else{
+                       data_type = argv[1];
+                       output_vector_file = argv[2];
+               }
+
+               if(data_type == "int8")
+                       convert_int_data<signed char>(output_vector_file);
+               else if(data_type == "uint8")
+                       convert_int_data<unsigned char>(output_vector_file);
+               else if(data_type == "int16")
+                       convert_int_data<signed short>(output_vector_file);
+               else if(data_type == "uint16")
+                       convert_int_data<unsigned short>(output_vector_file);
+               else if(data_type == "int32")
+                       convert_int_data<signed int>(output_vector_file);
+               else if(data_type == "uint32")
+                       convert_int_data<unsigned int>(output_vector_file);
+               else if(data_type == "int64")
+                       convert_int_data<signed long long>(output_vector_file);
+               else if(data_type == "uint64")
+                       convert_uint64_data(output_vector_file);
+               else if(data_type == "float32")
+                       convert_float_data<float>(output_vector_file);
+               else if(data_type == "float64")
+                       convert_float_data<double>(output_vector_file);
+               else if(data_type == "string")
+                       convert_string_data(output_vector_file);
+               else
+                       throw runtime_error("Unknown data type \""+data_type+"\"");
+       }catch(exception&err){
+               cerr << "Stopped on exception : "<< err.what() << endl;
+       }
+}
diff --git a/id_queue.h b/id_queue.h
new file mode 100644 (file)
index 0000000..9614784
--- /dev/null
@@ -0,0 +1,178 @@
+#ifndef ID_QUEUE_H
+#define ID_QUEUE_H
+
+#include "constants.h"
+#include <vector>
+#include <algorithm>
+#include <cassert>
+
+struct IDKeyPair{
+       unsigned id;
+       unsigned key;
+};
+
+//! A priority queue where the elements are IDs from 0 to id_count-1 where id_count is a number that is set in the constructor.
+//! The elements are sorted by integer keys.
+class MinIDQueue{
+private:
+       static const unsigned tree_arity = 4;
+public:
+       MinIDQueue():heap_size(0){}
+
+       explicit MinIDQueue(unsigned id_count):
+               id_pos(id_count, invalid_id), 
+               heap(id_count), 
+               heap_size(0){}
+
+       //! Returns whether the queue is empty. Equivalent to checking whether size() returns 0.
+       bool empty()const{
+               return heap_size == 0;
+       }
+
+       //! Returns the number of elements in the queue.
+       unsigned size()const{
+               return heap_size;
+       }
+
+       //! Returns the id_count value passed to the constructor.
+       unsigned id_count()const{
+               return id_pos.size();
+       }
+
+       //! Checks whether an element is in the queue.
+       bool contains_id(unsigned id){
+               assert(id < id_count());
+               return id_pos[id] != invalid_id;
+       }
+
+       //! Removes all elements from the queue.
+       void clear(){
+               for(unsigned i=0; i<heap_size; ++i)
+                       id_pos[heap[i].id] = invalid_id;
+               heap_size = 0;
+       }
+
+       friend void swap(MinIDQueue&l, MinIDQueue&r){
+               using std::swap;
+               swap(l.id_pos, r.id_pos);
+               swap(l.heap, r.heap);
+               swap(l.heap, r.heap);
+               swap(l.heap_size, r.heap_size);
+       }
+
+       //! Returns the current key of an element.
+       //! Undefined if the element is not part of the queue.
+       unsigned get_key(unsigned id)const{
+               assert(id < id_count());
+               assert(id_pos[id] != invalid_id);
+               return heap[id_pos[id]].key;
+       }
+
+       //! Returns the smallest element key pair without removing it from the queue.
+       IDKeyPair peek()const{
+               assert(!empty());
+               return heap.front();
+       }
+
+       //! Returns the smallest element key pair and removes it form the queue.
+       IDKeyPair pop(){
+               assert(!empty());
+               --heap_size;
+               std::swap(heap[0].key, heap[heap_size].key);
+               std::swap(heap[0].id,  heap[heap_size].id);
+               id_pos[heap[0].id] = 0;
+               id_pos[heap[heap_size].id] = invalid_id;
+               
+               move_down_in_tree(0);
+               return heap[heap_size];
+       }
+
+       //! Inserts a element key pair. 
+       //! Undefined if the element is part of the queue.
+       void push(IDKeyPair p){
+               assert(p.id < id_count());
+               assert(!contains_id(p.id));
+       
+               unsigned pos = heap_size;
+               ++heap_size;
+               heap[pos] = p;
+               id_pos[p.id] = pos;
+               move_up_in_tree(pos);
+       }
+
+       //! Updates the key of an element if the new key is smaller than the old key. 
+       //! Does nothing if the new key is larger.
+       //! Undefined if the element is not part of the queue.
+       bool decrease_key(IDKeyPair p){
+               assert(p.id < id_count());
+               assert(contains_id(p.id));
+       
+               unsigned pos = id_pos[p.id];
+
+               if(heap[pos].key > p.key){
+                       heap[pos].key = p.key;
+                       move_up_in_tree(pos);
+                       return true;
+               } else {
+                       return false;
+               }
+       }
+
+       //! Updates the key of an element if the new key is larger than the old key. 
+       //! Does nothing if the new key is smaller.
+       //! Undefined if the element is not part of the queue.
+       bool increase_key(IDKeyPair p){
+               assert(p.id < id_count());
+               assert(contains_id(p.id));
+       
+               unsigned pos = id_pos[p.id];
+
+               if(heap[pos].key < p.key){
+                       heap[pos].key = p.key;
+                       move_down_in_tree(pos);
+                       return true;
+               } else {
+                       return false;
+               }
+       }
+
+private:
+       void move_up_in_tree(unsigned pos){
+               while(pos != 0){
+                       unsigned parent = (pos-1)/tree_arity;
+                       if(heap[parent].key > heap[pos].key){
+                               std::swap(heap[pos],  heap[parent]);
+                               std::swap(id_pos[heap[pos].id], id_pos[heap[parent].id]);
+                       }
+                       pos = parent;
+               }
+       }
+
+       void move_down_in_tree(unsigned pos){
+               for(;;){
+                       unsigned first_child = tree_arity*pos+1;
+                       if(first_child >= heap_size)
+                               return; // no children
+                       unsigned smallest_child = first_child;
+                       for(unsigned c = first_child+1; c < std::min(tree_arity*pos+tree_arity+1, heap_size); ++c){
+                               if(heap[smallest_child].key > heap[c].key){
+                                       smallest_child = c;
+                               }
+                       }
+
+                       if(heap[smallest_child].key >= heap[pos].key)
+                               return; // no child is smaller
+
+                       std::swap(heap[pos],  heap[smallest_child]);
+                       std::swap(id_pos[heap[pos].id], id_pos[heap[smallest_child].id]);
+                       pos = smallest_child;
+               }
+       }
+
+       std::vector<unsigned>id_pos;
+       std::vector<IDKeyPair>heap;
+
+       unsigned heap_size;
+};
+
+#endif
diff --git a/timer.h b/timer.h
new file mode 100755 (executable)
index 0000000..6ee97e5
--- /dev/null
+++ b/timer.h
@@ -0,0 +1,13 @@
+#ifndef TIMER_H
+#define TIMER_H
+
+#include <sys/time.h>
+
+inline
+long long get_micro_time(){
+       timeval t;
+       gettimeofday(&t, 0);
+       return t.tv_sec*1000000ll+t.tv_usec;
+}
+
+#endif
diff --git a/vector_io.h b/vector_io.h
new file mode 100644 (file)
index 0000000..4cac56f
--- /dev/null
@@ -0,0 +1,79 @@
+#ifndef VECTOR_IO_H
+#define VECTPR_IO_H
+
+#include <string>
+#include <vector>
+#include <stdexcept>
+#include <fstream>
+
+template<class T>
+void save_vector(const std::string&file_name, const std::vector<T>&vec){
+       std::ofstream out(file_name, std::ios::binary);
+       if(!out)
+               throw std::runtime_error("Can not open \""+file_name+"\" for writing.");
+       out.write(reinterpret_cast<const char*>(&vec[0]), vec.size()*sizeof(T));
+}
+
+template<class T>
+std::vector<T>load_vector(const std::string&file_name){
+       std::ifstream in(file_name, std::ios::binary);
+       if(!in)
+               throw std::runtime_error("Can not open \""+file_name+"\" for reading.");
+       in.seekg(0, std::ios::end);
+       unsigned long long file_size = in.tellg();
+       if(file_size % sizeof(T) != 0)
+               throw std::runtime_error("File \""+file_name+"\" can not be a vector of the requested type because it's size is no multiple of the element type's size.");
+       in.seekg(0, std::ios::beg);
+       std::vector<T>vec(file_size / sizeof(T));
+       in.read(reinterpret_cast<char*>(&vec[0]), file_size);
+       return vec; // NVRO
+}
+
+template<>
+void save_vector<std::string>(const std::string&file_name, const std::vector<std::string>&vec){
+       std::ofstream out(file_name, std::ios::binary);
+       for(unsigned i=0; i<vec.size(); ++i){
+               const char*x = vec[i].c_str();
+               out.write(x, vec[i].length()+1);
+       }
+}
+
+template<>
+std::vector<std::string>load_vector<std::string>(const std::string&file_name){
+       std::vector<char>data = load_vector<char>(file_name);
+       std::vector<std::string>ret;
+       std::vector<char>::const_iterator 
+               str_begin = data.begin(), 
+               str_end = data.begin(), 
+               data_end = data.end();
+
+       while(str_end != data_end){
+               if(*str_end == '\0'){
+                       ret.push_back(std::string(str_begin, str_end));
+                       ++str_end;
+                       str_begin = str_end;
+               }else{
+                       ++str_end;
+               }
+       }
+
+       ret.shrink_to_fit();
+       return ret; // NVRO
+}
+
+template<class T>
+void save_value(const std::string&file_name, const T&val){
+       save_vector(file_name, std::vector<T>{val});
+}
+
+template<class T>
+T load_value(const std::string&file_name){
+       auto v = load_vector<T>(file_name);
+       if(v.empty())
+               throw std::runtime_error(file_name+" is empty");
+       if(v.size() >= 2)
+               throw std::runtime_error(file_name+" contains more than one element");
+       return v.front();
+}
+
+#endif