From 98a3aec2c48ec86813ae24320712cf21be26111a Mon Sep 17 00:00:00 2001 From: "Tim \"S.D.Eagle\" Zeitz" Date: Mon, 15 Oct 2018 14:48:20 +0200 Subject: [PATCH] Initial commit --- compare_vector.cpp | 134 ++++++++++++++++++++++++++++++++++ compile.sh | 4 + constants.h | 7 ++ decode_vector.cpp | 84 +++++++++++++++++++++ encode_vector.cpp | 112 ++++++++++++++++++++++++++++ id_queue.h | 178 +++++++++++++++++++++++++++++++++++++++++++++ timer.h | 13 ++++ vector_io.h | 79 ++++++++++++++++++++ 8 files changed, 611 insertions(+) create mode 100644 compare_vector.cpp create mode 100755 compile.sh create mode 100644 constants.h create mode 100644 decode_vector.cpp create mode 100644 encode_vector.cpp create mode 100644 id_queue.h create mode 100755 timer.h create mode 100644 vector_io.h diff --git a/compare_vector.cpp b/compare_vector.cpp new file mode 100644 index 0000000..00cc02d --- /dev/null +++ b/compare_vector.cpp @@ -0,0 +1,134 @@ +#include "vector_io.h" +#include +#include +#include +using namespace std; + +template +void compare_num_data(const string&vector1_file, const string&vector2_file){ + vectorvector1 = load_vector(vector1_file); + vectorvector2 = load_vector(vector2_file); + + if(vector1.size() < vector2.size()){ + cout << "\""<< vector1_file<< "\" has only " << vector1.size() << " elements while \""<< vector2_file<< "\" has "<vector1 = load_vector(vector1_file); + vectorvector2 = load_vector(vector2_file); + + if(vector1.size() < vector2.size()){ + cout << "\""<< vector1_file<< "\" has only " << vector1.size() << " elements while \""<< vector2_file<< "\" has "<(vector1_file, vector2_file); + else if(data_type == "uint8") + compare_num_data(vector1_file, vector2_file); + else if(data_type == "int16") + compare_num_data(vector1_file, vector2_file); + else if(data_type == "uint16") + compare_num_data(vector1_file, vector2_file); + else if(data_type == "int32") + compare_num_data(vector1_file, vector2_file); + else if(data_type == "uint32") + compare_num_data(vector1_file, vector2_file); + else if(data_type == "int64") + compare_num_data(vector1_file, vector2_file); + else if(data_type == "uint64") + compare_num_data(vector1_file, vector2_file); + else if(data_type == "float32") + compare_num_data(vector1_file, vector2_file); + else if(data_type == "float64") + compare_num_data(vector1_file, vector2_file); + else if(data_type == "string") + compare_string_data(vector1_file, vector2_file); + else + throw runtime_error("Unknown data type \""+data_type+"\""); + }catch(exception&err){ + cerr << "Stopped on exception : " << err.what() << endl; + } +} diff --git a/compile.sh b/compile.sh new file mode 100755 index 0000000..48dd289 --- /dev/null +++ b/compile.sh @@ -0,0 +1,4 @@ +#!/bin/sh +g++ -O3 -DNDEBUG -std=c++0x encode_vector.cpp -o encode_vector +g++ -O3 -DNDEBUG -std=c++0x decode_vector.cpp -o decode_vector +g++ -O3 -DNDEBUG -std=c++0x compare_vector.cpp -o compare_vector diff --git a/constants.h b/constants.h new file mode 100644 index 0000000..cd91361 --- /dev/null +++ b/constants.h @@ -0,0 +1,7 @@ +#ifndef CONSTANTS_H +#define CONSTANTS_H + +const unsigned invalid_id = 4294967295u; +const unsigned inf_weight = 2147483647u; + +#endif diff --git a/decode_vector.cpp b/decode_vector.cpp new file mode 100644 index 0000000..4348a07 --- /dev/null +++ b/decode_vector.cpp @@ -0,0 +1,84 @@ +#include "vector_io.h" +#include +#include +#include +#include +using namespace std; + +template +void convert_num_data(const string&input_vector_file){ + vectorv = load_vector(input_vector_file); + + for(auto&x:v) + cout << std::setprecision(std::numeric_limits::digits10+1) << x << '\n'; +} + +string replace_all_substrings(string subject, const string& search, const string& replace) { + size_t pos = 0; + while ((pos = subject.find(search, pos)) != std::string::npos) { + subject.replace(pos, search.length(), replace); + pos += replace.length(); + } + return std::move(subject); +} + +void convert_string_data(const string&input_vector_file){ + vectorv = load_vector(input_vector_file); + + for(auto&s:v) + cout << replace_all_substrings(replace_all_substrings(s, "\\", "\\\\"), "\n", "\\n") << '\n'; +} + +int main(int argc, char*argv[]){ + try{ + string data_type, input_vector_file; + if(argc != 3){ + cerr + << "Usage: "<< argv[0] << " data_type input_vector_file\n" + << "\n" + << "Reads binary data from input_vector_file and writes the data to the standard output. data_type can be one of\n" + << " * int8\n" + << " * uint8\n" + << " * int16\n" + << " * uint16\n" + << " * int32\n" + << " * uint32\n" + << " * int64\n" + << " * uint64\n" + << " * float32\n" + << " * float64\n" + << " * string" << endl; + return 1; + }else{ + data_type = argv[1]; + input_vector_file = argv[2]; + } + + if(data_type == "int8") + convert_num_data(input_vector_file); + else if(data_type == "uint8") + convert_num_data(input_vector_file); + else if(data_type == "int16") + convert_num_data(input_vector_file); + else if(data_type == "uint16") + convert_num_data(input_vector_file); + else if(data_type == "int32") + convert_num_data(input_vector_file); + else if(data_type == "uint32") + convert_num_data(input_vector_file); + else if(data_type == "int64") + convert_num_data(input_vector_file); + else if(data_type == "uint64") + convert_num_data(input_vector_file); + else if(data_type == "float32") + convert_num_data(input_vector_file); + else if(data_type == "float64") + convert_num_data(input_vector_file); + else if(data_type == "string") + convert_string_data(input_vector_file); + else + throw runtime_error("Unknown data type \""+data_type+"\""); + }catch(exception&err){ + cerr << "Stopped on exception : "<< err.what() << endl; + } +} diff --git a/encode_vector.cpp b/encode_vector.cpp new file mode 100644 index 0000000..9eab195 --- /dev/null +++ b/encode_vector.cpp @@ -0,0 +1,112 @@ +#include "vector_io.h" +#include +#include +#include +using namespace std; + + +template +void convert_int_data(const string&output_vector_file){ + string line; + vectorv; + while(getline(cin, line)){ + long long x = stoll(line); + if(x < numeric_limits::min()) + throw runtime_error("The number \""+line+"\" is too small, min is \""+to_string(numeric_limits::min())+"\""); + if(x > numeric_limits::max()) + throw runtime_error("The number \""+line+"\" is too large, max is \""+to_string(numeric_limits::max())+"\""); + v.push_back(x); + } + save_vector(output_vector_file, v); +} + +template +void convert_float_data(const string&output_vector_file){ + string line; + vectorv; + while(getline(cin, line)){ + v.push_back(stold(line)); + } + save_vector(output_vector_file, v); +} + +void convert_uint64_data(const string&output_vector_file){ + string line; + vectorv; + while(getline(cin, line)){ + v.push_back(stoull(line)); + } + save_vector(output_vector_file, v); +} + +string replace_all_substrings(string subject, const string& search, const string& replace) { + size_t pos = 0; + while ((pos = subject.find(search, pos)) != std::string::npos) { + subject.replace(pos, search.length(), replace); + pos += replace.length(); + } + return std::move(subject); +} + +void convert_string_data(const string&output_vector_file){ + string line; + vectorv; + while(getline(cin, line)){ + v.push_back(replace_all_substrings(replace_all_substrings(line, "\\\\", "\\"), "\\n", "\n")); + } + save_vector(output_vector_file, v); +} + +int main(int argc, char*argv[]){ + try{ + string data_type, output_vector_file; + if(argc != 3){ + cerr + << "Usage: "<< argv[0] << " data_type output_vector_file\n" + << "\n" + << "Reads textual data from the standard input and writes it in a binary format to output_vector_file. The input data should be one data element per line. The data is only written once an end of file is encountered on the input. data_type can be one of\n" + << " * int8\n" + << " * uint8\n" + << " * int16\n" + << " * uint16\n" + << " * int32\n" + << " * uint32\n" + << " * int64\n" + << " * uint64\n" + << " * float32\n" + << " * float64\n" + << " * string" << endl; + return 1; + }else{ + data_type = argv[1]; + output_vector_file = argv[2]; + } + + if(data_type == "int8") + convert_int_data(output_vector_file); + else if(data_type == "uint8") + convert_int_data(output_vector_file); + else if(data_type == "int16") + convert_int_data(output_vector_file); + else if(data_type == "uint16") + convert_int_data(output_vector_file); + else if(data_type == "int32") + convert_int_data(output_vector_file); + else if(data_type == "uint32") + convert_int_data(output_vector_file); + else if(data_type == "int64") + convert_int_data(output_vector_file); + else if(data_type == "uint64") + convert_uint64_data(output_vector_file); + else if(data_type == "float32") + convert_float_data(output_vector_file); + else if(data_type == "float64") + convert_float_data(output_vector_file); + else if(data_type == "string") + convert_string_data(output_vector_file); + else + throw runtime_error("Unknown data type \""+data_type+"\""); + }catch(exception&err){ + cerr << "Stopped on exception : "<< err.what() << endl; + } +} diff --git a/id_queue.h b/id_queue.h new file mode 100644 index 0000000..9614784 --- /dev/null +++ b/id_queue.h @@ -0,0 +1,178 @@ +#ifndef ID_QUEUE_H +#define ID_QUEUE_H + +#include "constants.h" +#include +#include +#include + +struct IDKeyPair{ + unsigned id; + unsigned key; +}; + +//! A priority queue where the elements are IDs from 0 to id_count-1 where id_count is a number that is set in the constructor. +//! The elements are sorted by integer keys. +class MinIDQueue{ +private: + static const unsigned tree_arity = 4; +public: + MinIDQueue():heap_size(0){} + + explicit MinIDQueue(unsigned id_count): + id_pos(id_count, invalid_id), + heap(id_count), + heap_size(0){} + + //! Returns whether the queue is empty. Equivalent to checking whether size() returns 0. + bool empty()const{ + return heap_size == 0; + } + + //! Returns the number of elements in the queue. + unsigned size()const{ + return heap_size; + } + + //! Returns the id_count value passed to the constructor. + unsigned id_count()const{ + return id_pos.size(); + } + + //! Checks whether an element is in the queue. + bool contains_id(unsigned id){ + assert(id < id_count()); + return id_pos[id] != invalid_id; + } + + //! Removes all elements from the queue. + void clear(){ + for(unsigned i=0; i p.key){ + heap[pos].key = p.key; + move_up_in_tree(pos); + return true; + } else { + return false; + } + } + + //! Updates the key of an element if the new key is larger than the old key. + //! Does nothing if the new key is smaller. + //! Undefined if the element is not part of the queue. + bool increase_key(IDKeyPair p){ + assert(p.id < id_count()); + assert(contains_id(p.id)); + + unsigned pos = id_pos[p.id]; + + if(heap[pos].key < p.key){ + heap[pos].key = p.key; + move_down_in_tree(pos); + return true; + } else { + return false; + } + } + +private: + void move_up_in_tree(unsigned pos){ + while(pos != 0){ + unsigned parent = (pos-1)/tree_arity; + if(heap[parent].key > heap[pos].key){ + std::swap(heap[pos], heap[parent]); + std::swap(id_pos[heap[pos].id], id_pos[heap[parent].id]); + } + pos = parent; + } + } + + void move_down_in_tree(unsigned pos){ + for(;;){ + unsigned first_child = tree_arity*pos+1; + if(first_child >= heap_size) + return; // no children + unsigned smallest_child = first_child; + for(unsigned c = first_child+1; c < std::min(tree_arity*pos+tree_arity+1, heap_size); ++c){ + if(heap[smallest_child].key > heap[c].key){ + smallest_child = c; + } + } + + if(heap[smallest_child].key >= heap[pos].key) + return; // no child is smaller + + std::swap(heap[pos], heap[smallest_child]); + std::swap(id_pos[heap[pos].id], id_pos[heap[smallest_child].id]); + pos = smallest_child; + } + } + + std::vectorid_pos; + std::vectorheap; + + unsigned heap_size; +}; + +#endif diff --git a/timer.h b/timer.h new file mode 100755 index 0000000..6ee97e5 --- /dev/null +++ b/timer.h @@ -0,0 +1,13 @@ +#ifndef TIMER_H +#define TIMER_H + +#include + +inline +long long get_micro_time(){ + timeval t; + gettimeofday(&t, 0); + return t.tv_sec*1000000ll+t.tv_usec; +} + +#endif diff --git a/vector_io.h b/vector_io.h new file mode 100644 index 0000000..4cac56f --- /dev/null +++ b/vector_io.h @@ -0,0 +1,79 @@ +#ifndef VECTOR_IO_H +#define VECTPR_IO_H + +#include +#include +#include +#include + +template +void save_vector(const std::string&file_name, const std::vector&vec){ + std::ofstream out(file_name, std::ios::binary); + if(!out) + throw std::runtime_error("Can not open \""+file_name+"\" for writing."); + out.write(reinterpret_cast(&vec[0]), vec.size()*sizeof(T)); +} + +template +std::vectorload_vector(const std::string&file_name){ + std::ifstream in(file_name, std::ios::binary); + if(!in) + throw std::runtime_error("Can not open \""+file_name+"\" for reading."); + in.seekg(0, std::ios::end); + unsigned long long file_size = in.tellg(); + if(file_size % sizeof(T) != 0) + throw std::runtime_error("File \""+file_name+"\" can not be a vector of the requested type because it's size is no multiple of the element type's size."); + in.seekg(0, std::ios::beg); + std::vectorvec(file_size / sizeof(T)); + in.read(reinterpret_cast(&vec[0]), file_size); + return vec; // NVRO +} + +template<> +void save_vector(const std::string&file_name, const std::vector&vec){ + std::ofstream out(file_name, std::ios::binary); + for(unsigned i=0; i +std::vectorload_vector(const std::string&file_name){ + std::vectordata = load_vector(file_name); + std::vectorret; + std::vector::const_iterator + str_begin = data.begin(), + str_end = data.begin(), + data_end = data.end(); + + while(str_end != data_end){ + if(*str_end == '\0'){ + ret.push_back(std::string(str_begin, str_end)); + ++str_end; + str_begin = str_end; + }else{ + ++str_end; + } + } + + ret.shrink_to_fit(); + return ret; // NVRO +} + +template +void save_value(const std::string&file_name, const T&val){ + save_vector(file_name, std::vector{val}); +} + +template +T load_value(const std::string&file_name){ + auto v = load_vector(file_name); + if(v.empty()) + throw std::runtime_error(file_name+" is empty"); + if(v.size() >= 2) + throw std::runtime_error(file_name+" contains more than one element"); + return v.front(); +} + +#endif -- 2.34.1