From ed60e2ac6d76414ab9af484a2e3d13b990cad2c9 Mon Sep 17 00:00:00 2001 From: hppeng Date: Sat, 1 Apr 2023 20:27:52 -0700 Subject: [PATCH] Bitvector c++ implementation --- js/c++/Makefile | 2 +- js/c++/utils.cpp | 37 +++++- js/c++/utils.js.in | 243 +------------------------------------ js/c++/utils/bitvector.cpp | 193 +++++++++++++++++++++++++++++ js/c++/utils/bitvector.h | 90 ++++++++++++++ 5 files changed, 321 insertions(+), 244 deletions(-) create mode 100644 js/c++/utils/bitvector.cpp create mode 100644 js/c++/utils/bitvector.h diff --git a/js/c++/Makefile b/js/c++/Makefile index 11617b3..0997374 100644 --- a/js/c++/Makefile +++ b/js/c++/Makefile @@ -9,7 +9,7 @@ all: utils.js powders.js powders.js: powders.cpp -utils.js: utils.cpp utils/math_utils.cpp utils/base64.cpp +utils.js: utils.cpp utils/math_utils.cpp utils/base64.cpp utils/bitvector.cpp .PHONY: clean clean: diff --git a/js/c++/utils.cpp b/js/c++/utils.cpp index f52ef1c..a2102d7 100644 --- a/js/c++/utils.cpp +++ b/js/c++/utils.cpp @@ -6,7 +6,9 @@ using namespace emscripten; #include "utils.h" #include "utils/math_utils.h" #include "utils/base64.h" +#include "utils/bitvector.h" #include +#include #include namespace utils { @@ -62,10 +64,28 @@ val __perm_wrap(val a) { } return return_array; } -#endif +/** Appends data to the BitVector. + * + * @param {Number | String} data - The data to append. + * @param {Number} length - The length, in bits, of the new data. This is ignored if data is a string. + */ +void __BitVector_append(BitVector& self, val data, val length) { + if (data.typeOf().as() == "string") { + self.append(data.as()); + return; + } + if (data.typeOf().as() == "number") { + size_t num = data.as(); + //if (num >= 1<()); + return; + } + throw std::invalid_argument("BitVector must be appended with a Number or a B64 String"); +} -#ifdef __EMSCRIPTEN__ EMSCRIPTEN_BINDINGS(utils) { function("clamp", &clamp); function("round_near", &round_near); @@ -74,6 +94,19 @@ EMSCRIPTEN_BINDINGS(utils) { function("b64_toInt", &Base64::toInt); function("b64_toIntSigned", &Base64::toIntSigned); function("perm", &__perm_wrap); + class_("BitVector") + .constructor() + .constructor() + .function("read_bit", &BitVector::read_bit) + .function("slice", &BitVector::slice) + .function("set_bit", &BitVector::set_bit) + .function("clear_bit", &BitVector::clear_bit) + .function("toB64", &BitVector::toB64) + .function("toString", &BitVector::toString) + .function("toStringR", &BitVector::toStringR) + .function("append", select_overload(&BitVector::append)) + .function("append", select_overload(&BitVector::append)) + ; } #endif diff --git a/js/c++/utils.js.in b/js/c++/utils.js.in index 985e61b..5f5326b 100644 --- a/js/c++/utils.js.in +++ b/js/c++/utils.js.in @@ -10,7 +10,8 @@ const Base64 = { fromIntN: _module_utils.b64_fromIntN, toInt: _module_utils.b64_toInt, toIntSigned: _module_utils.b64_toIntSigned -} +}; +const BitVector = _module_utils.BitVector; // const perm = _module_utils.perm; way too garbage to use... we supply JS perm. // Permutations in js reference (also cool algorithm): @@ -74,246 +75,6 @@ function getValue(id) { return document.getElementById(id).value; } -/** A class used to represent an arbitrary length bit vector. Very useful for encoding and decoding. - * - */ - class BitVector { - - /** Constructs an arbitrary-length bit vector. - * @class - * @param {String | Number} data - The data to append. - * @param {Number} length - A set length for the data. Ignored if data is a string. - * - * The structure of the Uint32Array should be [[last, ..., first], ..., [last, ..., first], [empty space, last, ..., first]] - */ - constructor(data, length) { - let bit_vec = []; - - if (typeof data === "string") { - let int = 0; - let bv_idx = 0; - length = data.length * 6; - - for (let i = 0; i < data.length; i++) { - let char = Base64.toInt(data[i]); - let pre_pos = bv_idx % 32; - int |= (char << bv_idx); - bv_idx += 6; - let post_pos = bv_idx % 32; - if (post_pos < pre_pos) { //we have to have filled up the integer - bit_vec.push(int); - int = (char >>> (6 - post_pos)); - } - - if (i == data.length - 1 && post_pos != 0) { - bit_vec.push(int); - } - } - } else if (typeof data === "number") { - if (typeof length === "undefined") - if (length < 0) { - throw new RangeError("BitVector must have nonnegative length."); - } - - //convert to int just in case - data = Math.round(data); - - //range of numbers that won't fit in a uint32 - if (data > 2**32 - 1 || data < -(2 ** 32 - 1)) { - throw new RangeError("Numerical data has to fit within a 32-bit integer range to instantiate a BitVector."); - } - bit_vec.push(data); - } else { - throw new TypeError("BitVector must be instantiated with a Number or a B64 String"); - } - - this.length = length; - this.bits = new Uint32Array(bit_vec); - } - - /** Return value of bit at index idx. - * - * @param {Number} idx - The index to read - * - * @returns The bit value at position idx - */ - read_bit(idx) { - if (idx < 0 || idx >= this.length) { - throw new RangeError("Cannot read bit outside the range of the BitVector. ("+idx+" > "+this.length+")"); - } - return ((this.bits[Math.floor(idx / 32)] & (1 << idx)) == 0 ? 0 : 1); - } - - /** Returns an integer value (if possible) made from the range of bits [start, end). Undefined behavior if the range to read is too big. - * - * @param {Number} start - The index to start slicing from. Inclusive. - * @param {Number} end - The index to end slicing at. Exclusive. - * - * @returns An integer representation of the sliced bits. - */ - slice(start, end) { - //TO NOTE: JS shifting is ALWAYS in mod 32. a << b will do a << (b mod 32) implicitly. - - if (end < start) { - throw new RangeError("Cannot slice a range where the end is before the start."); - } else if (end == start) { - return 0; - } else if (end - start > 32) { - //requesting a slice of longer than 32 bits (safe integer "length") - throw new RangeError("Cannot slice a range of longer than 32 bits (unsafe to store in an integer)."); - } - - let res = 0; - if (Math.floor((end - 1) / 32) == Math.floor(start / 32)) { - //the range is within 1 uint32 section - do some relatively fast bit twiddling - res = (this.bits[Math.floor(start / 32)] & ~((((~0) << ((end - 1))) << 1) | ~((~0) << (start)))) >>> (start % 32); - } else { - //the number of bits in the uint32s - let start_pos = (start % 32); - let int_idx = Math.floor(start/32); - res = (this.bits[int_idx] & ((~0) << (start))) >>> (start_pos); - res |= (this.bits[int_idx + 1] & ~((~0) << (end))) << (32 - start_pos); - } - - return res; - - // General code - slow - // for (let i = start; i < end; i++) { - // res |= (get_bit(i) << (i - start)); - // } - } - - /** Assign bit at index idx to 1. - * - * @param {Number} idx - The index to set. - */ - set_bit(idx) { - if (idx < 0 || idx >= this.length) { - throw new RangeError("Cannot set bit outside the range of the BitVector."); - } - this.bits[Math.floor(idx / 32)] |= (1 << idx % 32); - } - - /** Assign bit at index idx to 0. - * - * @param {Number} idx - The index to clear. - */ - clear_bit(idx) { - if (idx < 0 || idx >= this.length) { - throw new RangeError("Cannot clear bit outside the range of the BitVector."); - } - this.bits[Math.floor(idx / 32)] &= ~(1 << idx % 32); - } - - /** Creates a string version of the bit vector in B64. Does not keep the order of elements a sensible human readable format. - * - * @returns A b64 string representation of the BitVector. - */ - toB64() { - if (this.length == 0) { - return ""; - } - let b64_str = ""; - let i = 0; - while (i < this.length) { - b64_str += Base64.fromIntN(this.slice(i, i + 6), 1); - i += 6; - } - - return b64_str; - } - - /** Returns a BitVector in bitstring format. Probably only useful for dev debugging. - * - * @returns A bit string representation of the BitVector. Goes from higher-indexed bits to lower-indexed bits. (n ... 0) - */ - toString() { - let ret_str = ""; - for (let i = 0; i < this.length; i++) { - ret_str = (this.read_bit(i) == 0 ? "0": "1") + ret_str; - } - return ret_str; - } - - /** Returns a BitVector in bitstring format. Probably only useful for dev debugging. - * - * @returns A bit string representation of the BitVector. Goes from lower-indexed bits to higher-indexed bits. (0 ... n) - */ - toStringR() { - let ret_str = ""; - for (let i = 0; i < this.length; i++) { - ret_str += (this.read_bit(i) == 0 ? "0": "1"); - } - return ret_str; - } - - /** Appends data to the BitVector. - * - * @param {Number | String} data - The data to append. - * @param {Number} length - The length, in bits, of the new data. This is ignored if data is a string. - */ - append(data, length) { - if (length < 0) { - throw new RangeError("BitVector length must increase by a nonnegative number."); - } - - let bit_vec = []; - for (const uint of this.bits) { - bit_vec.push(uint); - } - if (typeof data === "string") { - let int = bit_vec[bit_vec.length - 1]; - let bv_idx = this.length; - length = data.length * 6; - let updated_curr = false; - for (let i = 0; i < data.length; i++) { - let char = Base64.toInt(data[i]); - let pre_pos = bv_idx % 32; - int |= (char << bv_idx); - bv_idx += 6; - let post_pos = bv_idx % 32; - if (post_pos < pre_pos) { //we have to have filled up the integer - if (bit_vec.length == this.bits.length && !updated_curr) { - bit_vec[bit_vec.length - 1] = int; - updated_curr = true; - } else { - bit_vec.push(int); - } - int = (char >>> (6 - post_pos)); - } - - if (i == data.length - 1) { - if (bit_vec.length == this.bits.length && !updated_curr) { - bit_vec[bit_vec.length - 1] = int; - } else if (post_pos != 0) { - bit_vec.push(int); - } - } - } - } else if (typeof data === "number") { - //convert to int just in case - let int = Math.round(data); - - //range of numbers that "could" fit in a uint32 -> [0, 2^32) U [-2^31, 2^31) - if (data > 2**32 - 1 || data < -(2 ** 31)) { - throw new RangeError("Numerical data has to fit within a 32-bit integer range to instantiate a BitVector."); - } - //could be split between multiple new ints - //reminder that shifts implicitly mod 32 - bit_vec[bit_vec.length - 1] |= ((int & ~((~0) << length)) << (this.length)); - if (((this.length - 1) % 32 + 1) + length > 32) { - bit_vec.push(int >>> (32 - this.length)); - } - } else { - throw new TypeError("BitVector must be appended with a Number or a B64 String"); - } - - this.bits = new Uint32Array(bit_vec); - this.length += length; - } -}; - - /* Turns a raw stat and a % stat into a final stat on the basis that - raw and >= 100% becomes 0 and + raw and <=-100% becomes negative. Pct would be 0.80 for 80%, -1.20 for 120%, etc diff --git a/js/c++/utils/bitvector.cpp b/js/c++/utils/bitvector.cpp new file mode 100644 index 0000000..fc46029 --- /dev/null +++ b/js/c++/utils/bitvector.cpp @@ -0,0 +1,193 @@ +#include "bitvector.h" +#include "base64.h" +#include +#include +#include + +BitVector::BitVector(const std::string b64_data) { + length = b64_data.length() * 6; + data.reserve(length/bitvec_data_s + 1); + + bitvec_data_t scratch = 0; + size_t bitvec_index = 0; + for (size_t i = 0; i < b64_data.length(); ++i) { + size_t char_num = Base64::digitsMap.find(b64_data[i])->second; + unsigned int pre_pos = bitvec_index % bitvec_data_s; + scratch |= char_num << pre_pos; + bitvec_index += 6; // b64 is 6 bits per character. + unsigned int post_pos = bitvec_index % bitvec_data_s; + if (post_pos < pre_pos) { //we have to have filled up the integer + data.push_back(scratch); + scratch = (char_num >> (6 - post_pos)); + } + if (i == b64_data.length()-1 && post_pos != 0) { + data.push_back(scratch); + } + } +} + +BitVector::BitVector(bitvec_data_t num, size_t length) { + if (length < 0) { + throw std::range_error("BitVector must have nonnegative length."); + } + data.push_back(num); + this->length = length; +} + +/** Return value of bit at index idx. + * + * @param {Number} idx - The index to read + * + * @returns The bit value at position idx + */ +bool BitVector::read_bit(size_t idx) const { + if (idx < 0 || idx >= length) { + std::stringstream ss; + ss << "Cannot read bit outside the range of the BitVector. (" << idx << " > " << length << ")"; + throw std::range_error(ss.str()); + } + return (data[idx / bitvec_data_s] & (1 << (idx % bitvec_data_s))) == 0 ? 0 : 1; +} + +/** Returns an integer value (if possible) made from the range of bits [start, end). Undefined behavior if the range to read is too big. + * + * @param {Number} start - The index to start slicing from. Inclusive. + * @param {Number} end - The index to end slicing at. Exclusive. + * + * @returns An integer representation of the sliced bits. + */ +bitvec_data_t BitVector::slice(size_t start, size_t end) const { + if (end < start) { + throw std::range_error("Cannot slice a range where the end is before the start."); + } else if (end == start) { + return 0; + } else if (end - start > bitvec_data_s) { + //requesting a slice of longer than the size of a single data element (safe integer "length") + std::stringstream ss; + ss << "Cannot slice a range of longer than " << bitvec_data_s << " bits (unsafe to store in an integer)."; + throw std::range_error(ss.str()); + } + bitvec_data_t res = 0; + if ((end-1) / bitvec_data_s == start / bitvec_data_s) { + //the range is within 1 uint32 section - do some relatively fast bit twiddling + //res = (this.bits[Math.floor(start / 32)] & ~((((~0) << ((end - 1))) << 1) | ~((~0) << (start)))) >>> (start % 32); + + bitvec_data_t mask = (~(((~0) << ((end - 1) % bitvec_data_s + 1)))) & ((~0) << (start % bitvec_data_s)); + res = (data[start / bitvec_data_s] & mask) >> (start % bitvec_data_s); + } + else { + //the number of bits in the uint32s + //let start_pos = (start % 32); + //let int_idx = Math.floor(start/32); + //res = (this.bits[int_idx] & ((~0) << (start))) >>> (start_pos); + //res |= (this.bits[int_idx + 1] & ~((~0) << (end))) << (32 - start_pos); + + unsigned int start_pos = start % bitvec_data_s; + unsigned int int_idx = start / bitvec_data_s; + res = (data[int_idx] & ((~0) << start_pos)) >> start_pos; + // IMPORTANT: (end % bitvec_data_s) is never zero. + res |= (data[int_idx + 1] & ~((~0) << (end % bitvec_data_s))) << (bitvec_data_s - start_pos); + } + return res; + + // General code - slow + // for (let i = start; i < end; i++) { + // res |= (get_bit(i) << (i - start)); + // } +} + +/** Assign bit at index idx to 1. + * + * @param {Number} idx - The index to set. + */ +void BitVector::set_bit(size_t idx) { + if (idx < 0 || idx >= length) { + throw std::range_error("Cannot set bit outside the range of the BitVector."); + } + data[idx / bitvec_data_s] |= (1 << (idx % bitvec_data_s)); +} + +/** Assign bit at index idx to 0. + * + * @param {Number} idx - The index to clear. + */ +void BitVector::clear_bit(size_t idx) { + if (idx < 0 || idx >= length) { + throw std::range_error("Cannot clear bit outside the range of the BitVector."); + } + data[idx / bitvec_data_s] &= ~(1 << (idx % bitvec_data_s)); +} + +/** Creates a string version of the bit vector in B64. Does not keep the order of elements a sensible human readable format. + * + * @returns A b64 string representation of the BitVector. + */ +std::string BitVector::toB64() const { + if (length == 0) { + return ""; + } + std::stringstream b64_str; + size_t i = 0; + while (i < length) { + b64_str << Base64::fromIntN(this->slice(i, i + 6), 1); + i += 6; + } + + return b64_str.str(); +} + +/** Returns a BitVector in bitstring format. Probably only useful for dev debugging. + * + * @returns A bit string representation of the BitVector. Goes from higher-indexed bits to lower-indexed bits. (n ... 0) + */ +std::string BitVector::toString() const { + std::stringstream ret_str; + for (size_t i = length; i != 0; --i) { + ret_str << (this->read_bit(i-1) ? "1": "0"); + } + return ret_str.str(); +} + +/** Returns a BitVector in bitstring format. Probably only useful for dev debugging. + * + * @returns A bit string representation of the BitVector. Goes from lower-indexed bits to higher-indexed bits. (0 ... n) + */ +std::string BitVector::toStringR() const { + std::stringstream ret_str; + for (size_t i = 0; i < length; ++i) { + ret_str << (this->read_bit(i) ? "1": "0"); + } + return ret_str.str(); +} + +void BitVector::append(const BitVector& other) { + data.reserve(data.size() + other.data.size()); + + size_t other_index = 0; + if (this->length % bitvec_data_s != 0) { + // fill in the last block. + bitvec_data_t scratch = data[data.size() - 1]; + size_t bits_remaining = bitvec_data_s - (this->length % bitvec_data_s); + + size_t n = std::min(other.length, bits_remaining); + scratch |= (other.slice(0, n) << (this->length % bitvec_data_s)); + data[data.size() - 1] = scratch; + other_index += n; + } + while (other_index != other.length) { + size_t n = std::min(other.length - other_index, (size_t)bitvec_data_s); + data.push_back(other.slice(other_index, other_index + n)); + other_index += n; + } + this->length += other.length; +} + +void BitVector::append(const std::string b64_data) { + BitVector tmp(b64_data); + this->append(tmp); +} + +void BitVector::append(bitvec_data_t num, size_t length) { + BitVector tmp(num, length); + this->append(tmp); +} diff --git a/js/c++/utils/bitvector.h b/js/c++/utils/bitvector.h new file mode 100644 index 0000000..cb74b43 --- /dev/null +++ b/js/c++/utils/bitvector.h @@ -0,0 +1,90 @@ +#pragma once +#include +#include + +#ifdef __EMSCRIPTEN__ +#define bitvec_data_s 32 +#define bitvec_data_t uint32_t +#else +#define bitvec_data_s 64 +#define bitvec_data_t uint64_t +#endif + +class BitVector { + +/** A class used to represent an arbitrary length bit vector. Very useful for encoding and decoding. + * + */ +public: + /** Constructs an arbitrary-length bit vector. + * @class + * @param {String | Number} data - The data to append. + * @param {Number} length - A set length for the data. Ignored if data is a string. + * + * The structure of the Uint32Array should be [[last, ..., first], ..., [last, ..., first], [empty space, last, ..., first]] + */ + BitVector() {}; + BitVector(const BitVector& other) : data(other.data), length(other.length) {}; + BitVector(const std::string b64_data); + BitVector(bitvec_data_t num, size_t length); + + /** Return value of bit at index idx. + * + * @param {Number} idx - The index to read + * + * @returns The bit value at position idx + */ + bool read_bit(size_t idx) const; + + /** Returns an integer value (if possible) made from the range of bits [start, end). Undefined behavior if the range to read is too big. + * + * @param {Number} start - The index to start slicing from. Inclusive. + * @param {Number} end - The index to end slicing at. Exclusive. + * + * @returns An integer representation of the sliced bits. + */ + bitvec_data_t slice(size_t start, size_t end) const; + + /** Assign bit at index idx to 1. + * + * @param {Number} idx - The index to set. + */ + void set_bit(size_t idx); + + /** Assign bit at index idx to 0. + * + * @param {Number} idx - The index to clear. + */ + void clear_bit(size_t idx); + + /** Creates a string version of the bit vector in B64. Does not keep the order of elements a sensible human readable format. + * + * @returns A b64 string representation of the BitVector. + */ + std::string toB64() const; + + /** Returns a BitVector in bitstring format. Probably only useful for dev debugging. + * + * @returns A bit string representation of the BitVector. Goes from higher-indexed bits to lower-indexed bits. (n ... 0) + */ + std::string toString() const; + + /** Returns a BitVector in bitstring format. Probably only useful for dev debugging. + * + * @returns A bit string representation of the BitVector. Goes from lower-indexed bits to higher-indexed bits. (0 ... n) + */ + std::string toStringR() const; + + /** Appends data to the BitVector. + * + * @param {Number | String} data - The data to append. + * @param {Number} length - The length, in bits, of the new data. This is ignored if data is a string. + */ + void append(const BitVector& other); + void append(const std::string b64_data); + void append(bitvec_data_t num, size_t length); + +private: + std::vector data; + size_t length; +};