From ed60e2ac6d76414ab9af484a2e3d13b990cad2c9 Mon Sep 17 00:00:00 2001
From: hppeng <hppeng>
Date: Sat, 1 Apr 2023 20:27:52 -0700
Subject: [PATCH] Bitvector c++ implementation

---
 js/c++/Makefile            |   2 +-
 js/c++/utils.cpp           |  37 +++++-
 js/c++/utils.js.in         | 243 +------------------------------------
 js/c++/utils/bitvector.cpp | 193 +++++++++++++++++++++++++++++
 js/c++/utils/bitvector.h   |  90 ++++++++++++++
 5 files changed, 321 insertions(+), 244 deletions(-)
 create mode 100644 js/c++/utils/bitvector.cpp
 create mode 100644 js/c++/utils/bitvector.h
diff --git a/js/c++/Makefile b/js/c++/Makefile
index 11617b3..0997374 100644
--- a/js/c++/Makefile
+++ b/js/c++/Makefile
@@ -9,7 +9,7 @@ all: utils.js powders.js
 
 powders.js: powders.cpp
 
-utils.js: utils.cpp utils/math_utils.cpp utils/base64.cpp
+utils.js: utils.cpp utils/math_utils.cpp utils/base64.cpp utils/bitvector.cpp
 
 .PHONY: clean
 clean:
diff --git a/js/c++/utils.cpp b/js/c++/utils.cpp
index f52ef1c..a2102d7 100644
--- a/js/c++/utils.cpp
+++ b/js/c++/utils.cpp
@@ -6,7 +6,9 @@ using namespace emscripten;
 #include "utils.h"
 #include "utils/math_utils.h"
 #include "utils/base64.h"
+#include "utils/bitvector.h"
 #include <algorithm>
+#include <memory>
 #include <vector>
 
 namespace utils {
@@ -62,10 +64,28 @@ val __perm_wrap(val a) {
     }
     return return_array;
 }
-#endif
 
+/** Appends JavaScript-supplied data to a BitVector, dispatching on the JS type of `data`.
+ *
+ * @param {Number | String} data - A B64 string, or a number holding the bits to append.
+ * @param {Number} length - The length, in bits, of a numeric `data`. Not read when `data` is a string.
+ */
+void __BitVector_append(BitVector& self, val data, val length) {
+    // typeof is looked up once; both branches compare against the same string.
+    const std::string js_type = data.typeOf().as<std::string>();
+    if (js_type == "string") {
+        self.append(data.as<std::string>());
+    } else if (js_type == "number") {
+        // NOTE(review): no range check is done on the number here (the removed JS class had one).
+        const size_t value = data.as<size_t>();
+        const size_t bit_count = length.as<size_t>();
+        self.append(value, bit_count);
+    } else {
+        // Every other JS type is rejected.
+        throw std::invalid_argument("BitVector must be appended with a Number or a B64 String");
+    }
+}
 
-#ifdef __EMSCRIPTEN__
 EMSCRIPTEN_BINDINGS(utils) {
     function("clamp", &clamp);
     function("round_near", &round_near);
@@ -74,6 +94,19 @@ EMSCRIPTEN_BINDINGS(utils) {
     function("b64_toInt", &Base64::toInt);
     function("b64_toIntSigned", &Base64::toIntSigned);
     function("perm", &__perm_wrap);
+    class_<BitVector>("BitVector")
+        .constructor<std::string>()
+        .constructor<size_t, size_t>()
+        .function("read_bit", &BitVector::read_bit)
+        .function("slice", &BitVector::slice)
+        .function("set_bit", &BitVector::set_bit)
+        .function("clear_bit", &BitVector::clear_bit)
+        .function("toB64", &BitVector::toB64)
+        .function("toString", &BitVector::toString)
+        .function("toStringR", &BitVector::toStringR)
+        .function("append", select_overload<void(std::string)>(&BitVector::append))
+        .function("append", select_overload<void(bitvec_data_t, size_t)>(&BitVector::append))
+        ;
 }
 #endif
 
diff --git a/js/c++/utils.js.in b/js/c++/utils.js.in
index 985e61b..5f5326b 100644
--- a/js/c++/utils.js.in
+++ b/js/c++/utils.js.in
@@ -10,7 +10,8 @@ const Base64 = {
     fromIntN: _module_utils.b64_fromIntN,
     toInt: _module_utils.b64_toInt,
     toIntSigned: _module_utils.b64_toIntSigned
-}
+};
+const BitVector = _module_utils.BitVector;
 
 // const perm = _module_utils.perm; way too garbage to use... we supply JS perm.
 // Permutations in js reference (also cool algorithm):
@@ -74,246 +75,6 @@ function getValue(id) {
     return document.getElementById(id).value;
 }
 
-/** A class used to represent an arbitrary length bit vector. Very useful for encoding and decoding.
- *
- */
- class BitVector {
-
-    /** Constructs an arbitrary-length bit vector.
-     * @class
-     * @param {String | Number} data - The data to append.
-     * @param {Number} length - A set length for the data. Ignored if data is a string.
-     *
-     * The structure of the Uint32Array should be [[last, ..., first], ..., [last, ..., first], [empty space, last, ..., first]]
-     */
-    constructor(data, length) {
-        let bit_vec = [];
-
-        if (typeof data === "string") {
-            let int = 0;
-            let bv_idx = 0;
-            length = data.length * 6;
-
-            for (let i = 0; i < data.length; i++) {
-                let char = Base64.toInt(data[i]);
-                let pre_pos = bv_idx % 32;
-                int |= (char << bv_idx);
-                bv_idx += 6;
-                let post_pos = bv_idx % 32;
-                if (post_pos < pre_pos) { //we have to have filled up the integer
-                    bit_vec.push(int);
-                    int = (char >>> (6 - post_pos));
-                }
-
-                if (i == data.length - 1 && post_pos != 0) {
-                    bit_vec.push(int);
-                }
-            }
-        } else if (typeof data === "number") {
-            if (typeof length === "undefined")
-            if (length < 0) {
-                throw new RangeError("BitVector must have nonnegative length.");
-            }
-
-            //convert to int just in case
-            data = Math.round(data);
-
-            //range of numbers that won't fit in a uint32
-            if (data > 2**32 - 1 || data < -(2 ** 32 - 1)) {
-                throw new RangeError("Numerical data has to fit within a 32-bit integer range to instantiate a BitVector.");
-            }
-            bit_vec.push(data);
-        } else {
-            throw new TypeError("BitVector must be instantiated with a Number or a B64 String");
-        }
-
-        this.length = length;
-        this.bits = new Uint32Array(bit_vec);
-    }
-
-    /** Return value of bit at index idx.
-     *
-     * @param {Number} idx - The index to read
-     *
-     * @returns The bit value at position idx
-     */
-    read_bit(idx) {
-        if (idx < 0 || idx >= this.length) {
-            throw new RangeError("Cannot read bit outside the range of the BitVector. ("+idx+" > "+this.length+")");
-        }
-        return ((this.bits[Math.floor(idx / 32)] & (1 << idx)) == 0 ? 0 : 1);
-    }
-
-    /** Returns an integer value (if possible) made from the range of bits [start, end). Undefined behavior if the range to read is too big.
-     *
-     * @param {Number} start - The index to start slicing from. Inclusive.
-     * @param {Number} end - The index to end slicing at. Exclusive.
-     *
-     * @returns An integer representation of the sliced bits.
-     */
-    slice(start, end) {
-        //TO NOTE: JS shifting is ALWAYS in mod 32. a << b will do a << (b mod 32) implicitly.
-
-        if (end < start) {
-            throw new RangeError("Cannot slice a range where the end is before the start.");
-        } else if (end == start) {
-            return 0;
-        } else if (end - start > 32) {
-            //requesting a slice of longer than 32 bits (safe integer "length")
-            throw new RangeError("Cannot slice a range of longer than 32 bits (unsafe to store in an integer).");
-        }
-
-        let res = 0;
-        if (Math.floor((end - 1) / 32) == Math.floor(start / 32)) {
-            //the range is within 1 uint32 section - do some relatively fast bit twiddling
-            res = (this.bits[Math.floor(start / 32)] & ~((((~0) << ((end - 1))) << 1) | ~((~0) << (start)))) >>> (start % 32);
-        } else {
-            //the number of bits in the uint32s
-            let start_pos = (start % 32);
-            let int_idx = Math.floor(start/32);
-            res = (this.bits[int_idx] & ((~0) << (start))) >>> (start_pos);
-            res |= (this.bits[int_idx + 1] & ~((~0) << (end))) << (32 - start_pos);
-        }
-
-        return res;
-
-        // General code - slow
-        // for (let i = start; i < end; i++) {
-        //     res |= (get_bit(i) << (i - start));
-        // }
-    }
-
-    /** Assign bit at index idx to 1.
-     *
-     * @param {Number} idx - The index to set.
-     */
-    set_bit(idx) {
-        if (idx < 0 || idx >= this.length) {
-            throw new RangeError("Cannot set bit outside the range of the BitVector.");
-        }
-        this.bits[Math.floor(idx / 32)] |= (1 << idx % 32);
-    }
-
-    /** Assign bit at index idx to 0.
-     *
-     * @param {Number} idx - The index to clear.
-     */
-    clear_bit(idx) {
-        if (idx < 0 || idx >= this.length) {
-            throw new RangeError("Cannot clear bit outside the range of the BitVector.");
-        }
-        this.bits[Math.floor(idx / 32)] &= ~(1 << idx % 32);
-    }
-
-    /** Creates a string version of the bit vector in B64. Does not keep the order of elements a sensible human readable format.
-     *
-     * @returns A b64 string representation of the BitVector.
-     */
-    toB64() {
-        if (this.length == 0) {
-            return "";
-        }
-        let b64_str = "";
-        let i = 0;
-        while (i < this.length) {
-            b64_str += Base64.fromIntN(this.slice(i, i + 6), 1);
-            i += 6;
-        }
-
-        return b64_str;
-    }
-
-    /** Returns a BitVector in bitstring format. Probably only useful for dev debugging.
-     *
-     * @returns A bit string representation of the BitVector. Goes from higher-indexed bits to lower-indexed bits. (n ... 0)
-     */
-    toString() {
-        let ret_str = "";
-        for (let i = 0; i < this.length; i++) {
-            ret_str = (this.read_bit(i) == 0 ? "0": "1") + ret_str;
-        }
-        return ret_str;
-    }
-
-     /** Returns a BitVector in bitstring format. Probably only useful for dev debugging.
-     *
-     * @returns A bit string representation of the BitVector. Goes from lower-indexed bits to higher-indexed bits. (0 ... n)
-     */
-    toStringR() {
-        let ret_str = "";
-        for (let i = 0; i < this.length; i++) {
-            ret_str += (this.read_bit(i) == 0 ? "0": "1");
-        }
-        return ret_str;
-    }
-
-    /** Appends data to the BitVector.
-     *
-     * @param {Number | String} data - The data to append.
-     * @param {Number} length - The length, in bits, of the new data. This is ignored if data is a string.
-     */
-     append(data, length) {
-        if (length < 0) {
-            throw new RangeError("BitVector length must increase by a nonnegative number.");
-        }
-
-        let bit_vec = [];
-        for (const uint of this.bits) {
-            bit_vec.push(uint);
-        }
-        if (typeof data === "string") {
-            let int = bit_vec[bit_vec.length - 1];
-            let bv_idx = this.length;
-            length = data.length * 6;
-            let updated_curr = false;
-            for (let i = 0; i < data.length; i++) {
-                let char = Base64.toInt(data[i]);
-                let pre_pos = bv_idx % 32;
-                int |= (char << bv_idx);
-                bv_idx += 6;
-                let post_pos = bv_idx % 32;
-                if (post_pos < pre_pos) { //we have to have filled up the integer
-                    if (bit_vec.length == this.bits.length && !updated_curr) {
-                        bit_vec[bit_vec.length - 1] = int;
-                        updated_curr = true;
-                    } else {
-                        bit_vec.push(int);
-                    }
-                    int = (char >>> (6 - post_pos));
-                }
-
-                if (i == data.length - 1) {
-                    if (bit_vec.length == this.bits.length && !updated_curr) {
-                        bit_vec[bit_vec.length - 1] = int;
-                    } else if (post_pos != 0) {
-                        bit_vec.push(int);
-                    }
-                }
-            }
-        } else if (typeof data === "number") {
-            //convert to int just in case
-            let int = Math.round(data);
-
-            //range of numbers that "could" fit in a uint32 -> [0, 2^32) U [-2^31, 2^31)
-            if (data > 2**32 - 1 || data < -(2 ** 31)) {
-                throw new RangeError("Numerical data has to fit within a 32-bit integer range to instantiate a BitVector.");
-            }
-            //could be split between multiple new ints
-            //reminder that shifts implicitly mod 32
-            bit_vec[bit_vec.length - 1] |= ((int & ~((~0) << length)) << (this.length));
-            if (((this.length - 1) % 32 + 1) + length > 32) {
-                bit_vec.push(int >>> (32 - this.length));
-            }
-        } else {
-            throw new TypeError("BitVector must be appended with a Number or a B64 String");
-        }
-
-        this.bits = new Uint32Array(bit_vec);
-        this.length += length;
-    }
-};
-
-
 /*
     Turns a raw stat and a % stat into a final stat on the basis that - raw and >= 100% becomes 0 and + raw and <=-100% becomes negative.
     Pct would be 0.80 for 80%, -1.20 for 120%, etc
diff --git a/js/c++/utils/bitvector.cpp b/js/c++/utils/bitvector.cpp
new file mode 100644
index 0000000..fc46029
--- /dev/null
+++ b/js/c++/utils/bitvector.cpp
@@ -0,0 +1,193 @@
+#include "bitvector.h"
+#include "base64.h"
+#include <algorithm>
+#include <stdexcept>
+#include <sstream>
+
+BitVector::BitVector(const std::string b64_data) {
+    length = b64_data.length() * 6;  // b64 is 6 bits per character.
+    data.reserve(length/bitvec_data_s + 1);
+    bitvec_data_t scratch = 0;  // the word currently being filled
+    size_t bitvec_index = 0;    // bit position where the next character starts
+    for (size_t i = 0; i < b64_data.length(); ++i) {
+        const auto digit = Base64::digitsMap.find(b64_data[i]);
+        if (digit == Base64::digitsMap.end())  // the lookup used to be dereferenced without this check
+            throw std::invalid_argument("BitVector must be instantiated with a Number or a B64 String");
+        const bitvec_data_t char_num = static_cast<bitvec_data_t>(digit->second);
+        const unsigned int pre_pos = bitvec_index % bitvec_data_s;
+        scratch |= char_num << pre_pos;
+        bitvec_index += 6;
+        const unsigned int post_pos = bitvec_index % bitvec_data_s;
+        if (post_pos < pre_pos) { // crossed a word boundary: store the full word, carry the character's spilled high bits
+            data.push_back(scratch);
+            scratch = (char_num >> (6 - post_pos));
+        }
+        if (i == b64_data.length()-1 && post_pos != 0) { data.push_back(scratch); }  // flush a partial last word
+    }
+}
+
+BitVector::BitVector(bitvec_data_t num, size_t length) : length(length) {
+    // length is unsigned, so the old "length < 0" check was dead code. Size the storage from length instead:
+    // zero bits store no word (so a later append() starts word-aligned) and bits past the first word are zero.
+    data.assign(length / bitvec_data_s + (length % bitvec_data_s != 0), 0);
+    if (length == 0) return;
+    data[0] = length < bitvec_data_s ? (num & ~(~static_cast<bitvec_data_t>(0) << length)) : num;  // drop bits above length
+}
+
+/** Return value of bit at index idx.
+ * @param {Number} idx - The zero-based index to read; must be smaller than the vector's length.
+ * @returns The bit value at position idx
+ * @throws std::range_error if idx is outside the BitVector.
+ */
+bool BitVector::read_bit(size_t idx) const {
+    if (idx >= length) {  // idx is unsigned, so only the upper bound can be violated
+        std::stringstream ss;
+        ss << "Cannot read bit outside the range of the BitVector. (" << idx << " > " << length << ")";
+        throw std::range_error(ss.str());
+    }
+    // Build the mask in bitvec_data_t: shifting the int literal 1 is not valid for the top bits of a word.
+    return (data[idx / bitvec_data_s] & (static_cast<bitvec_data_t>(1) << (idx % bitvec_data_s))) != 0;
+}
+
+/** Returns an integer value made from the range of bits [start, end).
+ *
+ * Bit `start` becomes bit 0 of the result. Bits that lie beyond the stored words read as zero, so the
+ * requested range may extend past the end of the vector.
+ *
+ * @param {Number} start - The index to start slicing from. Inclusive.
+ * @param {Number} end - The index to end slicing at. Exclusive.
+ *
+ * @returns An integer representation of the sliced bits.
+ * @throws std::range_error if end < start, or if the range is wider than bitvec_data_s bits.
+ */
+bitvec_data_t BitVector::slice(size_t start, size_t end) const {
+    if (end < start) {
+        throw std::range_error("Cannot slice a range where the end is before the start.");
+    } else if (end == start) {
+        return 0;
+    } else if (end - start > bitvec_data_s) {
+        //requesting a slice of longer than the size of a single data element (safe integer "length")
+        std::stringstream ss;
+        ss << "Cannot slice a range of longer than " << bitvec_data_s << " bits (unsafe to store in an integer).";
+        throw std::range_error(ss.str());
+    }
+
+    // All masks and shifts are done in bitvec_data_t. The previous version shifted the int literal ~0,
+    // which is undefined for a shift count equal to the word size (any slice ending on a word boundary)
+    // and cannot address bits 32..63 when the word is 64 bits wide.
+    const size_t width = end - start;                  // 1 .. bitvec_data_s
+    const size_t word_idx = start / bitvec_data_s;     // word holding bit `start`
+    const size_t start_pos = start % bitvec_data_s;    // offset of bit `start` inside that word
+
+    bitvec_data_t res = 0;
+    if (word_idx < data.size()) {
+        // Low part: everything from `start` to the top of its word, moved down to bit 0.
+        res = data[word_idx] >> start_pos;
+    }
+    if (start_pos + width > bitvec_data_s && word_idx + 1 < data.size()) {
+        // High part: the range straddles a word boundary, which implies start_pos > 0, so the shift
+        // count below is always smaller than the word size.
+        res |= data[word_idx + 1] << (bitvec_data_s - start_pos);
+    }
+    if (width < bitvec_data_s) {
+        // Keep only the requested bits; a full-width slice needs no mask (and must not shift by the word size).
+        res &= ~(~static_cast<bitvec_data_t>(0) << width);
+    }
+    return res;
+}
+
+/** Assign bit at index idx to 1.
+ *
+ * @param {Number} idx - The index to set. std::range_error is thrown when it is not below the length.
+ */
+void BitVector::set_bit(size_t idx) {
+    if (idx >= length) {  // idx is unsigned, so only the upper bound can be violated
+        throw std::range_error("Cannot set bit outside the range of the BitVector.");
+    }
+    data[idx / bitvec_data_s] |= (static_cast<bitvec_data_t>(1) << (idx % bitvec_data_s));  // mask built at word width, not int
+}
+
+/** Assign bit at index idx to 0.
+ *
+ * @param {Number} idx - The index to clear. std::range_error is thrown when it is not below the length.
+ */
+void BitVector::clear_bit(size_t idx) {
+    if (idx >= length) {  // idx is unsigned, so only the upper bound can be violated
+        throw std::range_error("Cannot clear bit outside the range of the BitVector.");
+    }
+    data[idx / bitvec_data_s] &= ~(static_cast<bitvec_data_t>(1) << (idx % bitvec_data_s));  // mask built at word width, not int
+}
+
+/** Creates a string version of the bit vector in B64. Does not keep the order of elements in a sensible human readable format.
+ *
+ * Each character encodes 6 bits, lowest index first; a final partial group is zero-padded.
+ * @returns A b64 string representation of the BitVector.
+ */
+std::string BitVector::toB64() const {
+    if (length == 0) {
+        return "";
+    }
+    std::stringstream b64_str;
+    for (size_t i = 0; i < length; i += 6) {
+        // Clamp the last group to the vector: asking slice() for bits past `length` could index past the stored words.
+        const size_t group_end = std::min(i + 6, length);
+        b64_str << Base64::fromIntN(this->slice(i, group_end), 1);
+    }
+    return b64_str.str();
+}
+
+/** Renders the BitVector as a bitstring. Probably only useful for dev debugging.
+ *
+ * @returns One '0'/'1' character per bit, from higher-indexed bits to lower-indexed bits. (n ... 0)
+ */
+std::string BitVector::toString() const {
+    std::string bits(length, '0');
+    for (size_t pos = 0; pos != length; ++pos) {
+        if (this->read_bit(pos)) { bits[length - 1 - pos] = '1'; }  // bit `pos` lands `pos` places from the right
+    }
+    return bits;
+}
+
+/** Renders the BitVector as a reversed bitstring. Probably only useful for dev debugging.
+ *
+ * @returns One '0'/'1' character per bit, from lower-indexed bits to higher-indexed bits. (0 ... n)
+ */
+std::string BitVector::toStringR() const {
+    std::string bits(length, '0');
+    for (size_t pos = 0; pos != length; ++pos) {
+        if (this->read_bit(pos)) { bits[pos] = '1'; }  // character position equals bit index
+    }
+    return bits;
+}
+
+void BitVector::append(const BitVector& other) {
+    const bitvec_data_t ones = ~static_cast<bitvec_data_t>(0);
+    const size_t used = this->length % bitvec_data_s;  // bits already occupied in our last word
+    const size_t total = this->length + other.length;
+    const size_t other_words = other.length / bitvec_data_s + (other.length % bitvec_data_s != 0);
+    // Normalize our storage first: keep exactly the words that hold our bits and clear anything above length,
+    // so stray storage (e.g. the word pushed by BitVector(num, 0)) cannot misalign or pollute the appended bits.
+    data.resize(this->length / bitvec_data_s + (used != 0));
+    if (used != 0) data.back() &= ~(ones << used);
+    data.reserve(data.size() + other_words);
+    for (size_t k = 0; k < other_words; ++k) {
+        bitvec_data_t word = k < other.data.size() ? other.data[k] : 0;  // copied by value: `other` may alias *this
+        // Only the first other.length bits count; clear whatever sits above them in other's last word.
+        if (k + 1 == other_words && other.length % bitvec_data_s != 0) word &= ~(ones << (other.length % bitvec_data_s));
+        if (used == 0) { data.push_back(word); continue; }  // word-aligned: plain copy
+        data.back() |= word << used;                        // low bits fill the rest of our current last word
+        data.push_back(word >> (bitvec_data_s - used));     // high bits spill into a fresh word
+    }
+    data.resize(total / bitvec_data_s + (total % bitvec_data_s != 0));  // drop a spill word that holds no bits
+    this->length = total;
+}
+
+void BitVector::append(const std::string b64_data) {
+    // Decode the B64 text into a temporary vector and hand it to the BitVector overload.
+    this->append(BitVector(b64_data));
+}
+
+void BitVector::append(bitvec_data_t num, size_t length) {
+    // Wrap the number in a temporary `length`-bit vector and hand it to the BitVector overload.
+    this->append(BitVector(num, length));
+}
diff --git a/js/c++/utils/bitvector.h b/js/c++/utils/bitvector.h
new file mode 100644
index 0000000..cb74b43
--- /dev/null
+++ b/js/c++/utils/bitvector.h
@@ -0,0 +1,90 @@
+#pragma once
+#include <string>
+#include <vector>
+
+#ifdef __EMSCRIPTEN__
+#define bitvec_data_s 32
+#define bitvec_data_t uint32_t
+#else
+#define bitvec_data_s 64
+#define bitvec_data_t uint64_t
+#endif
+
+class BitVector {
+
+/** A class used to represent an arbitrary length bit vector. Very useful for encoding and decoding.
+ *  Bits are packed into bitvec_data_t words: bit i lives in word i / bitvec_data_s at offset i % bitvec_data_s.
+ */
+public:
+    /** Constructs an arbitrary-length bit vector.
+     * @class
+     * @param {String | Number} data - The initial contents: a B64 string, or a number holding the bits.
+     * @param {Number} length - A set length for the data. Ignored if data is a string.
+     *
+     * The structure of the word array should be [[last, ..., first], ..., [last, ..., first], [empty space, last, ..., first]]
+     */
+    BitVector() = default;  // empty vector; `length` gets its value from the member initializer below
+    BitVector(const BitVector& other) = default;  // memberwise copy of data and length
+    BitVector(const std::string b64_data);
+    BitVector(bitvec_data_t num, size_t length);
+
+    /** Return value of bit at index idx.
+     *
+     * @param {Number} idx - The index to read
+     *
+     * @returns The bit value at position idx
+     */
+    bool read_bit(size_t idx) const;
+
+    /** Returns an integer value (if possible) made from the range of bits [start, end). Throws std::range_error if the range is wider than bitvec_data_s bits.
+     *
+     * @param {Number} start - The index to start slicing from. Inclusive.
+     * @param {Number} end - The index to end slicing at. Exclusive.
+     *
+     * @returns An integer representation of the sliced bits.
+     */
+    bitvec_data_t slice(size_t start, size_t end) const;
+
+    /** Assign bit at index idx to 1.
+     *
+     * @param {Number} idx - The index to set.
+     */
+    void set_bit(size_t idx);
+
+    /** Assign bit at index idx to 0.
+     *
+     * @param {Number} idx - The index to clear.
+     */
+    void clear_bit(size_t idx);
+
+    /** Creates a string version of the bit vector in B64. Does not keep the order of elements in a sensible human readable format.
+     *
+     * @returns A b64 string representation of the BitVector.
+     */
+    std::string toB64() const;
+
+    /** Returns a BitVector in bitstring format. Probably only useful for dev debugging.
+     *
+     * @returns A bit string representation of the BitVector. Goes from higher-indexed bits to lower-indexed bits. (n ... 0)
+     */
+    std::string toString() const;
+
+    /** Returns a BitVector in bitstring format. Probably only useful for dev debugging.
+     *
+     * @returns A bit string representation of the BitVector. Goes from lower-indexed bits to higher-indexed bits. (0 ... n)
+     */
+    std::string toStringR() const;
+
+    /** Appends data to the BitVector.
+     *
+     * @param {BitVector | Number | String} data - The data to append: another BitVector, a B64 string, or a number.
+     * @param {Number} length - The length, in bits, of the new data. Only taken by the numeric overload.
+     */
+    void append(const BitVector& other);
+    void append(const std::string b64_data);
+    void append(bitvec_data_t num, size_t length);
+
+private:
+    std::vector<bitvec_data_t> data;  // packed bits, lowest-indexed word first
+    size_t length = 0;                // number of valid bits; zero-initialized so BitVector() is a usable empty vector
+};