Bitvector c++ implementation

2023-04-01 20:27:52 -07:00 · 2023-04-01 20:27:52 -07:00 · ed60e2ac6d
commit ed60e2ac6d
parent f656c944e8
5 changed files with 321 additions and 244 deletions
--- a/js/c++/Makefile
+++ b/js/c++/Makefile
@ -9,7 +9,7 @@ all: utils.js powders.js
 powders.js: powders.cpp
-utils.js: utils.cpp utils/math_utils.cpp utils/base64.cpp
+utils.js: utils.cpp utils/math_utils.cpp utils/base64.cpp utils/bitvector.cpp
 .PHONY: clean
 clean:
--- a/js/c++/utils.cpp
+++ b/js/c++/utils.cpp
@ -6,7 +6,9 @@ using namespace emscripten;
 #include "utils.h"
 #include "utils/math_utils.h"
 #include "utils/base64.h"
 #include "utils/bitvector.h"
 #include <algorithm>
 #include <memory>
 #include <vector>
 namespace utils {
@ -62,10 +64,28 @@ val __perm_wrap(val a) {
    }
    return return_array;
 }
 #endif
 /** Embind-facing helper that appends a JavaScript value to a BitVector.
 *
 * Dispatches on the JavaScript `typeof` of `data`: strings go to the Base64
 * append overload, numbers go to the (value, bit count) overload.
 *
 * @param {Number | String} data - The data to append.
 * @param {Number} length - The length, in bits, of the new data. Only read when data is a number.
 * @throws std::invalid_argument if data is neither a string nor a number.
 */
 void __BitVector_append(BitVector& self, val data, val length) {
    // Resolve the JS type once and branch on it.
    const std::string js_type = data.typeOf().as<std::string>();
    if (js_type == "string") {
        self.append(data.as<std::string>());
    } else if (js_type == "number") {
        const size_t value = data.as<size_t>();
        const size_t bit_count = length.as<size_t>();
        self.append(value, bit_count);
    } else {
        throw std::invalid_argument("BitVector must be appended with a Number or a B64 String");
    }
 }
 #ifdef __EMSCRIPTEN__
 EMSCRIPTEN_BINDINGS(utils) {
    function("clamp", &clamp);
    function("round_near", &round_near);
@ -74,6 +94,19 @@ EMSCRIPTEN_BINDINGS(utils) {
    function("b64_toInt", &Base64::toInt);
    function("b64_toIntSigned", &Base64::toIntSigned);
    function("perm", &__perm_wrap);
    class_<BitVector>("BitVector")
        .constructor<std::string>()
        .constructor<size_t, size_t>()
        .function("read_bit", &BitVector::read_bit)
        .function("slice", &BitVector::slice)
        .function("set_bit", &BitVector::set_bit)
        .function("clear_bit", &BitVector::clear_bit)
        .function("toB64", &BitVector::toB64)
        .function("toString", &BitVector::toString)
        .function("toStringR", &BitVector::toStringR)
        .function("append", select_overload<void(std::string)>(&BitVector::append))
        .function("append", select_overload<void(bitvec_data_t, size_t)>(&BitVector::append))
        ;
 }
 #endif
--- a/js/c++/utils.js.in
+++ b/js/c++/utils.js.in
@ -10,7 +10,8 @@ const Base64 = {
    fromIntN: _module_utils.b64_fromIntN,
    toInt: _module_utils.b64_toInt,
    toIntSigned: _module_utils.b64_toIntSigned
-}
+};
 const BitVector = _module_utils.BitVector;
 // const perm = _module_utils.perm; way too garbage to use... we supply JS perm.
 // Permutations in js reference (also cool algorithm):
@ -74,246 +75,6 @@ function getValue(id) {
    return document.getElementById(id).value;
 }
 /** A class used to represent an arbitrary length bit vector. Very useful for encoding and decoding.
 *
 * Bits are packed into a Uint32Array (this.bits): bit i of the vector is bit (i % 32) of
 * this.bits[Math.floor(i / 32)]. this.length is the number of valid bits.
 */
 class BitVector {
    /** Constructs an arbitrary-length bit vector.
     * @class
     * @param {String | Number} data - Initial contents: a Base64 string (6 bits per character) or one integer.
     * @param {Number} length - A set length, in bits, for numeric data. Ignored if data is a string
     *                          (the length is then 6 * data.length). If omitted for numeric data,
     *                          this.length is left undefined.
     * @throws {RangeError} If length is negative, or numeric data is outside the accepted 32-bit range.
     * @throws {TypeError} If data is neither a string nor a number.
     *
     * The structure of the Uint32Array should be [[last, ..., first], ..., [last, ..., first], [empty space, last, ..., first]]
     */
    constructor(data, length) {
        let bit_vec = [];
        if (typeof data === "string") {
            let int = 0;
            let bv_idx = 0;
            length = data.length * 6;
            for (let i = 0; i < data.length; i++) {
                let char = Base64.toInt(data[i]);
                let pre_pos = bv_idx % 32;
                // JS shifts are implicitly mod 32, so this places the character at pre_pos.
                int |= (char << bv_idx);
                bv_idx += 6;
                let post_pos = bv_idx % 32;
                if (post_pos < pre_pos) { //we have to have filled up the integer
                    bit_vec.push(int);
                    // carry the bits of this character that did not fit into the next integer
                    int = (char >>> (6 - post_pos));
                }
                if (i == data.length - 1 && post_pos != 0) {
                    bit_vec.push(int);
                }
            }
        } else if (typeof data === "number") {
            // An omitted length compares false here and is passed through unchanged;
            // a negative length is rejected.
            if (length < 0) {
                throw new RangeError("BitVector must have nonnegative length.");
            }
            //convert to int just in case
            data = Math.round(data);
            //range of numbers that won't fit in a uint32
            if (data > 2**32 - 1 || data < -(2 ** 32 - 1)) {
                throw new RangeError("Numerical data has to fit within a 32-bit integer range to instantiate a BitVector.");
            }
            bit_vec.push(data);
        } else {
            throw new TypeError("BitVector must be instantiated with a Number or a B64 String");
        }
        this.length = length;
        this.bits = new Uint32Array(bit_vec);
    }
    /** Return value of bit at index idx.
     *
     * @param {Number} idx - The index to read
     *
     * @returns The bit value at position idx (0 or 1)
     * @throws {RangeError} If idx is negative or not less than this.length.
     */
    read_bit(idx) {
        if (idx < 0 || idx >= this.length) {
            throw new RangeError("Cannot read bit outside the range of the BitVector. ("+idx+" > "+this.length+")");
        }
        // (1 << idx) relies on JS shifting mod 32.
        return ((this.bits[Math.floor(idx / 32)] & (1 << idx)) == 0 ? 0 : 1);
    }
    /** Returns an integer value (if possible) made from the range of bits [start, end). Undefined behavior if the range to read is too big.
     *
     * @param {Number} start - The index to start slicing from. Inclusive.
     * @param {Number} end - The index to end slicing at. Exclusive.
     *
     * @returns An integer representation of the sliced bits.
     * @throws {RangeError} If end is before start, or the range is longer than 32 bits.
     */
    slice(start, end) {
        //TO NOTE: JS shifting is ALWAYS in mod 32. a << b will do a << (b mod 32) implicitly.
        if (end < start) {
            throw new RangeError("Cannot slice a range where the end is before the start.");
        } else if (end == start) {
            return 0;
        } else if (end - start > 32) {
            //requesting a slice of longer than 32 bits (safe integer "length")
            throw new RangeError("Cannot slice a range of longer than 32 bits (unsafe to store in an integer).");
        }
        let res = 0;
        if (Math.floor((end - 1) / 32) == Math.floor(start / 32)) {
            //the range is within 1 uint32 section - do some relatively fast bit twiddling
            res = (this.bits[Math.floor(start / 32)] & ~((((~0) << ((end - 1))) << 1) | ~((~0) << (start)))) >>> (start % 32);
        } else {
            //the range straddles two uint32s: take the high bits of the first and the low bits of the second
            let start_pos = (start % 32);
            let int_idx = Math.floor(start/32);
            res = (this.bits[int_idx] & ((~0) << (start))) >>> (start_pos);
            res |= (this.bits[int_idx + 1] & ~((~0) << (end))) << (32 - start_pos);
        }
        return res;
        // General code - slow
        // for (let i = start; i < end; i++) {
        //     res |= (get_bit(i) << (i - start));
        // }
    }
    /** Assign bit at index idx to 1.
     *
     * @param {Number} idx - The index to set.
     * @throws {RangeError} If idx is negative or not less than this.length.
     */
    set_bit(idx) {
        if (idx < 0 || idx >= this.length) {
            throw new RangeError("Cannot set bit outside the range of the BitVector.");
        }
        this.bits[Math.floor(idx / 32)] |= (1 << idx % 32);
    }
    /** Assign bit at index idx to 0.
     *
     * @param {Number} idx - The index to clear.
     * @throws {RangeError} If idx is negative or not less than this.length.
     */
    clear_bit(idx) {
        if (idx < 0 || idx >= this.length) {
            throw new RangeError("Cannot clear bit outside the range of the BitVector.");
        }
        this.bits[Math.floor(idx / 32)] &= ~(1 << idx % 32);
    }
    /** Creates a string version of the bit vector in B64. Does not keep the order of elements a sensible human readable format.
     *
     * @returns A b64 string representation of the BitVector, one character per 6 bits starting at bit 0.
     */
    toB64() {
        if (this.length == 0) {
            return "";
        }
        let b64_str = "";
        let i = 0;
        while (i < this.length) {
            b64_str += Base64.fromIntN(this.slice(i, i + 6), 1);
            i += 6;
        }
        return b64_str;
    }
    /** Returns a BitVector in bitstring format. Probably only useful for dev debugging.
     *
     * @returns A bit string representation of the BitVector. Goes from higher-indexed bits to lower-indexed bits. (n ... 0)
     */
    toString() {
        let ret_str = "";
        for (let i = 0; i < this.length; i++) {
            ret_str = (this.read_bit(i) == 0 ? "0": "1") + ret_str;
        }
        return ret_str;
    }
    /** Returns a BitVector in bitstring format. Probably only useful for dev debugging.
     *
     * @returns A bit string representation of the BitVector. Goes from lower-indexed bits to higher-indexed bits. (0 ... n)
     */
    toStringR() {
        let ret_str = "";
        for (let i = 0; i < this.length; i++) {
            ret_str += (this.read_bit(i) == 0 ? "0": "1");
        }
        return ret_str;
    }
    /** Appends data to the BitVector.
     *
     * @param {Number | String} data - The data to append.
     * @param {Number} length - The length, in bits, of the new data. This is ignored if data is a string.
     * @throws {RangeError} If length is negative, or numeric data is outside the accepted 32-bit range.
     * @throws {TypeError} If data is neither a string nor a number.
     */
    append(data, length) {
        if (length < 0) {
            throw new RangeError("BitVector length must increase by a nonnegative number.");
        }
        // Work on a plain-array copy of the current words; this.bits is rebuilt at the end.
        let bit_vec = [];
        for (const uint of this.bits) {
            bit_vec.push(uint);
        }
        if (typeof data === "string") {
            let int = bit_vec[bit_vec.length - 1];
            let bv_idx = this.length;
            length = data.length * 6;
            // Tracks whether the existing last word has already been overwritten with merged bits.
            let updated_curr = false;
            for (let i = 0; i < data.length; i++) {
                let char = Base64.toInt(data[i]);
                let pre_pos = bv_idx % 32;
                int |= (char << bv_idx);
                bv_idx += 6;
                let post_pos = bv_idx % 32;
                if (post_pos < pre_pos) { //we have to have filled up the integer
                    if (bit_vec.length == this.bits.length && !updated_curr) {
                        bit_vec[bit_vec.length - 1] = int;
                        updated_curr = true;
                    } else {
                        bit_vec.push(int);
                    }
                    int = (char >>> (6 - post_pos));
                }
                if (i == data.length - 1) {
                    if (bit_vec.length == this.bits.length && !updated_curr) {
                        bit_vec[bit_vec.length - 1] = int;
                    } else if (post_pos != 0) {
                        bit_vec.push(int);
                    }
                }
            }
        } else if (typeof data === "number") {
            //convert to int just in case
            let int = Math.round(data);
            //range of numbers that "could" fit in a uint32 -> [0, 2^32) U [-2^31, 2^31)
            if (data > 2**32 - 1 || data < -(2 ** 31)) {
                throw new RangeError("Numerical data has to fit within a 32-bit integer range to instantiate a BitVector.");
            }
            //could be split between multiple new ints
            //reminder that shifts implicitly mod 32
            // NOTE(review): when this.length is a positive multiple of 32 the shift count wraps to 0,
            // so the new bits are also OR-ed into the already-full last word - confirm and fix separately.
            bit_vec[bit_vec.length - 1] |= ((int & ~((~0) << length)) << (this.length));
            if (((this.length - 1) % 32 + 1) + length > 32) {
                bit_vec.push(int >>> (32 - this.length));
            }
        } else {
            throw new TypeError("BitVector must be appended with a Number or a B64 String");
        }
        this.bits = new Uint32Array(bit_vec);
        this.length += length;
    }
 };
 /*
    Turns a raw stat and a % stat into a final stat on the basis that - raw and >= 100% becomes 0 and + raw and <=-100% becomes negative.
    Pct would be 0.80 for 80%, -1.20 for 120%, etc
--- a/js/c++/utils/bitvector.cpp
+++ b/js/c++/utils/bitvector.cpp
@ -0,0 +1,193 @@
 #include "bitvector.h"
 #include "base64.h"
 #include <algorithm>
 #include <stdexcept>
 #include <sstream>
 /** Constructs a BitVector from a Base64 string, six bits per character.
 *
 * Character i occupies bits [6*i, 6*i + 6) of the vector; the resulting
 * length is 6 * b64_data.length().
 *
 * @param b64_data - The Base64 text to decode.
 * @throws std::invalid_argument if a character is not present in Base64::digitsMap.
 */
 BitVector::BitVector(const std::string b64_data) {
    length = b64_data.length() * 6;
    data.reserve(length/bitvec_data_s + 1);
    bitvec_data_t scratch = 0;
    size_t bitvec_index = 0;
    for (size_t i = 0; i < b64_data.length(); ++i) {
        // Reject unknown characters instead of dereferencing end().
        const auto digit = Base64::digitsMap.find(b64_data[i]);
        if (digit == Base64::digitsMap.end()) {
            throw std::invalid_argument("BitVector must be instantiated with a valid B64 String");
        }
        const bitvec_data_t char_num = static_cast<bitvec_data_t>(digit->second);
        const unsigned int pre_pos = bitvec_index % bitvec_data_s;
        scratch |= char_num << pre_pos;
        bitvec_index += 6;  // b64 is 6 bits per character.
        const unsigned int post_pos = bitvec_index % bitvec_data_s;
        if (post_pos < pre_pos) { //we have to have filled up the integer
            data.push_back(scratch);
            // Carry the bits of this character that spilled into the next word.
            scratch = (char_num >> (6 - post_pos));
        }
        if (i == b64_data.length()-1 && post_pos != 0) {
            // Flush the final, partially filled word.
            data.push_back(scratch);
        }
    }
 }
 /** Constructs a BitVector of `length` bits whose low bits are taken from `num`.
 *
 * Storage is sized to exactly ceil(length / bitvec_data_s) words so that
 * appends land at the right position; bits of `num` at or above `length`
 * are discarded, and any words beyond the first start out as zero.
 *
 * @param num - The integer supplying the initial bits (bit 0 of num is bit 0 of the vector).
 * @param length - The number of bits in the vector.
 */
 BitVector::BitVector(bitvec_data_t num, size_t length) {
    // length is unsigned, so there is no negative case to reject.
    this->length = length;
    if (length == 0) {
        return;  // an empty vector owns no storage words
    }
    data.assign((length + bitvec_data_s - 1) / bitvec_data_s, 0);
    if (length < bitvec_data_s) {
        // Shifting by the full word width would be undefined, hence the guard.
        num &= ~(~static_cast<bitvec_data_t>(0) << length);
    }
    data[0] = num;
 }
 /** Return value of bit at index idx.
 *
 * @param {Number} idx - The index to read
 *
 * @returns The bit value at position idx
 */
 bool BitVector::read_bit(size_t idx) const {
    if (idx < 0 || idx >= length) {
        std::stringstream ss;
        ss << "Cannot read bit outside the range of the BitVector. (" << idx << " > " << length << ")";
        throw std::range_error(ss.str());
    }
    return (data[idx / bitvec_data_s] & (1 << (idx % bitvec_data_s))) == 0 ? 0 : 1;
 }
 /** Returns an integer value made from the range of bits [start, end).
 *
 * Bit `start` becomes bit 0 of the result. Storage words past the end of
 * the vector read as zero, so a range that runs beyond the stored data is
 * zero-padded rather than read out of bounds.
 *
 * @param {Number} start - The index to start slicing from. Inclusive.
 * @param {Number} end - The index to end slicing at. Exclusive.
 *
 * @returns An integer representation of the sliced bits.
 * @throws std::range_error if end is before start, or the range is longer than bitvec_data_s bits.
 */
 bitvec_data_t BitVector::slice(size_t start, size_t end) const {
    if (end < start) {
        throw std::range_error("Cannot slice a range where the end is before the start.");
    }
    if (end == start) {
        return 0;
    }
    const size_t width = end - start;
    if (width > bitvec_data_s) {
        //requesting a slice of longer than the size of a single data element (safe integer "length")
        std::stringstream ss;
        ss << "Cannot slice a range of longer than " << bitvec_data_s << " bits (unsafe to store in an integer).";
        throw std::range_error(ss.str());
    }
    const size_t word_idx = start / bitvec_data_s;
    const size_t offset = start % bitvec_data_s;
    // Low part: everything in the first word from `offset` upwards.
    const bitvec_data_t low_word = word_idx < data.size() ? data[word_idx] : 0;
    bitvec_data_t res = low_word >> offset;
    if (offset + width > bitvec_data_s) {
        // The range straddles two words. offset is nonzero here (width <= bitvec_data_s),
        // so the shift below is strictly less than the word width.
        const bitvec_data_t high_word = word_idx + 1 < data.size() ? data[word_idx + 1] : 0;
        res |= high_word << (bitvec_data_s - offset);
    }
    if (width < bitvec_data_s) {
        // Drop bits at or above `width`; a full-width slice needs no mask
        // (and shifting by the word width would be undefined).
        res &= ~(~static_cast<bitvec_data_t>(0) << width);
    }
    return res;
 }
 /** Assign bit at index idx to 1.
 *
 * @param {Number} idx - The index to set.
 */
 void BitVector::set_bit(size_t idx) {
    if (idx < 0 || idx >= length) {
        throw std::range_error("Cannot set bit outside the range of the BitVector.");
    }
    data[idx / bitvec_data_s] |= (1 << (idx % bitvec_data_s));
 }
 /** Assign bit at index idx to 0.
 *
 * @param {Number} idx - The index to clear.
 */
 void BitVector::clear_bit(size_t idx) {
    if (idx < 0 || idx >= length) {
        throw std::range_error("Cannot clear bit outside the range of the BitVector.");
    }
    data[idx / bitvec_data_s] &= ~(1 << (idx % bitvec_data_s));
 }
 /** Creates a string version of the bit vector in B64. Does not keep the order of elements a sensible human readable format.
 *
 * Emits one character per group of six bits starting at bit 0. When the
 * length is not a multiple of six, the final group contains only the
 * remaining bits (its missing high bits count as zero).
 *
 * @returns A b64 string representation of the BitVector.
 */
 std::string BitVector::toB64() const {
    if (length == 0) {
        return "";
    }
    std::stringstream b64_str;
    size_t i = 0;
    while (i < length) {
        // Clamp the last group so the slice never reaches past the vector's own bits.
        const size_t stop = std::min(i + 6, length);
        b64_str << Base64::fromIntN(this->slice(i, stop), 1);
        i += 6;
    }
    return b64_str.str();
 }
 /** Renders the vector as a string of '0'/'1' characters, mainly for debugging.
 *
 * @returns The bits ordered from the highest index down to index 0. (n ... 0)
 */
 std::string BitVector::toString() const {
    // Start with all zeros and flip the positions whose bit is set;
    // bit i is written at text position (length - 1 - i).
    std::string bits(length, '0');
    for (size_t i = 0; i < length; ++i) {
        if (this->read_bit(i)) {
            bits[length - 1 - i] = '1';
        }
    }
    return bits;
 }
 /** Renders the vector as a string of '0'/'1' characters, mainly for debugging.
 *
 * @returns The bits ordered from index 0 up to the highest index. (0 ... n)
 */
 std::string BitVector::toStringR() const {
    std::string bits;
    bits.reserve(length);
    for (size_t pos = 0; pos < length; ++pos) {
        bits.push_back(this->read_bit(pos) ? '1' : '0');
    }
    return bits;
 }
 void BitVector::append(const BitVector& other) {
    data.reserve(data.size() + other.data.size());
    size_t other_index = 0;
    if (this->length % bitvec_data_s != 0) {
        // fill in the last block.
        bitvec_data_t scratch = data[data.size() - 1];
        size_t bits_remaining = bitvec_data_s - (this->length % bitvec_data_s);
        size_t n = std::min(other.length, bits_remaining);
        scratch |= (other.slice(0, n) << (this->length % bitvec_data_s));
        data[data.size() - 1] = scratch;
        other_index += n;
    }
    while (other_index != other.length) {
        size_t n = std::min(other.length - other_index, (size_t)bitvec_data_s);
        data.push_back(other.slice(other_index, other_index + n));
        other_index += n;
    }
    this->length += other.length;
 }
 void BitVector::append(const std::string b64_data) {
    BitVector tmp(b64_data);
    this->append(tmp);
 }
 /** Appends `length` bits taken from the integer `num`. */
 void BitVector::append(bitvec_data_t num, size_t length) {
    // Wrap the integer in a temporary vector, then reuse the vector-to-vector append.
    const BitVector chunk(num, length);
    append(chunk);
 }
--- a/js/c++/utils/bitvector.h
+++ b/js/c++/utils/bitvector.h
@ -0,0 +1,90 @@
 #pragma once
 #include <string>
 #include <vector>
 #ifdef __EMSCRIPTEN__
 #define bitvec_data_s 32
 #define bitvec_data_t uint32_t
 #else
 #define bitvec_data_s 64
 #define bitvec_data_t uint64_t
 #endif
 /** A class used to represent an arbitrary length bit vector. Very useful for encoding and decoding.
 *
 * Bits are packed into words of bitvec_data_s bits (type bitvec_data_t):
 * bit i of the vector is bit (i % bitvec_data_s) of word (i / bitvec_data_s).
 * Copy and move operations are the compiler-generated ones.
 */
 class BitVector {
 public:
    /** Constructs an empty bit vector (length 0). */
    BitVector() = default;
    /** Constructs a bit vector from a Base64 string, six bits per character.
     *
     * @param {String} b64_data - The Base64 text to decode.
     */
    BitVector(const std::string b64_data);
    /** Constructs a bit vector from a single integer.
     *
     * @param {Number} num - The integer supplying the initial bits.
     * @param {Number} length - The length of the vector, in bits.
     */
    BitVector(bitvec_data_t num, size_t length);
    /** Return value of bit at index idx.
     *
     * @param {Number} idx - The index to read
     *
     * @returns The bit value at position idx
     */
    bool read_bit(size_t idx) const;
    /** Returns an integer value (if possible) made from the range of bits [start, end). Undefined behavior if the range to read is too big.
     *
     * @param {Number} start - The index to start slicing from. Inclusive.
     * @param {Number} end - The index to end slicing at. Exclusive.
     *
     * @returns An integer representation of the sliced bits.
     */
    bitvec_data_t slice(size_t start, size_t end) const;
    /** Assign bit at index idx to 1.
     *
     * @param {Number} idx - The index to set.
     */
    void set_bit(size_t idx);
    /** Assign bit at index idx to 0.
     *
     * @param {Number} idx - The index to clear.
     */
    void clear_bit(size_t idx);
    /** Creates a string version of the bit vector in B64. Does not keep the order of elements a sensible human readable format.
     *
     * @returns A b64 string representation of the BitVector.
     */
    std::string toB64() const;
    /** Returns a BitVector in bitstring format. Probably only useful for dev debugging.
     *
     * @returns A bit string representation of the BitVector. Goes from higher-indexed bits to lower-indexed bits. (n ... 0)
     */
    std::string toString() const;
    /** Returns a BitVector in bitstring format. Probably only useful for dev debugging.
     *
     * @returns A bit string representation of the BitVector. Goes from lower-indexed bits to higher-indexed bits. (0 ... n)
     */
    std::string toStringR() const;
    /** Appends all bits of another BitVector to this one.
     *
     * @param other - The vector whose bits are appended.
     */
    void append(const BitVector& other);
    /** Appends the bits encoded by a Base64 string.
     *
     * @param {String} b64_data - The Base64 text to append.
     */
    void append(const std::string b64_data);
    /** Appends bits taken from an integer.
     *
     * @param {Number} num - The integer supplying the bits.
     * @param {Number} length - The length, in bits, of the new data.
     */
    void append(bitvec_data_t num, size_t length);
 private:
    // Packed storage words, lowest-indexed bits first.
    std::vector<bitvec_data_t> data;
    // Number of valid bits; zero-initialised so a default-constructed vector is empty.
    size_t length = 0;
 };