Bitvector c++ implementation

This commit is contained in:
hppeng 2023-04-01 20:27:52 -07:00
parent f656c944e8
commit ed60e2ac6d
5 changed files with 321 additions and 244 deletions

View file

@ -9,7 +9,7 @@ all: utils.js powders.js
powders.js: powders.cpp
utils.js: utils.cpp utils/math_utils.cpp utils/base64.cpp
utils.js: utils.cpp utils/math_utils.cpp utils/base64.cpp utils/bitvector.cpp
.PHONY: clean
clean:

View file

@ -6,7 +6,9 @@ using namespace emscripten;
#include "utils.h"
#include "utils/math_utils.h"
#include "utils/base64.h"
#include "utils/bitvector.h"
#include <algorithm>
#include <memory>
#include <vector>
namespace utils {
@ -62,10 +64,28 @@ val __perm_wrap(val a) {
}
return return_array;
}
#endif
/** Appends a JavaScript value to a BitVector, dispatching on its JS type.
 *
 * @param self   - The BitVector that receives the new bits.
 * @param data   - A JS string (forwarded to the string overload of append)
 *                 or a JS number (forwarded to the numeric overload).
 * @param length - The length, in bits, of the new data. Only read when data is a number.
 *
 * @throws std::invalid_argument if data is neither a string nor a number.
 */
void __BitVector_append(BitVector& self, val data, val length) {
    // Ask the JS side for the type name once and branch on the result.
    const std::string js_type = data.typeOf().as<std::string>();

    if (js_type == "string") {
        self.append(data.as<std::string>());
    } else if (js_type == "number") {
        // No range check is applied to the numeric value before conversion.
        const size_t value = data.as<size_t>();
        const size_t bit_count = length.as<size_t>();
        self.append(value, bit_count);
    } else {
        throw std::invalid_argument("BitVector must be appended with a Number or a B64 String");
    }
}
#ifdef __EMSCRIPTEN__
EMSCRIPTEN_BINDINGS(utils) {
function("clamp", &clamp);
function("round_near", &round_near);
@ -74,6 +94,19 @@ EMSCRIPTEN_BINDINGS(utils) {
function("b64_toInt", &Base64::toInt);
function("b64_toIntSigned", &Base64::toIntSigned);
function("perm", &__perm_wrap);
class_<BitVector>("BitVector")
.constructor<std::string>()
.constructor<size_t, size_t>()
.function("read_bit", &BitVector::read_bit)
.function("slice", &BitVector::slice)
.function("set_bit", &BitVector::set_bit)
.function("clear_bit", &BitVector::clear_bit)
.function("toB64", &BitVector::toB64)
.function("toString", &BitVector::toString)
.function("toStringR", &BitVector::toStringR)
.function("append", select_overload<void(std::string)>(&BitVector::append))
.function("append", select_overload<void(bitvec_data_t, size_t)>(&BitVector::append))
;
}
#endif

View file

@ -10,7 +10,8 @@ const Base64 = {
fromIntN: _module_utils.b64_fromIntN,
toInt: _module_utils.b64_toInt,
toIntSigned: _module_utils.b64_toIntSigned
}
};
const BitVector = _module_utils.BitVector;
// const perm = _module_utils.perm; way too garbage to use... we supply JS perm.
// Permutations in js reference (also cool algorithm):
@ -74,246 +75,6 @@ function getValue(id) {
return document.getElementById(id).value;
}
/** A class used to represent an arbitrary length bit vector. Very useful for encoding and decoding.
 *
 * Bits are stored in a Uint32Array (`this.bits`); bit `i` lives in word `floor(i / 32)` at
 * position `i % 32`. `this.length` is the number of valid bits.
 */
class BitVector {

    /** Constructs an arbitrary-length bit vector.
     * @class
     * @param {String | Number} data - A B64 string (6 bits per character) or a number that fits in 32 bits.
     * @param {Number} length - A set length for the data. Ignored if data is a string.
     *
     * @throws {RangeError} if data is a number and length is missing or negative, or data is out of range.
     * @throws {TypeError} if data is neither a string nor a number.
     *
     * The structure of the Uint32Array should be [[last, ..., first], ..., [last, ..., first], [empty space, last, ..., first]]
     */
    constructor(data, length) {
        let bit_vec = [];

        if (typeof data === "string") {
            let int = 0;
            let bv_idx = 0;
            length = data.length * 6;

            for (let i = 0; i < data.length; i++) {
                let char = Base64.toInt(data[i]);
                let pre_pos = bv_idx % 32;
                // JS shifts are implicitly mod 32, so this lands at pre_pos.
                int |= (char << bv_idx);
                bv_idx += 6;
                let post_pos = bv_idx % 32;
                if (post_pos < pre_pos) { //we have to have filled up the integer
                    bit_vec.push(int);
                    // carry the bits of this character that did not fit
                    int = (char >>> (6 - post_pos));
                }

                if (i == data.length - 1 && post_pos != 0) {
                    bit_vec.push(int);
                }
            }
        } else if (typeof data === "number") {
            // A numeric vector needs an explicit, nonnegative length; the
            // two conditions used to be nested, so neither was ever enforced.
            if (typeof length === "undefined" || length < 0) {
                throw new RangeError("BitVector must have nonnegative length.");
            }

            //convert to int just in case
            data = Math.round(data);

            //range of numbers that won't fit in a uint32
            if (data > 2**32 - 1 || data < -(2 ** 32 - 1)) {
                throw new RangeError("Numerical data has to fit within a 32-bit integer range to instantiate a BitVector.");
            }
            bit_vec.push(data);
        } else {
            throw new TypeError("BitVector must be instantiated with a Number or a B64 String");
        }

        this.length = length;
        this.bits = new Uint32Array(bit_vec);
    }

    /** Return value of bit at index idx.
     *
     * @param {Number} idx - The index to read
     *
     * @returns The bit value at position idx
     * @throws {RangeError} if idx is outside [0, length).
     */
    read_bit(idx) {
        if (idx < 0 || idx >= this.length) {
            throw new RangeError("Cannot read bit outside the range of the BitVector. ("+idx+" > "+this.length+")");
        }
        // (1 << idx) relies on JS taking the shift count mod 32.
        return ((this.bits[Math.floor(idx / 32)] & (1 << idx)) == 0 ? 0 : 1);
    }

    /** Returns an integer value (if possible) made from the range of bits [start, end). Undefined behavior if the range to read is too big.
     *
     * @param {Number} start - The index to start slicing from. Inclusive.
     * @param {Number} end - The index to end slicing at. Exclusive.
     *
     * @returns An integer representation of the sliced bits.
     * @throws {RangeError} if end < start or the range is longer than 32 bits.
     */
    slice(start, end) {
        //TO NOTE: JS shifting is ALWAYS in mod 32. a << b will do a << (b mod 32) implicitly.

        if (end < start) {
            throw new RangeError("Cannot slice a range where the end is before the start.");
        } else if (end == start) {
            return 0;
        } else if (end - start > 32) {
            //requesting a slice of longer than 32 bits (safe integer "length")
            throw new RangeError("Cannot slice a range of longer than 32 bits (unsafe to store in an integer).");
        }

        let res = 0;
        if (Math.floor((end - 1) / 32) == Math.floor(start / 32)) {
            //the range is within 1 uint32 section - do some relatively fast bit twiddling
            res = (this.bits[Math.floor(start / 32)] & ~((((~0) << ((end - 1))) << 1) | ~((~0) << (start)))) >>> (start % 32);
        } else {
            //the range straddles two uint32s: low part from the first, high part from the second
            let start_pos = (start % 32);
            let int_idx = Math.floor(start/32);
            res = (this.bits[int_idx] & ((~0) << (start))) >>> (start_pos);
            res |= (this.bits[int_idx + 1] & ~((~0) << (end))) << (32 - start_pos);
        }

        return res;

        // General code - slow
        // for (let i = start; i < end; i++) {
        //     res |= (get_bit(i) << (i - start));
        // }
    }

    /** Assign bit at index idx to 1.
     *
     * @param {Number} idx - The index to set.
     * @throws {RangeError} if idx is outside [0, length).
     */
    set_bit(idx) {
        if (idx < 0 || idx >= this.length) {
            throw new RangeError("Cannot set bit outside the range of the BitVector.");
        }
        this.bits[Math.floor(idx / 32)] |= (1 << idx % 32);
    }

    /** Assign bit at index idx to 0.
     *
     * @param {Number} idx - The index to clear.
     * @throws {RangeError} if idx is outside [0, length).
     */
    clear_bit(idx) {
        if (idx < 0 || idx >= this.length) {
            throw new RangeError("Cannot clear bit outside the range of the BitVector.");
        }
        this.bits[Math.floor(idx / 32)] &= ~(1 << idx % 32);
    }

    /** Creates a string version of the bit vector in B64. Does not keep the order of elements a sensible human readable format.
     *
     * @returns A b64 string representation of the BitVector.
     */
    toB64() {
        if (this.length == 0) {
            return "";
        }
        let b64_str = "";
        let i = 0;
        while (i < this.length) {
            b64_str += Base64.fromIntN(this.slice(i, i + 6), 1);
            i += 6;
        }

        return b64_str;
    }

    /** Returns a BitVector in bitstring format. Probably only useful for dev debugging.
     *
     * @returns A bit string representation of the BitVector. Goes from higher-indexed bits to lower-indexed bits. (n ... 0)
     */
    toString() {
        let ret_str = "";
        for (let i = 0; i < this.length; i++) {
            ret_str = (this.read_bit(i) == 0 ? "0": "1") + ret_str;
        }
        return ret_str;
    }

    /** Returns a BitVector in bitstring format. Probably only useful for dev debugging.
     *
     * @returns A bit string representation of the BitVector. Goes from lower-indexed bits to higher-indexed bits. (0 ... n)
     */
    toStringR() {
        let ret_str = "";
        for (let i = 0; i < this.length; i++) {
            ret_str += (this.read_bit(i) == 0 ? "0": "1");
        }
        return ret_str;
    }

    /** Appends data to the BitVector.
     *
     * @param {Number | String} data - The data to append.
     * @param {Number} length - The length, in bits, of the new data. This is ignored if data is a string.
     *
     * @throws {RangeError} if length is negative, if data is a number and length is missing,
     *                      or if the number does not fit in 32 bits.
     * @throws {TypeError} if data is neither a string nor a number.
     */
    append(data, length) {
        if (length < 0) {
            throw new RangeError("BitVector length must increase by a nonnegative number.");
        }

        // Normalise the input into [value, bit count] pieces of at most 32 bits each.
        let pieces = [];
        if (typeof data === "string") {
            for (let i = 0; i < data.length; i++) {
                pieces.push([Base64.toInt(data[i]), 6]);
            }
        } else if (typeof data === "number") {
            if (typeof length === "undefined") {
                throw new RangeError("BitVector length must increase by a nonnegative number.");
            }
            //convert to int just in case
            let int = Math.round(data);
            //range of numbers that "could" fit in a uint32 -> [0, 2^32) U [-2^31, 2^31)
            if (data > 2**32 - 1 || data < -(2 ** 31)) {
                throw new RangeError("Numerical data has to fit within a 32-bit integer range to instantiate a BitVector.");
            }
            pieces.push([int, Math.min(length, 32)]);
            // Anything requested beyond 32 bits is zero padding.
            for (let rest = length - 32; rest > 0; rest -= 32) {
                pieces.push([0, Math.min(rest, 32)]);
            }
        } else {
            throw new TypeError("BitVector must be appended with a Number or a B64 String");
        }

        // Copy exactly the words that hold live bits. The backing array can hold
        // a spare word (zero-length numeric vector) or be too short, so normalise it.
        const live_words = Math.ceil(this.length / 32);
        let bit_vec = [];
        for (let i = 0; i < live_words; i++) {
            bit_vec.push(i < this.bits.length ? this.bits[i] : 0);
        }

        let bit_len = this.length;
        if (bit_len % 32 != 0) {
            // Clear stale bits above the current length so they cannot leak into the OR below.
            bit_vec[bit_vec.length - 1] &= ~((~0) << (bit_len % 32));
        }

        for (const [value, nbits] of pieces) {
            if (nbits == 0) {
                continue;
            }
            // Shifts are mod 32 in JS, so a full 32-bit piece must skip the mask.
            const masked = (nbits >= 32 ? value : (value & ~((~0) << nbits))) >>> 0;
            const used = bit_len % 32;
            if (used == 0) {
                // Last word is full (or there is none): start a fresh word.
                bit_vec.push(masked);
            } else {
                bit_vec[bit_vec.length - 1] |= (masked << used);
                if (used + nbits > 32) {
                    // spill the bits that did not fit into a new word
                    bit_vec.push(masked >>> (32 - used));
                }
            }
            bit_len += nbits;
        }

        this.bits = new Uint32Array(bit_vec);
        this.length = bit_len;
    }
};
/*
Turns a raw stat and a % stat into a final stat on the basis that - raw and >= 100% becomes 0 and + raw and <=-100% becomes negative.
Pct would be 0.80 for 80%, -1.20 for 120%, etc

193
js/c++/utils/bitvector.cpp Normal file
View file

@ -0,0 +1,193 @@
#include "bitvector.h"
#include "base64.h"
#include <algorithm>
#include <stdexcept>
#include <sstream>
/** Constructs a BitVector from a Base64 string.
 *
 * Every character contributes 6 bits, packed starting at the lowest-indexed bit,
 * so the resulting length is 6 * b64_data.length().
 *
 * @param b64_data - The Base64 text to decode.
 *
 * @throws std::invalid_argument if a character is not present in Base64::digitsMap.
 */
BitVector::BitVector(const std::string b64_data) {
    constexpr size_t bits_per_char = 6;

    length = b64_data.length() * bits_per_char;
    data.reserve(length / bitvec_data_s + 1);

    bitvec_data_t scratch = 0;
    size_t bitvec_index = 0;
    for (const char c : b64_data) {
        const auto found = Base64::digitsMap.find(c);
        if (found == Base64::digitsMap.end()) {
            // Dereferencing end() would be undefined behaviour; reject bad input instead.
            throw std::invalid_argument("BitVector must be constructed from a valid B64 String");
        }
        const size_t digit = found->second;
        // Shift in the word type so the shift count is always below the operand's width.
        const bitvec_data_t char_num = static_cast<bitvec_data_t>(digit);

        const size_t pre_pos = bitvec_index % bitvec_data_s;
        scratch |= char_num << pre_pos;
        bitvec_index += bits_per_char;
        const size_t post_pos = bitvec_index % bitvec_data_s;
        if (post_pos < pre_pos) { // wrapped around: the current word is full
            data.push_back(scratch);
            // Carry over the high bits of this character that did not fit.
            scratch = char_num >> (bits_per_char - post_pos);
        }
    }

    // Flush a partially filled trailing word, if any.
    if (bitvec_index % bitvec_data_s != 0) {
        data.push_back(scratch);
    }
}
/** Constructs a BitVector holding the low `length` bits of `num`.
 *
 * Bits of `num` at or above `length` are discarded. If `length` exceeds the
 * width of one storage word, the extra bits are zero.
 *
 * @param num    - The value supplying the bits (bit 0 of num becomes bit 0 of the vector).
 * @param length - The length, in bits, of the vector.
 */
BitVector::BitVector(bitvec_data_t num, size_t length) {
    // `length` is unsigned, so there is no negative case to reject.
    this->length = length;
    if (length == 0) {
        // Keep storage empty: a stray word here would shift everything that
        // is appended later into the wrong block.
        return;
    }

    if (length < bitvec_data_s) {
        // Drop bits above `length` so they cannot leak into later appends.
        const bitvec_data_t all_ones = ~static_cast<bitvec_data_t>(0);
        num &= ~(all_ones << length);
    }

    // One word per started block of bitvec_data_s bits; everything past the first is zero.
    data.assign((length + bitvec_data_s - 1) / bitvec_data_s, 0);
    data[0] = num;
}
/** Return value of bit at index idx.
*
* @param {Number} idx - The index to read
*
* @returns The bit value at position idx
*/
bool BitVector::read_bit(size_t idx) const {
if (idx < 0 || idx >= length) {
std::stringstream ss;
ss << "Cannot read bit outside the range of the BitVector. (" << idx << " > " << length << ")";
throw std::range_error(ss.str());
}
return (data[idx / bitvec_data_s] & (1 << (idx % bitvec_data_s))) == 0 ? 0 : 1;
}
/** Returns an integer value made from the range of bits [start, end).
 *
 * Bit `start` becomes bit 0 of the result. Bits that lie beyond the stored
 * words read as zero.
 *
 * @param start - The index to start slicing from. Inclusive.
 * @param end   - The index to end slicing at. Exclusive.
 *
 * @returns An integer representation of the sliced bits.
 * @throws std::range_error if end < start, or if the range is wider than one storage word.
 */
bitvec_data_t BitVector::slice(size_t start, size_t end) const {
    if (end < start) {
        throw std::range_error("Cannot slice a range where the end is before the start.");
    }
    if (end == start) {
        return 0;
    }

    const size_t width = end - start;
    if (width > bitvec_data_s) {
        //requesting a slice of longer than the size of a single data element (safe integer "length")
        std::stringstream ss;
        ss << "Cannot slice a range of longer than " << bitvec_data_s << " bits (unsafe to store in an integer).";
        throw std::range_error(ss.str());
    }

    // Words past the end of storage read as zero instead of indexing out of bounds.
    const auto word_at = [this](size_t word_idx) -> bitvec_data_t {
        return word_idx < data.size() ? data[word_idx] : static_cast<bitvec_data_t>(0);
    };

    const size_t first_word = start / bitvec_data_s;
    const size_t offset = start % bitvec_data_s;

    // Low part: everything in the first word from `offset` upwards.
    bitvec_data_t res = word_at(first_word) >> offset;

    // High part: only needed when the range spills into the next word. That
    // requires offset != 0, so the shift count below is always < bitvec_data_s.
    if (offset != 0 && offset + width > bitvec_data_s) {
        res |= word_at(first_word + 1) << (bitvec_data_s - offset);
    }

    // Trim to `width` bits. A full-width slice needs no mask, and shifting by
    // the full word width would be undefined.
    if (width < bitvec_data_s) {
        const bitvec_data_t all_ones = ~static_cast<bitvec_data_t>(0);
        res &= ~(all_ones << width);
    }
    return res;
}
/** Assign bit at index idx to 1.
*
* @param {Number} idx - The index to set.
*/
void BitVector::set_bit(size_t idx) {
if (idx < 0 || idx >= length) {
throw std::range_error("Cannot set bit outside the range of the BitVector.");
}
data[idx / bitvec_data_s] |= (1 << (idx % bitvec_data_s));
}
/** Assign bit at index idx to 0.
*
* @param {Number} idx - The index to clear.
*/
void BitVector::clear_bit(size_t idx) {
if (idx < 0 || idx >= length) {
throw std::range_error("Cannot clear bit outside the range of the BitVector.");
}
data[idx / bitvec_data_s] &= ~(1 << (idx % bitvec_data_s));
}
/** Creates a string version of the bit vector in B64. Does not keep the order of elements a sensible human readable format.
 *
 * The vector is consumed 6 bits at a time starting from bit 0. A final chunk
 * shorter than 6 bits is encoded as if padded with zero bits.
 *
 * @returns A b64 string representation of the BitVector (empty for an empty vector).
 */
std::string BitVector::toB64() const {
    constexpr size_t bits_per_char = 6;

    std::stringstream b64_str;
    for (size_t i = 0; i < length; i += bits_per_char) {
        // Clamp the last chunk to the vector's length. Slicing past it could
        // index a storage word that does not exist.
        const size_t chunk_end = std::min(i + bits_per_char, length);
        b64_str << Base64::fromIntN(this->slice(i, chunk_end), 1);
    }
    return b64_str.str();
}
/** Returns a BitVector in bitstring format. Probably only useful for dev debugging.
*
* @returns A bit string representation of the BitVector. Goes from higher-indexed bits to lower-indexed bits. (n ... 0)
*/
std::string BitVector::toString() const {
std::stringstream ret_str;
for (size_t i = length; i != 0; --i) {
ret_str << (this->read_bit(i-1) ? "1": "0");
}
return ret_str.str();
}
/** Returns a BitVector in bitstring format. Probably only useful for dev debugging.
*
* @returns A bit string representation of the BitVector. Goes from lower-indexed bits to higher-indexed bits. (0 ... n)
*/
std::string BitVector::toStringR() const {
std::stringstream ret_str;
for (size_t i = 0; i < length; ++i) {
ret_str << (this->read_bit(i) ? "1": "0");
}
return ret_str.str();
}
void BitVector::append(const BitVector& other) {
data.reserve(data.size() + other.data.size());
size_t other_index = 0;
if (this->length % bitvec_data_s != 0) {
// fill in the last block.
bitvec_data_t scratch = data[data.size() - 1];
size_t bits_remaining = bitvec_data_s - (this->length % bitvec_data_s);
size_t n = std::min(other.length, bits_remaining);
scratch |= (other.slice(0, n) << (this->length % bitvec_data_s));
data[data.size() - 1] = scratch;
other_index += n;
}
while (other_index != other.length) {
size_t n = std::min(other.length - other_index, (size_t)bitvec_data_s);
data.push_back(other.slice(other_index, other_index + n));
other_index += n;
}
this->length += other.length;
}
void BitVector::append(const std::string b64_data) {
BitVector tmp(b64_data);
this->append(tmp);
}
/** Appends a numeric value occupying `length` bits.
 *
 * @param num    - The value supplying the bits.
 * @param length - The length, in bits, of the new data.
 */
void BitVector::append(bitvec_data_t num, size_t length) {
    // Wrap the number in a temporary vector and reuse the vector-to-vector append.
    const BitVector wrapped(num, length);
    append(wrapped);
}

90
js/c++/utils/bitvector.h Normal file
View file

@ -0,0 +1,90 @@
#pragma once
#include <string>
#include <vector>
#ifdef __EMSCRIPTEN__
#define bitvec_data_s 32
#define bitvec_data_t uint32_t
#else
#define bitvec_data_s 64
#define bitvec_data_t uint64_t
#endif
/** A class used to represent an arbitrary length bit vector. Very useful for encoding and decoding.
 *
 * Bits are packed into words of type bitvec_data_t (bitvec_data_s bits each).
 * Bit i is stored in word i / bitvec_data_s at position i % bitvec_data_s.
 */
class BitVector {
public:
    /** Constructs an empty bit vector (length 0, no storage). */
    BitVector() = default;

    /** Copies both the stored words and the length of another vector. */
    BitVector(const BitVector& other) = default;

    /** Constructs a bit vector from a Base64 string.
     *
     * @param b64_data - The Base64 text; each character contributes 6 bits.
     */
    BitVector(const std::string b64_data);

    /** Constructs a bit vector from a number.
     *
     * @param num    - The value supplying the bits.
     * @param length - A set length, in bits, for the data.
     */
    BitVector(bitvec_data_t num, size_t length);

    /** Return value of bit at index idx.
     *
     * @param idx - The index to read.
     *
     * @returns The bit value at position idx.
     * @throws std::range_error if idx is outside the vector.
     */
    bool read_bit(size_t idx) const;

    /** Returns an integer value made from the range of bits [start, end).
     *
     * @param start - The index to start slicing from. Inclusive.
     * @param end   - The index to end slicing at. Exclusive.
     *
     * @returns An integer representation of the sliced bits.
     * @throws std::range_error if end < start or the range is wider than bitvec_data_s bits.
     */
    bitvec_data_t slice(size_t start, size_t end) const;

    /** Assign bit at index idx to 1.
     *
     * @param idx - The index to set.
     * @throws std::range_error if idx is outside the vector.
     */
    void set_bit(size_t idx);

    /** Assign bit at index idx to 0.
     *
     * @param idx - The index to clear.
     * @throws std::range_error if idx is outside the vector.
     */
    void clear_bit(size_t idx);

    /** Creates a string version of the bit vector in B64. Does not keep the order of elements a sensible human readable format.
     *
     * @returns A b64 string representation of the BitVector.
     */
    std::string toB64() const;

    /** Returns a BitVector in bitstring format. Probably only useful for dev debugging.
     *
     * @returns A bit string representation of the BitVector. Goes from higher-indexed bits to lower-indexed bits. (n ... 0)
     */
    std::string toString() const;

    /** Returns a BitVector in bitstring format. Probably only useful for dev debugging.
     *
     * @returns A bit string representation of the BitVector. Goes from lower-indexed bits to higher-indexed bits. (0 ... n)
     */
    std::string toStringR() const;

    /** Appends all bits of another BitVector.
     *
     * @param other - The vector whose bits are appended after the current last bit.
     */
    void append(const BitVector& other);

    /** Appends the bits encoded by a Base64 string.
     *
     * @param b64_data - The Base64 text to append.
     */
    void append(const std::string b64_data);

    /** Appends a numeric value.
     *
     * @param num    - The value supplying the bits.
     * @param length - The length, in bits, of the new data.
     */
    void append(bitvec_data_t num, size_t length);

private:
    // Packed storage, lowest-indexed bits first.
    std::vector<bitvec_data_t> data;
    // Number of valid bits. Initialised here so a default-constructed vector
    // starts at 0 instead of holding an indeterminate value.
    size_t length = 0;
};