diff options
Diffstat (limited to 'src/ru/olamedia/math/OpenBitSet.java')
-rw-r--r-- | src/ru/olamedia/math/OpenBitSet.java | 1389 |
1 files changed, 1389 insertions, 0 deletions
diff --git a/src/ru/olamedia/math/OpenBitSet.java b/src/ru/olamedia/math/OpenBitSet.java new file mode 100644 index 0000000..93c26b0 --- /dev/null +++ b/src/ru/olamedia/math/OpenBitSet.java @@ -0,0 +1,1389 @@ +package ru.olamedia.math; + +import java.io.Serializable; +import java.util.Arrays; + +/** + * http://www.apache.org/licenses/LICENSE-2.0 (Lucene) + * + * An "open" BitSet implementation that allows direct access to the + * array of words + * storing the bits. + * <p/> + * Unlike java.util.bitet, the fact that bits are packed into an array of longs + * is part of the interface. This allows efficient implementation of other + * algorithms by someone other than the author. It also allows one to + * efficiently implement alternate serialization or interchange formats. + * <p/> + * <code>OpenBitSet</code> is faster than <code>java.util.BitSet</code> in most + * operations and *much* faster at calculating cardinality of sets and results + * of set operations. It can also handle sets of larger cardinality (up to 64 * + * 2**32-1) + * <p/> + * The goals of <code>OpenBitSet</code> are the fastest implementation possible, + * and maximum code reuse. Extra safety and encapsulation may always be built on + * top, but if that's built in, the cost can never be removed (and hence people + * re-implement their own version in order to get better performance). If you + * want a "safe", totally encapsulated (and slower and limited) BitSet class, + * use <code>java.util.BitSet</code>. + * <p/> + * <h3>Performance Results</h3> + * + * Test system: Pentium 4, Sun Java 1.5_06 -server -Xbatch -Xmx64M <br/> + * BitSet size = 1,000,000 <br/> + * Results are java.util.BitSet time divided by OpenBitSet time. + * <table border="1"> + * <tr> + * <th></th> + * <th>cardinality</th> + * <th>intersect_count</th> + * <th>union</th> + * <th>nextSetBit</th> + * <th>get</th> + * <th>iterator</th> + * </tr> + * <tr> + * <th>50% full</th> + * <td>3.36</td> + * <td>3.96</td> + * <td>1.44</td> + * <td>1.46</td> + * <td>1.99</td> + * <td>1.58</td> + * </tr> + * <tr> + * <th>1% full</th> + * <td>3.31</td> + * <td>3.90</td> + * <td> </td> + * <td>1.04</td> + * <td> </td> + * <td>0.99</td> + * </tr> + * </table> + * + * @author olamedia (modified version) + * + * @author yonik + * @version $Id$ + */ + +public class OpenBitSet implements Cloneable, Serializable { + /** + * + */ + private static final long serialVersionUID = 7041505168827686846L; + public long[] bits; + public int wlen; // number of words (elements) used in the array + + public OpenBitSet() { + + } + + public OpenBitSet(long numBits) { + bits = new long[(int) (((numBits - 1) >>> 6) + 1)]; + wlen = bits.length; + } + + public OpenBitSet(long[] bits, int numWords) { + this.bits = bits; + this.wlen = numWords; + } + + public long[] getBits() { + return bits; + } + + public void setBits(long[] bits) { + this.bits = bits; + } + + public int getNumWords() { + return wlen; + } + + public void setNumWords(int nWords) { + this.wlen = nWords; + } + + public static int wordNum(int index) { + // java doesn't currently accept 64 bit array indicies, so returning + // a long isn't needed. + return index >> 6; // div 64 + } + + public static int wordNum(long index) { + // java doesn't currently accept 64 bit array indicies, so returning + // a long isn't needed. + return (int) (index >> 6); // div 64 + } + + public static int bitNum(int index) { + // should and or cast come first for best efficiency? + return index & 0x0000003f; // mod 64 + } + + public static int bitNum(long index) { + // should and or cast come first for best efficiency? + return (int) index & 0x0000003f; // mod 64 + } + + public static long bitMask(int index) { + return 1L << (index & 0x0000003f); + } + + /** Returns true or false for the specified bit index */ + public boolean get(int index) { + int i = index >> 6; // div 64 + // signed shift will keep a negative index and force an exception, + // removing the need for an explicit check. + int bit = index & 0x3f; // mod 64 + long bitmask = 1L << bit; + return (bits[i] & bitmask) != 0; + } + + /** Returns true or false for the specified bit index */ + public boolean get(long index) { + int i = (int) (index >> 6); // div 64 + // if (i>=bits.length) return false; + int bit = (int) index & 0x3f; // mod 64 + long bitmask = 1L << bit; + return (bits[i] & bitmask) != 0; + } + + // alternate implementation of get() + public boolean get1(int index) { + int i = index >> 6; // div 64 + int bit = index & 0x3f; // mod 64 + return ((bits[i] >>> bit) & 0x01) != 0; + // this does a long shift and a bittest (on x86) vs + // a long shift, and a long AND, (the test for zero is prob a no-op) + // testing on a P4 indicates this is slower than (bits[i] & bitmask) != + // 0; + } + + /** returns 1 if the bit is set, 0 if not */ + public int getBit(int index) { + int i = index >> 6; // div 64 + int bit = index & 0x3f; // mod 64 + return ((int) (bits[i] >>> bit)) & 0x01; + } + + /*** + * public boolean get2(int index) { + * int word = index >> 6; // div 64 + * int bit = index & 0x0000003f; // mod 64 + * return (bits[word] << bit) < 0; // hmmm, this would work if bit + * order were reversed + * // we could right shift and check for parity bit, if it was available to + * us. + * } + ***/ + + public void set(int index) { + int wordNum = index >> 6; // div 64 + int bit = index & 0x3f; // mod 64 + long bitmask = 1L << bit; + bits[wordNum] |= bitmask; + } + + public void set(long index) { + int wordNum = (int) (index >> 6); // div 64 + int bit = (int) index & 0x3f; // mod 64 + long bitmask = 1L << bit; + bits[wordNum] |= bitmask; + } + + public void clear(int index) { + int wordNum = index >> 6; // div 64 + int bit = index & 0x0000003f; // mod 64 + long bitmask = 1L << bit; + bits[wordNum] &= ~bitmask; + // hmmm, it takes one more instruction to clear than it does to set... + // any + // way to work around this? If there were only 63 bits per word, we + // could + // use a right shift of 10111111...111 in binary to position the 0 in + // the + // correct place (using sign extension). + // Could also use Long.rotateRight() or rotateLeft() *if* they were + // converted + // by the JVM into a native instruction. + } + + public void clear(long index) { + int wordNum = (int) (index >> 6); // div 64 + int bit = (int) index & 0x3f; // mod 64 + long bitmask = 1L << bit; + bits[wordNum] &= ~bitmask; + } + + /*** + * public void clear2(int index) { + * int word = index >> 6; // div 64 + * int bit = index & 0x0000003f; // mod 64 + * bits[word] &= Long.rotateLeft(0xfffffffe,bit); + * } + ***/ + + public boolean getAndSet(int index) { + int wordNum = index >> 6; // div 64 + int bit = index & 0x3f; // mod 64 + long bitmask = 1L << bit; + boolean val = (bits[wordNum] & bitmask) != 0; + bits[wordNum] |= bitmask; + return val; + } + + public boolean getAndSet(long index) { + int wordNum = (int) (index >> 6); // div 64 + int bit = (int) index & 0x3f; // mod 64 + long bitmask = 1L << bit; + boolean val = (bits[wordNum] & bitmask) != 0; + bits[wordNum] |= bitmask; + return val; + } + + /** flips a bit */ + public void flip(int index) { + int wordNum = index >> 6; // div 64 + int bit = index & 0x3f; // mod 64 + long bitmask = 1L << bit; + bits[wordNum] ^= bitmask; + } + + /** flips a bit */ + public void flip(long index) { + int wordNum = (int) (index >> 6); // div 64 + int bit = (int) index & 0x3f; // mod 64 + long bitmask = 1L << bit; + bits[wordNum] ^= bitmask; + } + + /** flips a bit and returns the resulting bit value */ + public boolean flipAndGet(int index) { + int wordNum = index >> 6; // div 64 + int bit = index & 0x3f; // mod 64 + long bitmask = 1L << bit; + bits[wordNum] ^= bitmask; + return (bits[wordNum] & bitmask) != 0; + } + + /** flips a bit and returns the resulting bit value */ + public boolean flipAndGet(long index) { + int wordNum = (int) (index >> 6); // div 64 + int bit = (int) index & 0x3f; // mod 64 + long bitmask = 1L << bit; + bits[wordNum] ^= bitmask; + return (bits[wordNum] & bitmask) != 0; + } + + static long pop_array5(long A[], int wordOffset, int numWords) { + int n = numWords; + long tot = 0; + long ones = 0, twos = 0, twosA, twosB, fours = 0, foursA, foursB, eights; + + int i; + for (i = wordOffset; i <= n - 8; i += 8) { + /*** + * #define CSA(h,l, a,b,c) \ + * {unsigned u = a ^ b; unsigned v = c; \ + * h = (a & b) | (u & v); l = u ^ v;} + ***/ + + // CSA(twosA, ones, ones, A[i], A[i+1]) + { + long a = ones, b = A[i], c = A[i + 1]; + long u = a ^ b, v = c; + long h = (a & b) | (u & v), l = u ^ v; + twosA = h; + ones = l; + } + + // CSA(twosB, ones, ones, A[i+2], A[i+3]) + { + long a = ones, b = A[i + 2], c = A[i + 3]; + long u = a ^ b, v = c; + long h = (a & b) | (u & v), l = u ^ v; + twosB = h; + ones = l; + } + + // CSA(foursA, twos, twos, twosA, twosB) + { + long a = twos, b = twosA, c = twosB; + long u = a ^ b, v = c; + long h = (a & b) | (u & v), l = u ^ v; + foursA = h; + twos = l; + } + + // CSA(twosA, ones, ones, A[i+4], A[i+5]) + { + long a = ones, b = A[i + 4], c = A[i + 5]; + long u = a ^ b, v = c; + long h = (a & b) | (u & v), l = u ^ v; + twosA = h; + ones = l; + } + + // CSA(twosB, ones, ones, A[i+6], A[i+7]) + { + long a = ones, b = A[i + 6], c = A[i + 7]; + long u = a ^ b, v = c; + long h = (a & b) | (u & v), l = u ^ v; + twosB = h; + ones = l; + } + + // CSA(foursB, twos, twos, twosA, twosB) + { + long a = twos, b = twosA, c = twosB; + long u = a ^ b, v = c; + long h = (a & b) | (u & v), l = u ^ v; + foursB = h; + twos = l; + } + + // CSA(eights, fours, fours, foursA, foursB) + // CSA(foursB, twos, twos, twosA, twosB) + { + long a = fours, b = foursA, c = foursB; + long u = a ^ b, v = c; + long h = (a & b) | (u & v), l = u ^ v; + eights = h; + fours = l; + } + + tot += pop(eights); + } + + tot = (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot << 3); + + for (i = i; i < n; i++) + // Add in the last elements + tot = tot + pop(A[i]); + + return tot; + } + + /** + * Returns the number of bits set in the long + */ + public static int pop(long x) { + /*** + * Hacker's Delight 32 bit pop function: + * http://www.hackersdelight.org/HDcode/newCode/pop_arrayHS.cc + * + * int pop(unsigned x) { + * x = x - ((x >> 1) & 0x55555555); + * x = (x & 0x33333333) + ((x >> 2) & 0x33333333); + * x = (x + (x >> 4)) & 0x0F0F0F0F; + * x = x + (x >> 8); + * x = x + (x >> 16); + * return x & 0x0000003F; + * } + ***/ + + // 64 bit extension of the C function from above + x = x - ((x >>> 1) & 0x5555555555555555L); + x = (x & 0x3333333333333333L) + ((x >>> 2) & 0x3333333333333333L); + x = (x + (x >>> 4)) & 0x0F0F0F0F0F0F0F0FL; + x = x + (x >>> 8); + x = x + (x >>> 16); + x = x + (x >>> 32); + return ((int) x) & 0x7F; + } + + /*** + * http://supertech.lcs.mit.edu/~heinz/dt/node7.html Ernst A. Heinz + * DARKTHOUGHT prefers the following non-iterative formulation that + * stems from the well-known ``Hacker's Memory'' collection of + * programming tricks. It performs better than intuitive methods with + * lookup tables because the tables get either too large or need too many + * lookups.1.3 + * + * #define m1 ((unsigned_64) 0x5555555555555555) + * #define m2 ((unsigned_64) 0x3333333333333333) + * + * unsigned int non_iterative_pop(const unsigned_64 b) { + * unsigned_32 n; + * const unsigned_64 a = b - ((b >> 1) & m1); + * const unsigned_64 c = (a & m2) + ((a >> 2) & m2); + * n = ((unsigned_32) c) + ((unsigned_32) (c >> 32)); + * n = (n & 0x0F0F0F0F) + ((n >> 4) & 0x0F0F0F0F); + * n = (n & 0xFFFF) + (n >> 16); + * n = (n & 0xFF) + (n >> 8); + * return n; + * } + * + * // Looks like 19 simple arithmetic operations -YCS + ***/ + + public static int pop(long v0, long v1, long v2, long v3) { + // derived from pop_array by setting last four elems to 0. + // exchanges one pop() call for 10 elementary operations + // saving about 7 instructions... is there a better way? + long twosA = v0 & v1; + long ones = v0 ^ v1; + + long u2 = ones ^ v2; + long twosB = (ones & v2) | (u2 & v3); + ones = u2 ^ v3; + + long fours = (twosA & twosB); + long twos = twosA ^ twosB; + + return (pop(fours) << 2) + (pop(twos) << 1) + pop(ones); + + } + + /*** + * Counts the number of set bits in an array of longs + */ + public static long pop_array(long A[], int wordOffset, int numWords) { + /* + * Robert Harley and David Seal's bit counting algorithm, as documented + * in the revisions of Hacker's Delight + * http://www.hackersdelight.org/revisions.pdf + * http://www.hackersdelight.org/HDcode/newCode/pop_arrayHS.cc + * + * This function was adapted to Java, and extended to use 64 bit words. + * if only we had access to wider registers like SSE from java... + * + * This function can be transformed to compute the popcoun of other + * functions + * on bitsets via something like this: + * sed 's/A\[\([^]]*\)\]/\(A[\1] \& B[\1]\)/g' + */ + int n = wordOffset + numWords; + long tot = 0, tot8 = 0; + long ones = 0, twos = 0, fours = 0; + + int i; + for (i = wordOffset; i <= n - 8; i += 8) { + /*** + * C macro from Hacker's Delight + * #define CSA(h,l, a,b,c) \ + * {unsigned u = a ^ b; unsigned v = c; \ + * h = (a & b) | (u & v); l = u ^ v;} + ***/ + + long twosA, twosB, foursA, foursB, eights; + + // CSA(twosA, ones, ones, A[i], A[i+1]) + { + long b = A[i], c = A[i + 1]; + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(twosB, ones, ones, A[i+2], A[i+3]) + { + long b = A[i + 2], c = A[i + 3]; + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(foursA, twos, twos, twosA, twosB) + { + long u = twos ^ twosA; + foursA = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + // CSA(twosA, ones, ones, A[i+4], A[i+5]) + { + long b = A[i + 4], c = A[i + 5]; + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(twosB, ones, ones, A[i+6], A[i+7]) + { + long b = A[i + 6], c = A[i + 7]; + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(foursB, twos, twos, twosA, twosB) + { + long u = twos ^ twosA; + foursB = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + + // CSA(eights, fours, fours, foursA, foursB) + { + long u = fours ^ foursA; + eights = (fours & foursA) | (u & foursB); + fours = u ^ foursB; + } + tot8 += pop(eights); + } + + if (i <= n - 4) { + long twosA, twosB, foursA, eights; + { + long b = A[i], c = A[i + 1]; + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + { + long b = A[i + 2], c = A[i + 3]; + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + { + long u = twos ^ twosA; + foursA = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += pop(eights); + i += 4; + } + + if (i <= n - 2) { + long b = A[i], c = A[i + 1]; + long u = ones ^ b; + long twosA = (ones & b) | (u & c); + ones = u ^ c; + + long foursA = twos & twosA; + twos = twos ^ twosA; + + long eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += pop(eights); + i += 2; + } + + if (i < n) { + tot += pop(A[i]); + } + + tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); + + return tot; + } + + /** + * Returns the popcount or cardinality of the two sets after an + * intersection. + * Neither array is modified. + */ + public static long pop_intersect(long A[], long B[], int wordOffset, int numWords) { + // generated from pop_array via sed 's/A\[\([^]]*\)\]/\(A[\1] \& + // B[\1]\)/g' + int n = wordOffset + numWords; + long tot = 0, tot8 = 0; + long ones = 0, twos = 0, fours = 0; + + int i; + for (i = wordOffset; i <= n - 8; i += 8) { + long twosA, twosB, foursA, foursB, eights; + + // CSA(twosA, ones, ones, (A[i] & B[i]), (A[i+1] & B[i+1])) + { + long b = (A[i] & B[i]), c = (A[i + 1] & B[i + 1]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(twosB, ones, ones, (A[i+2] & B[i+2]), (A[i+3] & B[i+3])) + { + long b = (A[i + 2] & B[i + 2]), c = (A[i + 3] & B[i + 3]); + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(foursA, twos, twos, twosA, twosB) + { + long u = twos ^ twosA; + foursA = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + // CSA(twosA, ones, ones, (A[i+4] & B[i+4]), (A[i+5] & B[i+5])) + { + long b = (A[i + 4] & B[i + 4]), c = (A[i + 5] & B[i + 5]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(twosB, ones, ones, (A[i+6] & B[i+6]), (A[i+7] & B[i+7])) + { + long b = (A[i + 6] & B[i + 6]), c = (A[i + 7] & B[i + 7]); + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(foursB, twos, twos, twosA, twosB) + { + long u = twos ^ twosA; + foursB = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + + // CSA(eights, fours, fours, foursA, foursB) + { + long u = fours ^ foursA; + eights = (fours & foursA) | (u & foursB); + fours = u ^ foursB; + } + tot8 += pop(eights); + } + + if (i <= n - 4) { + long twosA, twosB, foursA, eights; + { + long b = (A[i] & B[i]), c = (A[i + 1] & B[i + 1]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + { + long b = (A[i + 2] & B[i + 2]), c = (A[i + 3] & B[i + 3]); + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + { + long u = twos ^ twosA; + foursA = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += pop(eights); + i += 4; + } + + if (i <= n - 2) { + long b = (A[i] & B[i]), c = (A[i + 1] & B[i + 1]); + long u = ones ^ b; + long twosA = (ones & b) | (u & c); + ones = u ^ c; + + long foursA = twos & twosA; + twos = twos ^ twosA; + + long eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += pop(eights); + i += 2; + } + + if (i < n) { + tot += pop((A[i] & B[i])); + } + + tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); + + return tot; + } + + /** + * Returns the popcount or cardinality of the union of two sets. + * Neither array is modified. + */ + public static long pop_union(long A[], long B[], int wordOffset, int numWords) { + // generated from pop_array via sed 's/A\[\([^]]*\)\]/\(A[\1] \| + // B[\1]\)/g' + int n = wordOffset + numWords; + long tot = 0, tot8 = 0; + long ones = 0, twos = 0, fours = 0; + + int i; + for (i = wordOffset; i <= n - 8; i += 8) { + /*** + * C macro from Hacker's Delight + * #define CSA(h,l, a,b,c) \ + * {unsigned u = a ^ b; unsigned v = c; \ + * h = (a & b) | (u & v); l = u ^ v;} + ***/ + + long twosA, twosB, foursA, foursB, eights; + + // CSA(twosA, ones, ones, (A[i] | B[i]), (A[i+1] | B[i+1])) + { + long b = (A[i] | B[i]), c = (A[i + 1] | B[i + 1]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(twosB, ones, ones, (A[i+2] | B[i+2]), (A[i+3] | B[i+3])) + { + long b = (A[i + 2] | B[i + 2]), c = (A[i + 3] | B[i + 3]); + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(foursA, twos, twos, twosA, twosB) + { + long u = twos ^ twosA; + foursA = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + // CSA(twosA, ones, ones, (A[i+4] | B[i+4]), (A[i+5] | B[i+5])) + { + long b = (A[i + 4] | B[i + 4]), c = (A[i + 5] | B[i + 5]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(twosB, ones, ones, (A[i+6] | B[i+6]), (A[i+7] | B[i+7])) + { + long b = (A[i + 6] | B[i + 6]), c = (A[i + 7] | B[i + 7]); + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(foursB, twos, twos, twosA, twosB) + { + long u = twos ^ twosA; + foursB = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + + // CSA(eights, fours, fours, foursA, foursB) + { + long u = fours ^ foursA; + eights = (fours & foursA) | (u & foursB); + fours = u ^ foursB; + } + tot8 += pop(eights); + } + + if (i <= n - 4) { + long twosA, twosB, foursA, eights; + { + long b = (A[i] | B[i]), c = (A[i + 1] | B[i + 1]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + { + long b = (A[i + 2] | B[i + 2]), c = (A[i + 3] | B[i + 3]); + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + { + long u = twos ^ twosA; + foursA = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += pop(eights); + i += 4; + } + + if (i <= n - 2) { + long b = (A[i] | B[i]), c = (A[i + 1] | B[i + 1]); + long u = ones ^ b; + long twosA = (ones & b) | (u & c); + ones = u ^ c; + + long foursA = twos & twosA; + twos = twos ^ twosA; + + long eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += pop(eights); + i += 2; + } + + if (i < n) { + tot += pop((A[i] | B[i])); + } + + tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); + + return tot; + } + + /** + * Returns the popcount or cardinality of A & ~B + * Neither array is modified. + */ + public static long pop_andnot(long A[], long B[], int wordOffset, int numWords) { + // generated from pop_array via sed 's/A\[\([^]]*\)\]/\(A[\1] \& + // ~B[\1]\)/g' + int n = wordOffset + numWords; + long tot = 0, tot8 = 0; + long ones = 0, twos = 0, fours = 0; + + int i; + for (i = wordOffset; i <= n - 8; i += 8) { + /*** + * C macro from Hacker's Delight + * #define CSA(h,l, a,b,c) \ + * {unsigned u = a ^ b; unsigned v = c; \ + * h = (a & b) | (u & v); l = u ^ v;} + ***/ + + long twosA, twosB, foursA, foursB, eights; + + // CSA(twosA, ones, ones, (A[i] & ~B[i]), (A[i+1] & ~B[i+1])) + { + long b = (A[i] & ~B[i]), c = (A[i + 1] & ~B[i + 1]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(twosB, ones, ones, (A[i+2] & ~B[i+2]), (A[i+3] & ~B[i+3])) + { + long b = (A[i + 2] & ~B[i + 2]), c = (A[i + 3] & ~B[i + 3]); + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(foursA, twos, twos, twosA, twosB) + { + long u = twos ^ twosA; + foursA = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + // CSA(twosA, ones, ones, (A[i+4] & ~B[i+4]), (A[i+5] & ~B[i+5])) + { + long b = (A[i + 4] & ~B[i + 4]), c = (A[i + 5] & ~B[i + 5]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(twosB, ones, ones, (A[i+6] & ~B[i+6]), (A[i+7] & ~B[i+7])) + { + long b = (A[i + 6] & ~B[i + 6]), c = (A[i + 7] & ~B[i + 7]); + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(foursB, twos, twos, twosA, twosB) + { + long u = twos ^ twosA; + foursB = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + + // CSA(eights, fours, fours, foursA, foursB) + { + long u = fours ^ foursA; + eights = (fours & foursA) | (u & foursB); + fours = u ^ foursB; + } + tot8 += pop(eights); + } + + if (i <= n - 4) { + long twosA, twosB, foursA, eights; + { + long b = (A[i] & ~B[i]), c = (A[i + 1] & ~B[i + 1]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + { + long b = (A[i + 2] & ~B[i + 2]), c = (A[i + 3] & ~B[i + 3]); + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + { + long u = twos ^ twosA; + foursA = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += pop(eights); + i += 4; + } + + if (i <= n - 2) { + long b = (A[i] & ~B[i]), c = (A[i + 1] & ~B[i + 1]); + long u = ones ^ b; + long twosA = (ones & b) | (u & c); + ones = u ^ c; + + long foursA = twos & twosA; + twos = twos ^ twosA; + + long eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += pop(eights); + i += 2; + } + + if (i < n) { + tot += pop((A[i] & ~B[i])); + } + + tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); + + return tot; + } + + public static long pop_xor(long A[], long B[], int wordOffset, int numWords) { + int n = wordOffset + numWords; + long tot = 0, tot8 = 0; + long ones = 0, twos = 0, fours = 0; + + int i; + for (i = wordOffset; i <= n - 8; i += 8) { + /*** + * C macro from Hacker's Delight + * #define CSA(h,l, a,b,c) \ + * {unsigned u = a ^ b; unsigned v = c; \ + * h = (a & b) | (u & v); l = u ^ v;} + ***/ + + long twosA, twosB, foursA, foursB, eights; + + // CSA(twosA, ones, ones, (A[i] ^ B[i]), (A[i+1] ^ B[i+1])) + { + long b = (A[i] ^ B[i]), c = (A[i + 1] ^ B[i + 1]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(twosB, ones, ones, (A[i+2] ^ B[i+2]), (A[i+3] ^ B[i+3])) + { + long b = (A[i + 2] ^ B[i + 2]), c = (A[i + 3] ^ B[i + 3]); + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(foursA, twos, twos, twosA, twosB) + { + long u = twos ^ twosA; + foursA = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + // CSA(twosA, ones, ones, (A[i+4] ^ B[i+4]), (A[i+5] ^ B[i+5])) + { + long b = (A[i + 4] ^ B[i + 4]), c = (A[i + 5] ^ B[i + 5]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(twosB, ones, ones, (A[i+6] ^ B[i+6]), (A[i+7] ^ B[i+7])) + { + long b = (A[i + 6] ^ B[i + 6]), c = (A[i + 7] ^ B[i + 7]); + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(foursB, twos, twos, twosA, twosB) + { + long u = twos ^ twosA; + foursB = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + + // CSA(eights, fours, fours, foursA, foursB) + { + long u = fours ^ foursA; + eights = (fours & foursA) | (u & foursB); + fours = u ^ foursB; + } + tot8 += pop(eights); + } + + if (i <= n - 4) { + long twosA, twosB, foursA, eights; + { + long b = (A[i] ^ B[i]), c = (A[i + 1] ^ B[i + 1]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + { + long b = (A[i + 2] ^ B[i + 2]), c = (A[i + 3] ^ B[i + 3]); + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + { + long u = twos ^ twosA; + foursA = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += pop(eights); + i += 4; + } + + if (i <= n - 2) { + long b = (A[i] ^ B[i]), c = (A[i + 1] ^ B[i + 1]); + long u = ones ^ b; + long twosA = (ones & b) | (u & c); + ones = u ^ c; + + long foursA = twos & twosA; + twos = twos ^ twosA; + + long eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += pop(eights); + i += 2; + } + + if (i < n) { + tot += pop((A[i] ^ B[i])); + } + + tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); + + return tot; + } + + public long cardinality() { + return pop_array(bits, 0, wlen); + } + + /** + * Returns the popcount or cardinality of the intersection of the two sets. + * Neither set is modified. + */ + public static long intersectionCount(OpenBitSet a, OpenBitSet b) { + return pop_intersect(a.bits, b.bits, 0, Math.min(a.wlen, b.wlen)); + } + + /** + * Returns the popcount or cardinality of the union of the two sets. + * Neither set is modified. + */ + public static long unionCount(OpenBitSet a, OpenBitSet b) { + long tot = pop_union(a.bits, b.bits, 0, Math.min(a.wlen, b.wlen)); + if (a.wlen < b.wlen) { + tot += pop_array(b.bits, a.wlen, b.wlen - a.wlen); + } else if (a.wlen > b.wlen) { + tot += pop_array(a.bits, b.wlen, a.wlen - b.wlen); + } + return tot; + } + + /** + * Returns the popcount or cardinality of "a and not b" + * or "intersection(a, not(b))" + * Neither set is modified. + */ + public static long andNotCount(OpenBitSet a, OpenBitSet b) { + long tot = pop_andnot(a.bits, b.bits, 0, Math.min(a.wlen, b.wlen)); + if (a.wlen > b.wlen) { + tot += pop_array(a.bits, b.wlen, a.wlen - b.wlen); + } + return tot; + } + + /** + * Returns the popcount or cardinality of the exclusive-or of the two sets. + * Neither set is modified. + */ + public static long xorCount(OpenBitSet a, OpenBitSet b) { + long tot = pop_xor(a.bits, b.bits, 0, Math.min(a.wlen, b.wlen)); + if (a.wlen < b.wlen) { + tot += pop_array(b.bits, a.wlen, b.wlen - a.wlen); + } else if (a.wlen > b.wlen) { + tot += pop_array(a.bits, b.wlen, a.wlen - b.wlen); + } + return tot; + } + + /** + * Returns the index of the first set bit starting at the index specified. + * -1 is returned if there are no more set bits. + */ + public int nextSetBit(int index) { + int i = index >> 6; + if (i >= wlen) + return -1; + int subIndex = index & 0x3f; // index within the word + long word = bits[i] >> subIndex; // skip all the bits to the right of + // index + + if (word != 0) { + return (i << 6) + subIndex + ntz(word); + } + + while (++i < wlen) { + word = bits[i]; + if (word != 0) + return (i << 6) + ntz(word); + } + + return -1; + } + + /** + * Returns the index of the first set bit starting at the index specified. + * -1 is returned if there are no more set bits. + */ + public long nextSetBit(long index) { + int i = (int) (index >>> 6); + if (i >= wlen) + return -1; + int subIndex = (int) index & 0x3f; // index within the word + long word = bits[i] >>> subIndex; // skip all the bits to the right of + // index + + if (word != 0) { + return (((long) i) << 6) + (subIndex + ntz(word)); + } + + while (++i < wlen) { + word = bits[i]; + if (word != 0) + return (((long) i) << 6) + ntz(word); + } + + return -1; + } + + /*** + * python code to generate ntzTable + * def ntz(val): + * if val==0: return 8 + * i=0 + * while (val&0x01)==0: + * i = i+1 + * val >>= 1 + * return i + * print ','.join([ str(ntz(i)) for i in range(256) ]) + ***/ + public static final byte ntzTable[] = { 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, + 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, + 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, + 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, + 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, + 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, + 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, + 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 }; + + /** Returns number of trailing zeros in the 64 bit long value */ + public static int ntz(long val) { + // A full binary search to determine the low byte was slower than + // a linear search for nextSetBit(). This is most likely because + // the implementation of nextSetBit() shifts bits to the right, + // increasing + // the probability that the first non-zero byte is in the rhs. + // + // This implementation does a single binary search at the top level only + // so that all other bit shifting can be done on ints instead of longs + // to + // remain friendly to 32 bit architectures. In addition, the case of a + // non-zero first byte is checked for first because it is the most + // common + // in dense bit arrays. + + int lower = (int) val; + int lowByte = lower & 0xff; + if (lowByte != 0) + return ntzTable[lowByte]; + + if (lower != 0) { + lowByte = (lower >>> 8) & 0xff; + if (lowByte != 0) + return ntzTable[lowByte] + 8; + lowByte = (lower >>> 16) & 0xff; + if (lowByte != 0) + return ntzTable[lowByte] + 16; + // no need to mask off low byte for the last byte in the 32 bit word + // no need to check for zero on the last byte either. + return ntzTable[lower >>> 24] + 24; + } else { + // grab upper 32 bits + int upper = (int) (val >> 32); + lowByte = upper & 0xff; + if (lowByte != 0) + return ntzTable[lowByte] + 32; + lowByte = (upper >>> 8) & 0xff; + if (lowByte != 0) + return ntzTable[lowByte] + 40; + lowByte = (upper >>> 16) & 0xff; + if (lowByte != 0) + return ntzTable[lowByte] + 48; + // no need to mask off low byte for the last byte in the 32 bit word + // no need to check for zero on the last byte either. + return ntzTable[upper >>> 24] + 56; + } + } + + /** + * returns 0 based index of first set bit + * (only works for x!=0) + */ + public static int ntz2(long x) { + int n = 0; + int y = (int) x; + if (y == 0) { + n += 32; + y = (int) (x >>> 32); + } // the only 64 bit shift necessary + if ((y & 0x0000FFFF) == 0) { + n += 16; + y >>>= 16; + } + if ((y & 0x000000FF) == 0) { + n += 8; + y >>>= 8; + } + return (ntzTable[y & 0xff]) + n; + } + + int ntz3(long x) { + int n = 1; + + // do the first step as a long, all others as ints. + int y = (int) x; + if (y == 0) { + n += 32; + y = (int) (x >>> 32); + } + if ((y & 0x0000FFFF) == 0) { + n += 16; + y >>>= 16; + } + if ((y & 0x000000FF) == 0) { + n += 8; + y >>>= 8; + } + if ((y & 0x0000000F) == 0) { + n += 4; + y >>>= 4; + } + if ((y & 0x00000003) == 0) { + n += 2; + y >>>= 2; + } + return n - (y & 1); + } + + /*** + * Many 32 bit ntz algorithms at http://www.hackersdelight.org/HDcode/ntz.cc + */ + + public Object clone() { + try { + OpenBitSet obs = (OpenBitSet) super.clone(); + obs.bits = obs.bits.clone(); // hopefully an array clone is as + // fast(er) than arraycopy + return obs; + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + } + + /** this = this AND other */ + public void intersect(OpenBitSet other) { + int newLen = Math.min(this.wlen, other.wlen); + long[] thisArr = this.bits; + long[] otherArr = other.bits; + // testing against zero can be more efficient + int pos = newLen; + while (--pos >= 0) { + thisArr[pos] &= otherArr[pos]; + } + if (this.wlen > newLen) { + // fill zeros from the new shorter length to the old length + Arrays.fill(bits, newLen, this.wlen, 0); + } + this.wlen = newLen; + } + + /** this = this OR other */ + public void union(OpenBitSet other) { + int newLen = Math.max(wlen, other.wlen); + ensureCapacityWords(newLen); + + long[] thisArr = this.bits; + long[] otherArr = other.bits; + int pos = this.wlen; + while (--pos >= 0) { + thisArr[pos] |= otherArr[pos]; + } + if (this.wlen < newLen) { + System.arraycopy(otherArr, this.wlen, thisArr, this.wlen, newLen - this.wlen); + } + this.wlen = newLen; + } + + /** Remove all elements set in other. this = this AND_NOT other */ + public void remove(OpenBitSet other) { + int idx = Math.min(wlen, other.wlen); + long[] thisArr = this.bits; + long[] otherArr = other.bits; + while (--idx >= 0) { + thisArr[idx] &= ~otherArr[idx]; + } + } + + /** this = this XOR other */ + public void xor(OpenBitSet other) { + int newLen = Math.max(wlen, other.wlen); + ensureCapacityWords(newLen); + + long[] thisArr = this.bits; + long[] otherArr = other.bits; + int pos = this.wlen; + while (--pos >= 0) { + thisArr[pos] ^= otherArr[pos]; + } + if (this.wlen < newLen) { + System.arraycopy(otherArr, this.wlen, thisArr, this.wlen, newLen - this.wlen); + } + this.wlen = newLen; + } + + // some BitSet compatability methods + + // ** see {@link intersect} */ + public void and(OpenBitSet other) { + intersect(other); + } + + // ** see {@link union} */ + public void or(OpenBitSet other) { + union(other); + } + + // ** see {@link andNot} */ + public void andNot(OpenBitSet other) { + remove(other); + } + + /** + * Resize the bitset with the size given as a number of words (64 + * bit longs) + */ + public void ensureCapacityWords(int numWords) { + if (bits.length < numWords) { + long[] newBits = new long[numWords]; + System.arraycopy(bits, 0, newBits, 0, wlen); + bits = newBits; + } + } + + public void trimTrailingZeros() { + int idx = wlen - 1; + while (idx >= 0 && bits[idx] == 0) + idx--; + wlen = idx + 1; + } + +} |