#include "blk256.h" int blk256_cmp(blk256_t *op1, blk256_t *op2) { if (blk256_sign_bit(op1) != blk256_sign_bit(op2)) return blk256_sign_bit(op1) ? -1 : 1; int neg = blk256_sign(op1); if (blk256_high_bit(op1) != blk256_high_bit(op2)) return blk256_high_bit(op1) ? neg : -neg; /* Compare limbs most significant to least */ int i; for (i = 3; i >= 0; i--) if (op1->x[i] != op2->x[i]) return op1->x[i] < op2->x[i] ? -neg : neg; return 0; } int blk256_cmp_blk128(blk256_t *op1, blk128_t *op2) { if (blk256_sign_bit(op1)) return -1; if (blk256_high_bit(op1) || op1->x[3] || op1->x[2]) return 1; if (op1->x[1] != op2->hi) return op1->x[1] < op2->hi ? -1 : 1; if (op1->x[0] != op2->lo) return op1->x[0] < op2->lo ? -1 : 1; return 0; } /** * NOTE Does not handle negative numbers! */ void blk256_inc(blk256_t *op) { /* Takes advantage of short-circuit OR. If increasing x[i] causes all * bits to be set to zero, we must carry and the next arm in the OR * expression will be evaluated. * * Consider "++x0 || ++x1", x1 will only be incremented in the case * where "++x0" evaluates to 0 (ie in the case all bits are 1 prior to * increment). */ if (++op->x[0] || ++op->x[1] || ++op->x[2]) return; if (!++op->x[3]) blk256_set_high_bit(op); } /** * NOTE Does not handle negative numbers! */ void blk256_dec(blk256_t *op) { if (op->x[0]-- || op->x[1]-- || op->x[2]--) return; if (!op->x[3]--) blk256_clear_high_bit(op); } /** * */ void blk256_add(blk256_t *rop, blk256_t *op1, blk256_t *op2) { /** * Handle cases where one of the arguments is negative */ if (blk256_sign_bit(op1) ^ blk256_sign_bit(op2)) { if (blk256_sign_bit(op1)) { blk256_t x; blk256_cp(&x, op1); blk256_clear_sign_bit(&x); blk256_sub(rop, op2, &x); } else { blk256_clear_sign_bit(op2); blk256_sub(rop, op1, op2); blk256_set_sign_bit(op2); } return; } /** * Now either (1) both are positive, or (2) both a negative */ if (rop != op1) blk256_cp(rop, op1); /* deref */ uint64_t *opx = rop->x; uint64_t *opy = op2->x; opx[0] += opy[0]; if (opx[0] >= opy[0] || ++opx[1] || ++opx[2]) goto add1; if (!++opx[3]) blk256_set_high_bit(rop); add1: opx[1] += opy[1]; if (opx[1] >= opy[1] || ++opx[2]) goto add2; if (!++opx[3]) blk256_set_high_bit(rop); add2: opx[2] += opy[2]; if (opx[2] >= opy[2]) goto add3; if (!++opx[3]) blk256_set_high_bit(rop); add3: opx[3] += opy[3]; if (opx[3] < opy[3]) blk256_set_high_bit(rop); else /** * This assignment deals with two things: * * (1) setting the high bit, if present on either op1 or op2 * (2) setting the sign bit, if present on both op1 and op2 */ rop->mx |= op2->mx; } void blk256_add_blk128(blk256_t *rop, blk256_t *op, blk128_t *inc) { if (rop != op) blk256_cp(rop, op); rop->x[1] += inc->hi; /* Propagate carry */ if (rop->x[1] >= inc->hi || ++rop->x[2]) goto add_lo; if (!++rop->x[3]) rop->mx = 1; add_lo: rop->x[0] += inc->lo; /* Propagate carry */ if (rop->x[0] >= inc->lo || ++rop->x[1] || ++rop->x[2]) return; if (!++rop->x[3]) rop->mx |= 1; } void blk256_sub(blk256_t *rop, blk256_t *op1, blk256_t *op2) { blk256_t r; /** * Handle negative numbers */ int neg = blk256_sign_bit(op1) << 1 | blk256_sign_bit(op2); switch (neg) { case 1: /** * op1 >= 0 and op2 < 0 * */ blk256_clear_sign_bit(op2); blk256_add(rop, op1, op2); blk256_set_sign_bit(op2); return; case 2: /** * op1 < 0 and op2 >= 0 */ blk256_cp(&r, op1); blk256_clear_sign_bit(&r); blk256_add(rop, &r, op2); blk256_set_sign_bit(rop); return; default: /* Nothing */ ; } /** * This subtraction assumes op1 >= op2, so result is positive! * * At this point either op1, op2 >= 0 or op1, op2 < 0. * * We need to figure if abs(op1) < abs(op2) */ int abs_cmp = blk256_sign(op1) * blk256_cmp(op1, op2); blk256_t *sop; if (abs_cmp == -1) { /* abs(opx) < abs(opy) */ blk256_cp(&r, op2); sop = op1; if (neg) blk256_clear_sign_bit(&r); else blk256_set_sign_bit(&r); } else { /* abs(opx) >= abs(opy) */ blk256_cp(&r, op1); sop = op2; if (neg) blk256_set_sign_bit(&r); else blk256_clear_sign_bit(&r); } /** * Arranged such that abs(opx) >= abs(opy) */ uint64_t *opx = r.x; uint64_t *opy = sop->x; /* Subtract opy[0] */ if (opx[0] < opy[0] && !opx[1]-- && !opx[2]-- && !opx[3]--) blk256_clear_high_bit(&r); opx[0] -= opy[0]; /* Subtract opy[1] */ if (opx[1] < opy[1] && !opx[2]-- && !opx[3]--) blk256_clear_high_bit(&r); opx[1] -= opy[1]; /* Subtract opy[2] */ if (opx[2] < opy[2] && !opx[3]--) blk256_clear_high_bit(&r); opx[2] -= opy[2]; /* Subtract opy[3] */ if (opx[3] < opy[3]) blk256_clear_high_bit(&r); else r.mx |= ((blk256_high_bit(&r) - blk256_high_bit(sop))) & 1; opx[3] -= opy[3]; /* Copy result into rop */ blk256_cp(rop, &r); } // TODO Handle negative numbers void blk256_sub_blk128(blk256_t *rop, blk256_t *op, blk128_t *dec) { /* Copy op1 into rop */ if (rop != op) blk256_cp(rop, op); /* deref */ uint64_t *opx = rop->x; if (opx[1] >= dec->hi || opx[2]--) goto sub_hi; if (!opx[3]--) rop->mx = 0; sub_hi: opx[1] -= dec->hi; if (opx[0] >= dec->lo || opx[1]-- || opx[2]--) goto sub_lo; if (!opx[3]--) rop->mx = 0; sub_lo: opx[0] -= dec->lo; } // TODO Handle negative numbers void blk256_sub_u64(blk256_t *rop, blk256_t *op, uint64_t x) { if (rop != op) blk256_cp(rop, op); /* deref */ uint64_t *n = rop->x; if (n[0] < x) if (n[1]-- || n[2]-- || n[3]--) rop->mx = 0; n[0] -= x; } /** * TODO Consider: what is the top limit for operands (without overflow)? * TODO After above: How is this handled? */ void blk256_mul_blk128(blk256_t *rop, blk256_t *op1, blk128_t *op2) { /** * We store our result here until the end */ uint32_t w[12] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /** * Represent op2 as four 32-bit numbers */ uint32_t yt[4] = { lo(op2->lo), hi(op2->lo), lo(op2->hi), hi(op2->hi) }; /** * - c is the carry * - y is the current 32 bits in op2 (from yt) * - t contains 64-bit results from 32-bit multiplications */ uint32_t c, y; uint64_t t; int i, j; for (i = 0; i < 4; i++) { y = yt[i]; for (j = 0; j < 8; j += 2) { t = w[i+j] + lo(op1->x[j>>1]) * y + c; w[i+j] = lo(t); c = hi(t); t = w[i+j+1] + hi(op1->x[j>>1]) * y + c; w[i+j+1] = lo(t); c = hi(t); } w[i+8] = c; c = 0; } char mx = (char)w[8] & 1; if (blk256_sign_bit(op1)) mx |= 0x2; /** * TODO Consider: This multiplication is "modulo" (2^257), ie the result * must fit in a 257-bit number. Is this a reasonable assumption? */ blk256_set(rop, mx , u64(w[7]) << 32 | u64(w[6]) , u64(w[5]) << 32 | u64(w[4]) , u64(w[3]) << 32 | u64(w[2]) , u64(w[1]) << 32 | u64(w[0])); } __inline__ double blk256_as_double(blk256_t *op) { return (double)(blk256_sign(op)) * ((double)blk256_high_bit(op) * B256 + (double)op->x[3] * B192 + (double)op->x[2] * B128 + (double)op->x[1] * B64 + (double)op->x[0]); } __inline__ double blk128_as_double(blk128_t *op) { return (double)op->hi * B64 + (double)op->lo; } __inline__ void double_as_blk256(blk256_t *rop, double x) { int sgn = !!(x < 0); if (sgn) x = -x; if (x < 1) { blk256_set(rop, sgn << 1, 0, 0, 0, 1); return; } rop->mx = (sgn << 1) | (((char)(x / B256)) & 1); rop->x[3] = u64(x / B192); rop->x[2] = u64(x / B128); rop->x[1] = u64(x / B64); rop->x[0] = u64(x); } /** * Division of 256-bit number by 128-bit number. This is useful for * decomposition in encoding * * The method employed is converting numbers to double representation and * dividing these. This is a much faster method than any iterative method. */ void blk256_div_blk128(blk256_t *q, blk128_t *r, blk256_t *x, blk128_t *d) { blk256_t qq, rr, t, m; blk256_set(&qq, 0, 0, 0, 0, 0); blk256_cp(&rr, x); double div = blk128_as_double(d); double z, xd; while (blk256_cmp_blk128(&rr, d) > -1 || blk256_sign_bit(&rr)) { xd = blk256_as_double(&rr); z = xd / div; double_as_blk256(&t, z); /** * Compute quotient (t) times divisor (d) * * If t*d is larger than the remaing value (rr) then our found * quotient is too large, and we iteratively make it smaller. */ blk256_mul_blk128(&m, &t, d); blk256_add(&qq, &qq, &t); /* Add to divisor found so far */ blk256_sub(&rr, &rr, &m); } /** * Set quotient */ blk256_cp(q, &qq); /** * Set remainder */ blk128_set(r, rr.x[1], rr.x[0]); }