#include #include "blk128.h" #include "blk256.h" #define blk256_print(msg, blk) \ do { \ printf("%s: %lu,%lu,%lu,%lu\n", msg, (blk).x[3], (blk).x[2], (blk).x[1], (blk).x[0]); \ } while(0) #define blk128_print(msg, blk) \ do { \ printf("%s (%p) %lu, %lu\n", msg, blk, (blk)->hi, (blk)->lo); \ } while(0) #define init_blk128(name) \ do { \ name = (blk128_t *)malloc(sizeof(blk128_t)); \ if (!name) \ return SOLE_MEM_ERROR; \ blk128_zero(name); \ } while(0) #define init_blk256(name) \ do { \ name = (blk256_t *)malloc(sizeof(blk256_t)); \ if (!name) \ return SOLE_MEM_ERROR; \ blk256_zero(name); \ } while(0) /** * Internal Declarations */ void sole_pass1_compose(blk256_t *, blk128_t *, blk128_t *); void sole_pass1_decompose(blk256_t *, blk128_t *, blk256_t *, blk128_t *, blk128_t *); void sole_pass2_regroup(blk256_t *, blk256_t *, blk128_t *, blk128_t *); void sole_pass2_rev_regroup(blk256_t *, blk128_t *, blk256_t *, blk128_t *); void sole_pass1_rev_decompose(blk256_t *, blk256_t *, blk128_t *, blk128_t *); void sole_pass1_rev_compose(blk256_t *, blk256_t *, blk256_t *); int sole_encode_blk256(sole_stream *); int sole_decode_blk256(sole_stream *); int sole_stream_init(sole_stream *strm) { strm->flags = strm->total_in = strm->total_out = 0; init_blk128(strm->i3); init_blk128(strm->b3i); init_blk128(strm->r); init_blk256(strm->q); init_blk128(strm->b1); init_blk128(strm->b2); return SOLE_OK; } int sole_encode_u64(sole_stream *strm, uint64_t x) { return 0; } #define stream_stat(strm) /* #define stream_stat(strm) \ */ /* do { \ */ /* printf("strm: total_in=%lu, total_out=%lu, avail_in=%lu, avail_out=%lu, offset=%u, in=%p, out=%p\n", \ */ /* strm->total_in, strm->total_out, strm->avail_in, strm->avail_out, strm->offset, strm->inp, strm->outp); \ */ /* } while(0) */ /** * Output a 256-bit block */ int sole_put_blk256(sole_stream *strm, blk256_t *b) { if (strm->avail_out < BLK256SIZ) return SOLE_NO_BUF_SPC; blk256_t c; blk256_set(&c, 0, b->x[0], b->x[1], b->x[2], b->x[3]); memcpy(strm->outp, c.x, BLK256SIZ); strm->outp += BLK256SIZ; strm->avail_out -= BLK256SIZ; strm->total_out += BLK256SIZ; stream_stat(strm); return SOLE_OK; } /** * Rarely used */ int sole_put_blk128(sole_stream *strm, blk128_t *b) { if (strm->avail_out < BLK128SIZ) return SOLE_NO_BUF_SPC; stream_stat(strm); memcpy(strm->outp, b, BLK128SIZ); strm->outp += BLK128SIZ; strm->avail_out -= BLK128SIZ; strm->total_out += BLK128SIZ; return SOLE_OK; } #define stream_inp_is_aligned(strm) (!strm->offset) #define stream_inp_set_aligned(strm) (strm->offset = 0) /** * In case the given buffer was not aligned to 256 bits, realign on subsequent * invocations of sole_encode(). */ int sole_stream_realign(sole_stream *strm) { /* Here strm->offset > 0. If strm->offset > BLK128SIZ then we have data in strm->b1 and need to complete strm->b2, otherwise we need to first complete strm->b1, then strm->b2. We need to check availability of data here (who knows if we work with extremely small buffers?) */ size_t to_align = BLK256SIZ - strm->offset; /* Data required to * re-align */ if (strm->avail_in < to_align) { /* Not enough data to realign, just consume it and set a new offset */ if (to_align < BLK128SIZ) { /* Have full block in b1, offset large */ memcpy(strm->b2 + (strm->offset - BLK128SIZ), strm->inp, strm->avail_in); strm->offset += strm->avail_in; strm->inp += strm->avail_in; strm->total_in += strm->avail_in; strm->avail_in = 0; return SOLE_OK; } /* Small offset, still need to complete first block */ size_t rem_b1 = BLK128SIZ - strm->offset; if (rem_b1 > strm->avail_in) { /* Not even enough to complete first block */ memcpy(strm->b1 + strm->offset, strm->inp, strm->avail_in); strm->offset += strm->avail_in; strm->inp += strm->avail_in; strm->total_in += strm->avail_in; strm->avail_in = 0; return SOLE_OK; } /* Complete first block */ memcpy(strm->b1 + strm->offset, strm->inp, rem_b1); strm->offset += rem_b1; strm->inp += rem_b1; strm->total_in += rem_b1; strm->avail_in -= rem_b1; if (strm->avail_in > 0) { memcpy(strm->b2, strm->inp, strm->avail_in); strm->offset += strm->avail_in; strm->inp += strm->avail_in; strm->total_in += strm->avail_in; strm->avail_in = 0; } return SOLE_OK; } /* There is definitely enough data */ if (to_align < BLK128SIZ) { strm->offset -= BLK128SIZ; memcpy(strm->b2 + strm->offset, strm->inp, to_align); strm->inp += to_align; } else { /* Need to complete blocks b1 and b2 */ size_t to_read = to_align - BLK128SIZ; memcpy(strm->b1 + strm->offset, strm->inp, to_read); strm->inp += to_read; memcpy(strm->b2, strm->inp, BLK128SIZ); strm->inp += BLK128SIZ; } strm->avail_in -= to_align; strm->total_in += to_align; strm->offset = 0; return SOLE_OK; /* return sole_encode_blk256(strm); */ } /** * Encode the available bytes * * This should always consume all available input, to allow the user to specify * the next input buffer (if present). */ int sole_encode(sole_stream *strm) { int ret; stream_stat(strm); if (!strm->avail_in) return SOLE_OK; /* TODO Check avail_out > avail_in. We can actually compute how much * bigger this value should be */ if (strm->avail_in > strm->avail_out) return SOLE_NO_BUF_SPC; if (!stream_inp_is_aligned(strm)) { ret = sole_stream_realign(strm); if (ret != SOLE_OK) return ret; /* If aligned, then we have a completed double-block */ if (stream_inp_is_aligned(strm)) sole_encode_blk256(strm); } /* Main body - Read into 128-bit blocks */ for (; strm->avail_in >= BLK256SIZ; strm->avail_in -= BLK256SIZ, strm->total_in += BLK256SIZ) { memcpy(strm->b1, strm->inp, BLK128SIZ); strm->inp += BLK128SIZ; memcpy(strm->b2, strm->inp, BLK128SIZ); strm->inp += BLK128SIZ; ret = sole_encode_blk256(strm); if (ret != SOLE_OK) return ret; } /* Finish reading the remaining bytes */ if (strm->avail_in >= BLK128SIZ) { strm->offset = strm->avail_in; memcpy(strm->b1, strm->inp, BLK128SIZ); strm->inp += BLK128SIZ; strm->avail_in -= BLK128SIZ; strm->total_in += BLK128SIZ; if (strm->avail_in > 0) { blk128_zero(strm->b2); memcpy(strm->b2, strm->inp, strm->avail_in); strm->total_in += strm->avail_in; strm->avail_in = 0; } } else if (strm->avail_in > 0) { strm->offset = strm->avail_in; blk128_zero(strm->b1); memcpy(strm->b1, strm->inp, strm->avail_in); strm->total_in += strm->avail_in; strm->avail_in = 0; } return SOLE_OK; } int sole_encode_blk256(sole_stream *strm) { blk256_t db; /* Pass 1 */ sole_pass1_compose(&db, strm->b1, strm->b2); sole_pass1_decompose(&db, strm->r, &db, strm->b3i, strm->i3); if (blk128_is_zero(strm->i3)) { sole_put_blk128(strm, strm->r); blk256_cp(strm->q, &db); } else { /* Pass 2 */ blk256_t res; sole_pass2_regroup(&res, strm->q, strm->r, strm->i3); sole_put_blk256(strm, &res); blk256_cp(strm->q, &db); } /* Adjust counters */ blk128_inc3(strm->i3); blk128_dec3(strm->b3i); return SOLE_OK; } /** * Instruct the given stream that it is finished. This takes care of finalising * the output and encoding EOF. */ void sole_encode_end(sole_stream *strm) { stream_stat(strm); /* Writes the last two-three blocks */ blk256_t db, res; int have_even_blocks = strm->offset > BLK128SIZ; if (have_even_blocks) { sole_encode_blk256(strm); blk256_set(&db, 1, 0, 2, 0, 0); /* Composed double EOF */ sole_pass1_decompose(&db, strm->r, &db, strm->b3i, strm->i3); } else { blk256_t eof; blk256_set(&db, 0, strm->b1->hi, strm->b1->lo, strm->b1->hi, strm->b1->lo); blk256_set(&eof, 0, 0, 1, 0, 0); /* Custom compose */ blk256_add(&db, &db, &eof); sole_pass1_decompose(&db, strm->r, &db, strm->b3i, strm->i3); } sole_pass2_regroup(&res, strm->q, strm->r, strm->i3); sole_put_blk256(strm, &res); /* Cheeky implementation of final Pass 2 */ sole_put_blk256(strm, &db); } /** * Drop and release all resources associated with the given stream. This does * not free() the stream itself. */ void sole_stream_free(sole_stream *strm) { if (!strm) return; free(strm->i3); free(strm->b3i); free(strm->r); free(strm->q); free(strm->b1); free(strm->b2); } /** * Compose two 128-bit numbers */ void sole_pass1_compose(blk256_t *rop, blk128_t *x, blk128_t *y) { blk256_t r; blk256_set(&r, 0, x->hi, x->lo, y->hi, y->lo); blk256_add_blk128(rop, &r, x); } /** * Decompose a 256-bit block by (B-3i) * */ void sole_pass1_decompose(blk256_t *q, blk128_t *rem, blk256_t *op, blk128_t *b3i, blk128_t *i3) { /** * Special case: When i3 == 0, it means i == 0, and our decomposition is * simply dividing by B (shifting b bits down) */ if (blk128_is_zero(i3)) { blk128_set(rem, op->x[1], op->x[0]); blk256_set(q, 0, 0, op->mx, op->x[3], op->x[2]); /* op >> b */ return; } blk256_div_blk128(q, rem, op, b3i); /* blk256_print("Pass1 (decomp) q=", *q); */ /* blk128_print("Pass1 (decomp) r=", rem); */ } /** * In pass 2 we first compose a 256-bit and 128-bit number then extract the relevant bits * * say x is a 256-bit number (a quotient from a div) * y is a 128-bit number (a remainder) * * We do y(B+3i) + x * * Compose a new 256-bit number: * * w = yB + 3i*y + x */ void sole_pass2_regroup(blk256_t *rop, blk256_t *x, blk128_t *y, blk128_t *i3) { blk256_t w, m; blk256_set(&w, 0, y->hi, y->lo, 0, 0); /* y << B */ blk128_mul(&m, y, i3); blk256_add(&w, &w, &m); blk256_add(rop, x, &w); /* blk256_print("Pass2 ", *rop); */ } #define is_eof(blk) \ (!(blk).mx && !(blk).x[3] && (blk).x[2] == 1 && !(blk).x[1] && !(blk).x[0]) int sole_decode_blk256(sole_stream *strm) { blk128_t r_next; blk256_t dblk, x1, x2; int ret; blk256_set(&dblk, 0, strm->b1->hi, strm->b1->lo, strm->b2->hi, strm->b2->lo); /* Reverse Pass 2 */ sole_pass2_rev_regroup(strm->q, &r_next, &dblk, strm->i3); /* Reverse Pass 1: Now we have strm->r, strm->q */ sole_pass1_rev_decompose(&dblk, strm->q, strm->r, strm->i3); sole_pass1_rev_compose(&x1, &x2, &dblk); if (is_eof(x2)) { if (is_eof(x1)) return SOLE_DECODE_FINISH; blk128_t x = { x1.x[1], x1.x[0] }; ret = sole_put_blk128(strm, &x); return ret != SOLE_OK ? ret : SOLE_DECODE_FINISH; } x2.x[3] = x1.x[1]; x2.x[2] = x1.x[0]; ret = sole_put_blk256(strm, &x2); if (ret != SOLE_OK) return ret; blk128_cp(strm->r, &r_next); blk128_inc3(strm->i3); blk128_dec3(strm->b3i); return SOLE_OK; } /** * Reverse the encoding scheme. First run sole_pass2_rev_regroup() */ int sole_decode(sole_stream *strm) { int ret; /** * Initial checks */ if (blk128_is_zero(strm->i3)) { memcpy(strm->r, strm->inp, BLK128SIZ); blk128_print("r ", strm->r); strm->inp += BLK128SIZ; strm->avail_in -= BLK128SIZ; strm->total_in += BLK128SIZ; blk128_inc3(strm->i3); blk128_dec3(strm->b3i); } else if (!stream_inp_is_aligned(strm)) { ret = sole_stream_realign(strm); if (ret != SOLE_OK) return ret; if (stream_inp_is_aligned(strm)) sole_decode_blk256(strm); } for (; strm->avail_in >= BLK256SIZ; strm->avail_in -= BLK256SIZ, strm->total_in += BLK256SIZ) { memcpy(strm->b1, strm->inp, BLK128SIZ); strm->inp += BLK128SIZ; blk128_print("b1 ", strm->b1); memcpy(strm->b2, strm->inp, BLK128SIZ); strm->inp += BLK128SIZ; blk128_print("b2 ", strm->b2); ret = sole_decode_blk256(strm); if (ret != SOLE_OK) return ret; stream_stat(strm); } /* Finish reading the remaining bytes */ if (strm->avail_in >= BLK128SIZ) { strm->offset = strm->avail_in; memcpy(strm->b1, strm->inp, BLK128SIZ); strm->inp += BLK128SIZ; strm->avail_in -= BLK128SIZ; strm->total_in += BLK128SIZ; blk128_print("b1 (rem)", strm->b1); if (strm->avail_in > 0) { blk128_zero(strm->b2); memcpy(strm->b2, strm->inp, strm->avail_in); blk128_print("b2 (rem)", strm->b2); strm->total_in += strm->avail_in; strm->avail_in = 0; } } else if (strm->avail_in > 0) { strm->offset = strm->avail_in; blk128_zero(strm->b1); memcpy(strm->b1, strm->inp, strm->avail_in); strm->total_in += strm->avail_in; strm->avail_in = 0; blk128_print("b1 (rem)", strm->b1); } return SOLE_OK; } /** * A specialised multiplication method for sole_pass2_rev_regroup() * * We need to multiply some 256-bit number x by B+3i. There should be some upper limit to the value of x, */ void blk256_mul_blk128_i3(blk256_t *rop, blk256_t *x, blk128_t *i3) { blk256_t a, b; blk256_print("x ", *x); blk256_set(&a, x->x[2] & 1, x->x[1], x->x[0], 0, 0); if (!blk256_is_positive(x)) blk256_set_sign_bit(&a); blk256_mul_blk128(&b, x, i3); blk256_add(rop, &a, &b); } /** * Reverse the operation of sole_pass2_regroup recovering the quotient and * remainder that were merged. * * op = r * (B + 3i) + q * */ void sole_pass2_rev_regroup(blk256_t *q, blk128_t *r, blk256_t *op, blk128_t *i3) { blk256_t bp3i, qq, rr, t; blk256_print("rev", *op); blk256_set(&bp3i, 0, 0, 1, i3->hi, i3->lo); blk256_set(&qq, 0, 0, 0, 0, 0); blk256_cp(&rr, op); double div = blk256_as_double(&bp3i); double z, xd; while (blk256_cmp(&rr, &bp3i) > -1 || blk256_sign_bit(&rr)) { xd = blk256_as_double(&rr); z = xd / div; double_as_blk256(&t, z); blk256_add(&qq, &qq, &t); /* Add to divisor found so far */ /** * Compute quotient (t) times divisor (B+3i) * * If t*d is larger than the remaing value (rr) then our found * quotient is too large, and we iteratively make it smaller. */ blk256_mul_blk128_i3(&t, &t, i3); blk256_sub(&rr, &rr, &t); } /** * Set quotient */ blk256_set(q, 0, 0, rr.x[2], rr.x[1], rr.x[0]); /** * Set remainder */ blk128_set(r, qq.x[1], qq.x[0]); blk256_print("Pass2 (rev)", rr); blk256_print("Pass2 (rev)", qq); } /** * Given q, r and 3*i, compute * * rop = q*(B-3i) + r */ void sole_pass1_rev_decompose(blk256_t *rop, blk256_t *q, blk128_t *r, blk128_t *i3) { blk256_t w; blk256_set(&w, q->x[2] & 1, q->x[1], q->x[0], 0, 0); /* qB + r */ /* Subtract q*3i */ blk256_t m; blk256_mul_blk128(&m, q, i3); blk256_sub(&w, &w, &m); blk256_add_blk128(rop, &w, r); blk256_print("Pass 1 (rev. decomp)", *rop); } /** * Decompose op which is op = x1*(B+1) + x2 */ void sole_pass1_rev_compose(blk256_t *x, blk256_t *y, blk256_t *op) { if (op->mx == 1 && !op->x[3] && op->x[2] == 2 && !op->x[1] && !op->x[0]) { /* Double EOF */ blk256_set(x, 0, 0, 1, 0, 0); blk256_set(y, 0, 0, 1, 0, 0); return; } blk256_t tmp; blk256_set(x, 0, 0, op->mx & 1, op->x[3], op->x[2]); /* Shift down */ blk256_set(&tmp, 0, 0, 0, op->x[1], op->x[0]); int c = blk256_cmp(x, &tmp); /* blk256_print("### x", *x); */ /* blk256_print("### y", tmp); */ /** Idea: let x2 = blk128_t */ /* switch (blk256_cmp(x, &tmp)) { */ switch (c) { case 0: blk256_set(y, 0, 0, 0, 0, 0); break; case -1: /* x < tmp */ blk256_sub(y, &tmp, x); /* blk256_set(y, 0, 0, 0, tmp.x[1] - x->x[1] - (tmp.x[0] < x->x[0]), tmp.x[0] - x->x[0]); */ break; case 1: /* x > tmp */ blk256_dec(x); if (x->x[1] == tmp.x[1] && x->x[0] == tmp.x[0]) { blk256_set(y, 0, 0, 1, 0, 0); break; } blk256_set(y, 0, 0, 0, tmp.x[1] - x->x[1] - (x->x[0] > tmp.x[0]), tmp.x[0] - x->x[0]); break; } blk256_print("Pass 1 (rev. comp)", *x); blk256_print("Pass 1 (rev. comp)", *y); }