#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>

#define u64(x) ((uint64_t)x)

typedef struct __blk128_t {
	uint64_t hi;
	uint64_t lo;
} blk128_t;

typedef struct __blk256_t {
	char     mx;
	uint64_t x[4];
} blk256_t;

/*
 * Useful macros
 */
#define blk256_print(t) \
	do {\
		printf("c: %d, {%lu, %lu, %lu, %lu}\n", t.mx, t.x[3], t.x[2], t.x[1], t.x[0]);\
	} while(0);

#define blk256() {0, {0, 0, 0, 0}}
#define blk256_set(t, max, x3, x2, x1, x0) \
	do {				\
		(t)->mx   = max;\
		(t)->x[3] = x3;\
		(t)->x[2] = x2;\
		(t)->x[1] = x1;\
		(t)->x[0] = x0;\
	} while(0)


#define blk128_cmp(x, y) (((x).hi > (y).hi) ? 1 : ((x).hi < (y).hi) ? -1 : ((x).lo > (y).lo ? 1 : (((x).lo < (y).lo) ? -1 : 0)))

/* Compare to blk256 for equality. Returns -1, 0 or 1 if x is smaller than,
   equal to or larger than y (respectively) */
int blk256_cmp(blk256_t *x, blk256_t *y)
{
	if (x->mx != y->mx)
		return x->mx < y->mx ? -1 : 1;

	/* Compare the limbs from most significant to least */
	int i;
	for (i = 3; i > 0; i--)
		if (x->x[i] != y->x[i])
			return x->x[i] < y->x[i] ? -1 : 1;

	return 0;
}

/* Add a 128-bit number (as two 64-bit numbers) to rop. NOTE In the case where
   we require to use the 257th bit, we simply set it to zero (instead of
   incrementing it. */
void blk256_add(blk256_t *rop, uint64_t x1, uint64_t x0)
{
	int i = 1;
	rop->x[i] += x1;
	if (rop->x[i] >= x1)
		goto add_x0;

	/* Propagate carry */
	rop->x[++i]++; // 2
	if (rop->x[i])
		goto add_x0;

	rop->x[++i]++; // 3
	if (!rop->x[i])
		rop->mx = 1;

add_x0:
	i = 0;
	rop->x[i] += x0;
	if (rop->x[i] >= x1)
		return;

	/* Propagate carry */
	rop->x[++i]++; // 1
	if (rop->x[i])
		return;

	rop->x[++i]++; // 2
	if (rop->x[i])
		return;

	rop->x[++i]++; // 3
	if (!rop->x[i])
		rop->mx = 1;
}

/* Subtraction: Subtract op2 from op1 under the assumption that the subtraction
   will yield a positive number (otherwise it is not valid) */
void blk256_sub(blk256_t *rop, blk256_t *op1, blk256_t *op2)
{
	/* Copy op1 into rop */
	blk256_set(rop, op1->mx, op1->x[3], op1->x[2], op1->x[1], op1->x[0]);

	/* deref */
	uint64_t *opx = rop->x;
	uint64_t *opy = op2->x;

	opx[0] = opx[0] - opy[0];
	if (opx[0] <= opy[0])
		goto sub1;

	if (opx[1]--)
		goto sub1;
	if (opx[2]--)
		goto sub1;
	if (opx[3]--)
		op1->mx = 0;

sub1:
	opx[1] = opx[1] - opy[1];
	if (opx[1] <= opy[1])
		goto sub2;

	if (opx[2]--)
		goto sub2;
	if (opx[3]--)
		op1->mx = 0;

sub2:
	opx[2] = opx[2] - opy[2];
	if (opx[2] <= opy[2])
		goto sub3;

	if (opx[3]--)
		op1->mx = 0;

sub3:
	opx[3] = opx[3] - opy[3];
	if (opx[3] > opy[3])
		op1->mx = 0;
	else
		op1->mx = op1->mx - op2->mx;
}

void blk256_sub_blk128(blk256_t *rop, blk256_t *op1, blk128_t *op2)
{
	/* Copy op1 into rop */
	blk256_set(rop, op1->mx, op1->x[3], op1->x[2], op1->x[1], op1->x[0]);

	/* deref */
	uint64_t *opx = rop->x;

	opx[0] -= op2->lo;
	if (opx[0] <= op2->lo)
		goto sub1;

	if (opx[1]--)
		goto sub1;
	if (opx[2]--)
		goto sub1;
	if (opx[3]--)
		op1->mx = 0;

sub1:
	opx[0] -= op2->hi;
	if (opx[1] <= op2->hi)
		return;

	if (opx[2]--)
		return;
	if (opx[3]--)
		op1->mx = 0;
}

/*
 * Compose two 128 bit blocks (as 2*64 bit numbers) as one big number
 *
 * TODO Rewrite as two blk128_t's
 */
void blk256_compose(blk256_t *rop, uint64_t x1, uint64_t x0, uint64_t y1, uint64_t y0)
{
	rop->x[3] = x1;
	rop->x[2] = x0;
	rop->x[1] = y1;
	rop->x[0] = y0;
	blk256_add(rop, x1, x0);
}


void blk128_inc(blk128_t *i)
{
	if (++i->lo)
		++i->hi;
}

void blk128_sub(blk128_t *rop, blk128_t *x, blk128_t *y)
{
	rop->lo = x->lo - y->lo;
	rop->hi = x->hi - y->hi;
	if (rop->lo > y->lo)
		rop->hi--;
}

/*
 * Decompose a large block by a division by (B - 3i). In our case we know
 *
 * 	B = b^128,
 *
 * and we will use the fact the _i_ is generally small. Both the quotient and
 * the remainder are output by this function, the quotient in the first argument
 * (which is also the input operand) and the remainder in the 'rem' argument.
 *
 * Algorithm:
 *  - set up q = rop / B (easy)
 *  - compute prelim rem = rop - q
 *  - while rem >= B - 3i
 *      rem = rem - (B - 3i)
 *      q = q+1
 *
 * Definitely in TODO
 *  - Keep track of current 3i and B - 3i
 */
//void blk256_decompose(blk256_t *rop, blk256_t *rem, blk128_t *i3)
//{
//	/* Special case when i == 0 */
//	if (i3->hi == 0 && i3->lo == 0) {
//		blk256_set(rem, 0, 0, 0, rop->x[1], rop->x[0]);
//		blk256_set(rop, 0, 0, rop->mx, rop->x[3], rop->x[2]);
//		return;
//	}
//
//	blk256_t q   = blk256();
//	blk128_t b   = {0, 0};
//	blk256_t tmp = blk256();
//
//	/* q = rop >> B */
//	blk256_set(&q, 0, 0, rop->mx, rop->x[3], rop->x[2]);
//	blk256_set(&tmp, rop->mx, rop->x[3], rop->x[2], 0, 0);
//
//	/* Compute rem = rop - q(B-3i) */
//	blk256_sub(rem, rop, &tmp); /* rem = rop - qB */
//	blk128_t i = {0, 0};
//
//	blk128_sub(&b, &b, i3); /* B - 3i */
//
//	/* Do rem = rem - q(3i) */
//	for (; blk128_cmp(i, *i3); blk128_inc(i))
//		blk256_sub_128(rem, rem, &q);
//
//	while (blk256_cmp(rem, &b) > 0) {
//
//	}
//
//
//	blk256_sub(rem, rop, ...)
//}

int main(int argc, char *argv[])
{
	blk256_t t = blk256();

	blk256_compose(&t, 0, 5, 0, 7);
	blk256_print(t);

	/* Compose the two biggest numbers in [2^128]*/
	blk256_compose(&t, u64(~0), u64(~0), u64(~0), u64(~0));
	blk256_print(t);
	t.mx = 0;

	/* Add  */
	blk256_compose(&t, u64(~0), u64(~0), u64(~0), u64(~0));
	printf("t = ");
	blk256_print(t);

	/* Compare */
	blk256_t x = blk256();
	blk256_t y = blk256();
	x.mx = 1;

	printf("x = ");
	blk256_print(x);

	printf("y = ");
	blk256_print(y);

	printf("cmp(x, y) = %d, cmp(y, x) = %d, cmp(x, x) = %d\n", blk256_cmp(&x,&y)
			, blk256_cmp(&y, &x), blk256_cmp(&x, &x));

	printf("cmp(x, t)=%d\n", blk256_cmp(&x, &t));
	printf("cmp(t, x)=%d\n", blk256_cmp(&t, &x));

	printf("cmp(y, t)=%d\n", blk256_cmp(&y, &t));
	printf("cmp(t, y)=%d\n", blk256_cmp(&t, &y));

	/* Subtraction */
	blk256_sub(&x, &x, &y);
	printf("x - y = ");
	blk256_print(x);

	blk256_sub(&t, &t, &x);
	printf("t - x = ");
	blk256_print(t);

	blk256_set(&t, 0, 0, 0, 1, 0);
	blk256_set(&x, 0, 0, 0, 0, 1);
	blk256_sub(&t, &t, &x);
	printf("t - x = ");
	blk256_print(t);

	blk256_sub(&y, &y, &y); /* 0 - 0 */
	printf("y - y = ");
	blk256_print(y);


	return 0;
}