/*
 * Reed-Solomon coding and decoding
 * Phil Karn (karn at ka9q.ampr.org) August 1997
 * 
 * This file is derived from the program "new_rs_erasures.c" by Robert
 * Morelos-Zaragoza (robert at spectra.eng.hawaii.edu) and Hari Thirumoorthy
 * (harit at spectra.eng.hawaii.edu), Aug 1995
 *
 * This version is hard-wired to encode and decode the (255,223) code
 * over GF(256). It contains calls to i386 assembler routines optimized
 * for the Pentium.
 */
#include <stdio.h>
#include "rs32.h"

/* Primitive polynomials - see Lin & Costello, Error Control Coding Appendix A,
 * and  Lee & Messerschmitt, Digital Communication p. 453.
 */

/* 1+x^2+x^3+x^4+x^8 */
int Pp[MM+1] = { 1, 0, 1, 1, 1, 0, 0, 0, 1 };

/* Alpha exponent for the first root of the generator polynomial */
#define B0	1

/* index->polynomial form conversion table */
gf Alpha_to[NN + 1];

/* Polynomial->index form conversion table */
gf Index_of[NN + 1];

/* No legal value in index form represents zero, so
 * we need a special value for this purpose
 */
#define A0	(NN)

/* Generator polynomial g(x)
 * Degree of g(x) = 2*TT
 * has roots @**B0, @**(B0+1), ... ,@^(B0+2*TT-1)
 */
gf Gg[NN - KK + 1];

/* Lookup table for multiplying by the 32 generator polynomial terms,
 * packed 4 terms to each 32-bit word
 */
unsigned long Gtab[8][256];

/* Lookup table for GF multiplication
 * Mtab[i][j] = j * alpha^(B0+i) (note limited range of i)
 */
unsigned char Mtab[NN-KK+1][NN+1];

/* Compute x % NN, where NN is 2**MM - 1,
 * without a slow divide
 */
static inline gf
modnn(int x)
{
	while (x >= NN) {
		x -= NN;
		x = (x >> MM) + (x & NN);
	}
	return x;
}

#define	min(a,b)	((a) < (b) ? (a) : (b))

#define	CLEAR(a,n) {\
	int ci;\
	for(ci=(n)-1;ci >=0;ci--)\
		(a)[ci] = 0;\
	}

#define	COPY(a,b,n) {\
	int ci;\
	for(ci=(n)-1;ci >=0;ci--)\
		(a)[ci] = (b)[ci];\
	}
#define	COPYDOWN(a,b,n) {\
	int ci;\
	for(ci=(n)-1;ci >=0;ci--)\
		(a)[ci] = (b)[ci];\
	}


/* generate GF(2**m) from the irreducible polynomial p(X) in p[0]..p[m]
   lookup tables:  index->polynomial form   alpha_to[] contains j=alpha**i;
                   polynomial form -> index form  index_of[j=alpha**i] = i
   alpha=2 is the primitive element of GF(2**m)
   HARI's COMMENT: (4/13/94) alpha_to[] can be used as follows:
        Let @ represent the primitive element commonly called "alpha" that
   is the root of the primitive polynomial p(x). Then in GF(2^m), for any
   0 <= i <= 2^m-2,
        @^i = a(0) + a(1) @ + a(2) @^2 + ... + a(m-1) @^(m-1)
   where the binary vector (a(0),a(1),a(2),...,a(m-1)) is the representation
   of the integer "alpha_to[i]" with a(0) being the LSB and a(m-1) the MSB. Thus for
   example the polynomial representation of @^5 would be given by the binary
   representation of the integer "alpha_to[5]".
                   Similarily, index_of[] can be used as follows:
        As above, let @ represent the primitive element of GF(2^m) that is
   the root of the primitive polynomial p(x). In order to find the power
   of @ (alpha) that has the polynomial representation
        a(0) + a(1) @ + a(2) @^2 + ... + a(m-1) @^(m-1)
   we consider the integer "i" whose binary representation with a(0) being LSB
   and a(m-1) MSB is (a(0),a(1),...,a(m-1)) and locate the entry
   "index_of[i]". Now, @^index_of[i] is that element whose polynomial 
    representation is (a(0),a(1),a(2),...,a(m-1)).
   NOTE:
        The element alpha_to[2^m-1] = 0 always signifying that the
   representation of "@^infinity" = 0 is (0,0,0,...,0).
        Similarily, the element index_of[0] = A0 always signifying
   that the power of alpha which has the polynomial representation
   (0,0,...,0) is "infinity".
 
*/

static void
generate_gf(void)
{
	register int i, mask;

	mask = 1;
	Alpha_to[MM] = 0;
	for (i = 0; i < MM; i++) {
		Alpha_to[i] = mask;
		Index_of[Alpha_to[i]] = i;
		/* If Pp[i] == 1 then, term @^i occurs in poly-repr of @^MM */
		if (Pp[i] != 0)
			Alpha_to[MM] ^= mask;	/* Bit-wise EXOR operation */
		mask <<= 1;	/* single left-shift */
	}
	Index_of[Alpha_to[MM]] = MM;
	/*
	 * Have obtained poly-repr of @^MM. Poly-repr of @^(i+1) is given by
	 * poly-repr of @^i shifted left one-bit and accounting for any @^MM
	 * term that may occur when poly-repr of @^i is shifted.
	 */
	mask >>= 1;
	for (i = MM + 1; i < NN; i++) {
		if (Alpha_to[i - 1] >= mask)
			Alpha_to[i] = Alpha_to[MM] ^ ((Alpha_to[i - 1] ^ mask) << 1);
		else
			Alpha_to[i] = Alpha_to[i - 1] << 1;
		Index_of[Alpha_to[i]] = i;
	}
	Index_of[0] = A0;
	Alpha_to[NN] = 0;
}


/*
 * Obtain the generator polynomial of the TT-error correcting, length
 * NN=(2**MM -1) Reed Solomon code from the product of (X+@**(B0+i)), i = 0,
 * ... ,(2*TT-1)
 *
 * Examples:
 *
 * If B0 = 1, TT = 1. deg(g(x)) = 2*TT = 2.
 * g(x) = (x+@) (x+@**2)
 *
 * If B0 = 0, TT = 2. deg(g(x)) = 2*TT = 4.
 * g(x) = (x+1) (x+@) (x+@**2) (x+@**3)
 */
static void
gen_poly(void)
{
	register int i, j;

	Gg[0] = Alpha_to[B0];
	Gg[1] = 1;		/* g(x) = (X+@**B0) initially */
	for (i = 2; i <= NN - KK; i++) {
		Gg[i] = 1;
		/*
		 * Below multiply (Gg[0]+Gg[1]*x + ... +Gg[i]x^i) by
		 * (@**(B0+i-1) + x)
		 */
		for (j = i - 1; j > 0; j--)
			if (Gg[j] != 0)
				Gg[j] = Gg[j - 1] ^ Alpha_to[modnn((Index_of[Gg[j]]) + B0 + i - 1)];
			else
				Gg[j] = Gg[j - 1];
		/* Gg[0] can never be zero */
		Gg[0] = Alpha_to[modnn((Index_of[Gg[0]]) + B0 + i - 1)];
	}
	/* convert Gg[] to index form for quicker encoding */
	for (i = 0; i <= NN - KK; i++)
		Gg[i] = Index_of[Gg[i]];
}


/* Generate lookup table for fast encoding
 * Each table entry contains four packed result per 32-bit word
 */
static void
gen_gtab(void)
{
	int i,ii,j,k,val;

	memset(Gtab,0,sizeof(Gtab));
	for(i=1;i<256;i++){
		ii = Index_of[i];
		for(j=0;j<8;j++){
			for(k=3;k>=0;k--){
				val = (Gg[4*j+k] == A0) ? 0 : Alpha_to[modnn(ii+Gg[4*j+k])];
				Gtab[j][i] = (Gtab[j][i] << 8) | val;
			}
		}
	}
}

/* Generate lookup table for fast syndrome computation in the decoder
 * Given a field element x in polynomial form, the table generated here
 * when indexed as Mtab[i][x] gives x * alpha**(B0+i)
 */
static void
gen_mtab(void)
{
	int i,j;

	for(i=0;i<NN-KK;i++){
		Mtab[i][0] = 0;
		for(j=1;j<=NN;j++){
			Mtab[i][j] = Alpha_to[modnn(Index_of[j] + B0 + i)];
		}
	}
}
void
init_rs(void)
{
	generate_gf();
	gen_poly();
	gen_gtab();
	gen_mtab();
}

/* Errors + erasures decoding of (255,223) RS code */
int
rsd32(dtype data[NN], int eras_pos[NN-KK], int no_eras)
{
	int deg_lambda, el, deg_omega;
	int i, j, r;
	gf u,q,tmp,num1,num2,den,discr_r;
	gf lambda[NN-KK + 1], s[NN-KK + 1];	/* Err+Eras Locator poly
						 * and syndrome poly */
	gf b[NN-KK + 1], t[NN-KK + 1], omega[NN-KK + 1];
	gf root[NN-KK], reg[NN-KK + 1];
	int syn_error, count;
	unsigned char st[NN-KK];

/*#define noasm */
#ifdef noasm
	memset(st,0,sizeof(st));
	for(i=254;i>=0;i--)
		for(j=0;j<32;j++)
			st[j] = data[i] ^ Mtab[j][st[j]];
#else
	rssyndrome(data,st);
#endif
	/* Convert syndromes to index form, checking for nonzero condition */
	syn_error = 0;
	for(i=0;i<NN-KK;i++){
		syn_error |= st[i];
		s[i+1] = Index_of[st[i]];
	}
	if (!syn_error) {
		/*
		 * if syndrome is zero, data[] is a codeword and there are no
		 * errors to correct. So return data[] unmodified
		 */
		return 0;
	}
	/* Check for and quickly correct the special case of a single error.
	 * This is indicated by
	 * s[i] / s[i+1] = @^-k for all i and some constant k,
	 *	 (s in polynomial form), or
	 * s[i] - s[i+1] = -k,
	 *	 (s in index form)
	 *
	 * See Clark & Cain, "Error Correction Coding for Digital
	 * Communications", p209-210.
	 */
	if(s[2] != A0 && s[1] != A0){
		tmp = modnn(s[2] - s[1] + NN);	/* s[] in index form */
		for(i=2;i<NN-KK;i++){
			if(s[i+1] == A0 || s[i] == A0
			 || modnn(s[i+1] - s[i] + NN) != tmp)
			break;
		}
		if(i == NN-KK){
			/* single error */
			data[tmp] ^= Alpha_to[modnn(s[1] - tmp + NN)];
			return 1;
		}
	}
	CLEAR(&lambda[1],NN-KK);
	lambda[0] = 1;
	if (no_eras > 0) {
		/* Init lambda to be the erasure locator polynomial */
		lambda[1] = Alpha_to[eras_pos[0]];
		for (i = 1; i < no_eras; i++) {
			u = eras_pos[i];
			for (j = i+1; j > 0; j--) {
				tmp = Index_of[lambda[j - 1]];
				if(tmp != A0)
					lambda[j] ^= Alpha_to[modnn(u + tmp)];
			}
		}
	}
	for(i=0;i<NN-KK+1;i++)
		b[i] = Index_of[lambda[i]];

	/*
	 * Begin Berlekamp-Massey algorithm to determine error+erasure
	 * locator polynomial
	 */
	r = no_eras;
	el = no_eras;
	while (++r <= NN-KK) {	/* r is the step number */
		/* Compute discrepancy at the r-th step in poly-form */
		discr_r = 0;
		for (i = 0; i < r; i++){
			if ((lambda[i] != 0) && (s[r - i] != A0)) {
				discr_r ^= Alpha_to[modnn(Index_of[lambda[i]] + s[r - i])];
			}
		}
		discr_r = Index_of[discr_r];	/* Index form */
		if (discr_r == A0) {
			/* 2 lines below: B(x) <-- x*B(x) */
			COPYDOWN(&b[1],b,NN-KK);
			b[0] = A0;
		} else {
			/* 7 lines below: T(x) <-- lambda(x) - discr_r*x*b(x) */
			t[0] = lambda[0];
			for (i = 0 ; i < NN-KK; i++) {
				if(b[i] != A0)
					t[i+1] = lambda[i+1] ^ Alpha_to[modnn(discr_r + b[i])];
				else
					t[i+1] = lambda[i+1];
			}
			if (2 * el <= r + no_eras - 1) {
				el = r + no_eras - el;
				/*
				 * 2 lines below: B(x) <-- inv(discr_r) *
				 * lambda(x)
				 */
				for (i = 0; i <= NN-KK; i++)
					b[i] = (lambda[i] == 0) ? A0 : modnn(Index_of[lambda[i]] - discr_r + NN);
			} else {
				/* 2 lines below: B(x) <-- x*B(x) */
				COPYDOWN(&b[1],b,NN-KK);
				b[0] = A0;
			}
			COPY(lambda,t,NN-KK+1);
		}
	}

	/* Convert lambda to index form and compute deg(lambda(x)) */
	deg_lambda = 0;
	for(i=0;i<NN-KK+1;i++){
		lambda[i] = Index_of[lambda[i]];
		if(lambda[i] != A0)
			deg_lambda = i;
	}
	/*
	 * Find roots of the error+erasure locator polynomial. By Chien
	 * Search
	 */
	COPY(&reg[1],&lambda[1],NN-KK);
	count = 0;		/* Number of roots of lambda(x) */
	for (i = 1; i <= NN; i++) {
		q = 1;
		for (j = deg_lambda; j > 0; j--)
			if (reg[j] != A0) {
				reg[j] = modnn(reg[j] + j);
				q ^= Alpha_to[reg[j]];
			}
		if (!q) {
			/* store root (index-form) and error location number */
			root[count++] = i;
			if(count == deg_lambda)
				/* Found all possible roots, no point in continuing */
				break;
		}
	}

#ifdef DEBUG
	printf("\n Final error positions:\t");
	for (i = 0; i < count; i++)
		printf("%d ", NN - root[i]);
	printf("\n");
#endif
	if (deg_lambda != count) {
		/*
		 * deg(lambda) unequal to number of roots => uncorrectable
		 * error detected
		 */
		return -1;
	}
	/*
	 * Compute err+eras evaluator poly omega(x) = s(x)*lambda(x) (modulo
	 * x**(NN-KK)). in index form. Also find deg(omega).
	 */
	deg_omega = 0;
	for (i = 0; i < NN-KK;i++){
		tmp = 0;
		j = (deg_lambda < i) ? deg_lambda : i;
		for(;j >= 0; j--){
			if ((s[i + 1 - j] != A0) && (lambda[j] != A0))
				tmp ^= Alpha_to[modnn(s[i + 1 - j] + lambda[j])];
		}
		if(tmp != 0)
			deg_omega = i;
		omega[i] = Index_of[tmp];
	}
	omega[NN-KK] = A0;

	/*
	 * Compute error values in poly-form. num1 = omega(inv(X(l))), num2 =
	 * inv(X(l))**(B0-1) and den = lambda_pr(inv(X(l))) all in poly-form
	 */
	for (j = count-1; j >=0; j--) {
		num1 = 0;
		for (i = deg_omega; i >= 0; i--) {
			if (omega[i] != A0)
				num1  ^= Alpha_to[modnn(omega[i] + i * root[j])];
		}
		num2 = Alpha_to[modnn(root[j] * (B0 - 1) + NN)];
		den = 0;

		/* lambda[i+1] for i even is the formal derivative lambda_pr of lambda[i] */
		for (i = min(deg_lambda,NN-KK-1) & ~1; i >= 0; i -=2) {
			if(lambda[i+1] != A0)
				den ^= Alpha_to[modnn(lambda[i+1] + i * root[j])];
		}
		if (den == 0) {
#ifdef DEBUG
			printf("\n ERROR: denominator = 0\n");
#endif
			return -1;
		}
		/* Apply error to data */
		if (num1 != 0) {
			data[NN - root[j]] ^= Alpha_to[modnn(Index_of[num1] + Index_of[num2] + NN - Index_of[den])];
		}
	}
	return count;
}