//	This source and the Text Sequences 32-bit entropy quotient method are covered by the GNU General Public License.
//			Copyright (C) 2001-2014 Jasenko Dzinleski 

//		This program is free software; you can redistribute it
//	and/or modify it under the terms of the GNU General Public License as
//	published by the Free Software Foundation; either version 2 of the
//	License, or (at your option) any later version. 

//	This program is distributed in the hope that it will be useful, but
//	WITHOUT ANY WARRANTY; without even the implied warranty of
//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//	General Public License for more details. 

//	You should have received a copy of the GNU General Public License along
//	with this program; if not, write to the Free Software Foundation, Inc.,
//	51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

//  	Text Sequences 32-bit entropy quotient
//	written by Dzinleski Jasenko  May 2017

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

// Stream handle for the input file named by infn.
FILE	*f1;

// Name of the input file.
char	infn[64] = "fasta__.txt";

// Table of 4-character sequences, indexed by the four character codes each
// reduced by 64.
//   [..][0]  ordinal at which the sequence was first recorded, -1 if never seen
//   [..][1]  number of occurrences of the sequence
int	ba32[64][64][64][64][2];

// Number of distinct sequences recorded in ba32.
int	ba32i;

// Scratch line buffer, 256 bytes.
char	sv[256];

int main(int argc,char *argv[])
{

int	fb;
int	a,b,cc;
int	d,e;
int	i,j,k,l,m,n;

	for(i=0;i<64;++i){for(j=0;j<64;++j){for(k=0;k<64;++k){for(l=0;l<64;++l){ba32[i][j][k][l][0]=-1;ba32[i][j][k][l][1]=0;}}}}ba32i=0;
	cc=0;f1=fopen(infn,"rb");if(f1==NULL){return(0);}fb=getc(f1);
	while(!feof(f1))
	{
		for(i=0;i<256;++i){sv[i]='\0';}i=0;
		while((fb!=10)&&(fb!=13)){sv[i]=fb;++i;fb=getc(f1);}
		while((fb==10)||(fb==13)){fb=getc(f1);}
		for(i=0;i<strlen(sv)-4;++i)
		{++cc;if(ba32[-64+(int)sv[i]][-64+(int)sv[1+i]][-64+(int)sv[1+1+i]][-64+(int)sv[1+1+1+i]][0]==-1)
		{ba32[-64+(int)sv[i]][-64+(int)sv[1+i]][-64+(int)sv[1+1+i]][-64+(int)sv[1+1+1+i]][0]=ba32i;++ba32[-64+(int)sv[i]][-64+(int)sv[1+i]][-64+(int)sv[1+1+i]][-64+(int)sv[1+1+1+i]][1];++ba32i;}else{++ba32[-64+(int)sv[i]][-64+(int)sv[1+i]][-64+(int)sv[1+1+i]][-64+(int)sv[1+1+1+i]][1];}
		}	
	}
	fclose(f1);

	printf("cc count %d\n",cc);
	printf("b32i count %d\n",ba32i);

	double	da[5000][5];int dai=0;
	for(i=0;i<64;++i){for(j=0;j<64;++j){for(k=0;k<64;++k){for(l=0;l<64;++l){
		//if(ba32[i][j][k][l][0]!=-1){printf("%c%c%c%c\t%d\t%e\n",(char)64+i,(char)64+j,(char)64+k,(char)64+l,ba32[i][j][k][l][1],(double)ba32[i][j][k][1]/(double)cc);}
		if(ba32[i][j][k][l][0]!=-1){da[dai][0]=(double)64+i;da[dai][1]=(double)64+j;da[dai][2]=(double)64+k;da[dai][3]=(double)64+l;da[dai][4]=log((double)1/((double)ba32[i][j][k][l][1]/(double)cc));++dai;}
	}}}}
	double t1,t2,t3,t4,t5;
	for(i=0;i<dai;++i)
	{
		for(j=0;j<dai;++j)
		{
			if(da[i][4]<da[j][4])
			{
			t1=da[j][0];t2=da[j][1];t3=da[j][2];t4=da[j][3];t5=da[j][4];
			da[j][0]=da[i][0];da[j][1]=da[i][1];da[j][2]=da[i][2];da[j][3]=da[i][3];da[j][4]=da[i][4];
			da[i][0]=t1;da[i][1]=t2;da[i][2]=t3;da[i][3]=t4;da[i][4]=t5;
			}
		}
	}
	for(i=0;i<dai;++i){printf("%c%c%c%c\t%e\t",(char)(int)da[i][0],(char)(int)da[i][1],(char)(int)da[i][2],(char)(int)da[i][3],da[i][4]);if(i){printf("%e\n",da[i][4]-da[i-1][4]);}else{printf("\n");}}

	return(0);

}