//	This source and the Text Sequences 48-bit entropy quotient method are subject to the GNU General Public License.
//			Copyright (C) 2001-2014 Jasenko Dzinleski 

//		This program is free software; you can redistribute it
//	and/or modify it under the terms of the GNU General Public License as
//	published by the Free Software Foundation; either version 2 of the
//	License, or (at your option) any later version. 

//	This program is distributed in the hope that it will be useful, but
//	WITHOUT ANY WARRANTY; without even the implied warranty of
//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//	General Public License for more details. 

//	You should have received a copy of the GNU General Public License along
//	with this program; if not, write to the Free Software Foundation, Inc.,
//	51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

//  	Text Sequences 48-bit entropy quotient
//	written by Dzinleski Jasenko  May 2017

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

/* Handle of the input stream; main() opens it on infn and closes it again. */
FILE *f1;

/* Name of the input file. */
char infn[64] = "fasta__.txt";

/*
 * Table of 6-symbol windows, indexed by six symbol codes (0..4 each).
 *   [..][0]  ordinal of the window's first appearance, or -1 if never seen
 *   [..][1]  number of times the window has been counted
 */
int ba48[5][5][5][5][5][5][2];

/* Number of distinct windows recorded in ba48 so far. */
int ba48i;

/* Fixed-size character scratch buffer. */
char sv[256];

int main(int argc,char *argv[])
{

int	fb;
int	a,b,cc;
int	d,e;
int	i,j,k,l,m,n;

	for(i=0;i<5;++i){for(j=0;j<5;++j){for(k=0;k<5;++k){for(l=0;l<5;++l){for(m=0;m<5;++m){for(n=0;n<5;++n){
		ba48[i][j][k][l][m][n][0]=-1;ba48[i][j][k][l][m][n][1]=0;
	}}}}}}ba48i=0;
	int i1,i2,i3,i4,i5,i6;
	cc=0;f1=fopen(infn,"rb");if(f1==NULL){return(0);}fb=getc(f1);
	while(!feof(f1))
	{
		for(i=0;i<256;++i){sv[i]='\0';}i=0;
		while((fb!=10)&&(fb!=13)){sv[i]=fb;++i;fb=getc(f1);}
		while((fb==10)||(fb==13)){fb=getc(f1);}
		for(i=0;i<strlen(sv)-4;++i)
		{
		++cc;
		if(sv[i]=='A'){i1=0;}else{
		if(sv[i]=='C'){i1=1;}else{if(sv[i]=='G'){i1=2;}else{if(sv[i]=='T'){i1=3;}else{i1=4;}}}}
		if(sv[i+1]=='A'){i2=0;}else{
		if(sv[i+1]=='C'){i2=1;}else{if(sv[i+1]=='G'){i2=2;}else{if(sv[i+1]=='T'){i2=3;}else{i2=4;}}}}
		if(sv[i+1+1]=='A'){i3=0;}else{
		if(sv[i+1+1]=='C'){i3=1;}else{if(sv[i+1+1]=='G'){i3=2;}else{if(sv[i+1+1]=='T'){i3=3;}else{i3=4;}}}}
		if(sv[i+1+1+1]=='A'){i4=0;}else{
		if(sv[i+1+1+1]=='C'){i4=1;}else{if(sv[i+1+1+1]=='G'){i4=2;}else{if(sv[i+1+1+1]=='T'){i4=3;}else{i4=4;}}}}
		if(sv[i+1+1+1+1]=='A'){i5=0;}else{
		if(sv[i+1+1+1+1]=='C'){i5=1;}else{if(sv[i+1+1+1+1]=='G'){i5=2;}else{if(sv[i+1+1+1+1]=='T'){i5=3;}else{i5=4;}}}}
		if(sv[i+1+1+1+1+1]=='A'){i6=0;}else{
		if(sv[i+1+1+1+1+1]=='C'){i6=1;}else{if(sv[i+1+1+1+1+1]=='G'){i6=2;}else{if(sv[i+1+1+1+1+1]=='T'){i6=3;}else{i6=4;}}}}
		if(ba48[i1][i2][i3][i4][i5][i6][0]==-1){ba48[i1][i2][i3][i4][i5][i6][0]=ba48i;++ba48[i1][i2][i3][i4][i5][i6][1];++ba48i;}else{
			++ba48[i1][i2][i3][i4][i5][i6][1];}	
		}
	}
	fclose(f1);

	printf("cc count %d\n",cc);
	printf("b48i count %d\n",ba48i);

	double	da[50000][7];int dai=0;
	for(i=0;i<5;++i){for(j=0;j<5;++j){for(k=0;k<5;++k){for(l=0;l<5;++l){for(m=0;m<5;++m){for(n=0;n<5;++n){
		if(ba48[i][j][k][l][m][n][0]!=-1)
		{
			da[dai][0]=(double)i;da[dai][1]=(double)j;
			da[dai][2]=(double)k;da[dai][3]=(double)l;
			da[dai][4]=(double)m;da[dai][5]=(double)n;
			da[dai][6]=log((double)1/((double)ba48[i][j][k][l][m][n][1]/(double)cc));++dai;
		}
	}}}}}}
	double t1,t2,t3,t4,t5,t6,t7;
	for(i=0;i<dai;++i)
	{
		for(j=0;j<dai;++j)
		{
			if(da[i][6]<da[j][6])
			{
			t1=da[j][0];t2=da[j][1];
			t3=da[j][2];t4=da[j][3];
			t5=da[j][4];t6=da[j][5];t7=da[j][6];
			da[j][0]=da[i][0];da[j][1]=da[i][1];
			da[j][2]=da[i][2];da[j][3]=da[i][3];
			da[j][4]=da[i][4];da[j][5]=da[i][5];da[j][6]=da[i][6];
			da[i][0]=t1;da[i][1]=t2;
			da[i][2]=t3;da[i][3]=t4;
			da[i][4]=t5;da[i][5]=t6;da[i][6]=t7;
			}
		}
	}
	for(i=0;i<dai;++i)
	{
		if(da[i][0]==0){i1='A';}else{if(da[i][0]==1){i1='C';}else{if(da[i][0]==2){i1='G';}else{if(da[i][0]==3){i1='T';}else{i1='_';}}}}
		if(da[i][1]==0){i2='A';}else{if(da[i][1]==1){i2='C';}else{if(da[i][1]==2){i2='G';}else{if(da[i][1]==3){i2='T';}else{i2='_';}}}}
		if(da[i][2]==0){i3='A';}else{if(da[i][2]==1){i3='C';}else{if(da[i][2]==2){i3='G';}else{if(da[i][2]==3){i3='T';}else{i3='_';}}}}
		if(da[i][3]==0){i4='A';}else{if(da[i][3]==1){i4='C';}else{if(da[i][3]==2){i4='G';}else{if(da[i][3]==3){i4='T';}else{i4='_';}}}}
		if(da[i][4]==0){i5='A';}else{if(da[i][4]==1){i5='C';}else{if(da[i][4]==2){i5='G';}else{if(da[i][4]==3){i5='T';}else{i5='_';}}}}
		if(da[i][5]==0){i6='A';}else{if(da[i][5]==1){i6='C';}else{if(da[i][5]==2){i6='G';}else{if(da[i][5]==3){i6='T';}else{i6='_';}}}}
		printf("%c%c%c%c%c%c\t%e\t",(char)i1,(char)i2,(char)i3,(char)i4,(char)i5,(char)i6,da[i][6]);
		if(i){printf("%e\n",da[i][6]-da[i-1][6]);}else{printf("\n");}
	}

	return(0);

}