//	This source and similar Text Sequences eq method apply to GNU General Public License. 
//			Copyright (C) 2001-2014 Jasenko Dzinleski 

//		This program is free software; you can redistribute it
//	and/or modify it under the terms of the GNU General Public License as
//	published by the Free Software Foundation; either version 2 of the
//	License, or (at your option) any later version. 

//	This program is distributed in the hope that it will be useful, but
//	WITHOUT ANY WARRANTY; without even the implied warranty of
//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//	General Public License for more details. 

//	You should have received a copy of the GNU General Public License along
//	with this program; if not, write to the Free Software Foundation, Inc.,
//	51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

//  	Text Sequences eq 
//	written by Dzinleski Jasenko  May 2017

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

/* Stream handle for the input file. */
FILE *f1;

/* Input file name; the buffer holds 256 bytes and starts out as "fasta__.txt". */
char infn[256] = "fasta__.txt";

/*
 * Table with one slot per ordered triple of byte values (256 x 256 x 256).
 * Each slot holds two ints:
 *   [0]  id given to the triple when it is first seen, -1 while unseen
 *   [1]  number of times the triple has been counted
 */
int ba24[256][256][256][2];

/* Next id to hand out, i.e. the number of distinct triples seen so far. */
int ba24i;

/* Fixed 256-byte character scratch buffer. */
char sv[256];

int main(int argc,char *argv[])
{

int	fb;
int	a,b,cc;
int	d,e;
int	i,j,k,l,m,n;

	for(i=0;i<256;++i){for(j=0;j<256;++j){for(k=0;k<256;++k){ba24[i][j][k][0]=-1;ba24[i][j][k][1]=0;}}}ba24i=0;
	cc=0;f1=fopen(infn,"rb");fb=getc(f1);
	while(!feof(f1))
	{
		for(i=0;i<256;++i){sv[i]='\0';}i=0;
		while((fb!=10)&&(fb!=13)){sv[i]=fb;++i;fb=getc(f1);}
		while((fb==10)||(fb==13)){fb=getc(f1);}
		for(i=0;i<strlen(sv)-3;++i)
		{
		++cc;if(ba24[(int)sv[i]][(int)sv[1+i]][(int)sv[1+1+i]][0]==-1)
		{ba24[(int)sv[i]][(int)sv[1+i]][(int)sv[1+1+i]][0]=ba24i;++ba24[(int)sv[i]][(int)sv[1+i]][(int)sv[1+1+i]][1];++ba24i;}else{++ba24[(int)sv[i]][(int)sv[1+i]][(int)sv[1+1+i]][1];}
		}	
	}
	fclose(f1);

	printf("cc count %d\n",cc);
	printf("b24i count %d\n",ba24i);

	double	da[300][4];int dai=0;
	for(i=0;i<256;++i){for(j=0;j<256;++j){for(k=0;k<256;++k){
		//if(ba24[i][j][k][0]!=-1){printf("%c%c%c\t%d\t%e\n",(char)i,(char)j,(char)k,ba24[i][j][k][1],(double)ba24[i][j][k][1]/(double)cc);}
		if(ba24[i][j][k][0]!=-1){da[dai][0]=(double)i;da[dai][1]=(double)j;da[dai][2]=(double)k;da[dai][3]=log((double)1/((double)ba24[i][j][k][1]/(double)cc));++dai;}
	}}}
	double t1,t2,t3,t4;
	for(i=0;i<dai;++i)
	{
		for(j=0;j<dai;++j)
		{
			if(da[i][3]<da[j][3])
			{
			t1=da[j][0];t2=da[j][1];t3=da[j][2];t4=da[j][3];
			da[j][0]=da[i][0];da[j][1]=da[i][1];da[j][2]=da[i][2];da[j][3]=da[i][3];
			da[i][0]=t1;da[i][1]=t2;da[i][2]=t3;da[i][3]=t4;
			}
		}
	}
	for(i=0;i<dai;++i){printf("%c%c%c\t%e\t",(char)(int)da[i][0],(char)(int)da[i][1],(char)(int)da[i][2],da[i][3]);if(i){printf("%e\n",da[i][3]-da[i-1][3]);}else{printf("\n");}}

	return(0);

}