//	This source and short text variances method apply to GNU General Public License. 
//			Copyright (C) 2003  Jasenko Dzinleski 

//	This source applies to the GNU General Public License as
//	published by the Free Software Foundation 
//	and can not be used, copied, sold, redistributed or 
//	used in any other way but only by written permission by Jasenko Dzinleski . 
//	Copyright (C) from 2003 - 2014 and later by Jasenko Dzinleski 

//	This program is distributed in the hope that it will be useful, but
//	WITHOUT ANY WARRANTY; without even the implied warranty of
//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//	General Public License for more details. 

//	You should have received a copy of the GNU General Public License along
//	with this program; if not, write to the Free Software Foundation, Inc.,
//	51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

//
//		short text variances
//		written by Dzinleski Jasenko  2003 - March , 2014
//

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

// Input stream (f1) and report stream (f2); both are opened and closed by main().
FILE *f1;
FILE *f2;

// Fixed file names: the text that is analysed and the report that is produced.
char infn[256] = "fasta_.txt";
char outfn[256] = "v_.txt";

// ba2b[x][y][z][0]: index handed out to the byte trigram (x,y,z), or -1 if the
//                   trigram has not been seen; ba2b[x][y][z][1]: how many times
//                   the trigram occurred.
// ba2bi:            next free trigram index.
int ba2b[256][256][256][2];
int ba2bi = 0;

// bab[c][0][t]: id handed out to the pair "trigram index t followed by byte c",
//               or -1 if that pair has not been seen;
// bab[c][1][t]: how many times byte c followed trigram index t.
// babi:         next free pair id.
int bab[256][2][50000];
int babi = 0;

// bab_[c][p]: how many times byte c came right after the pair with id p.
// babi_:      only ever reset to 0 by main().
int bab_[256][50000];
int babi_ = 0;

int main(int argc,char *argv[])
{

int	a,b,c,d,e;
int	i,j,k,l,m;

int	b1_,b2_,b3_,b4_,b5_;
int	b1,b2,b3,b4;


	for(i=0;i<256;++i){
	for(j=0;j<256;++j){
	for(k=0;k<256;++k){
		ba2b[i][j][k][0]=-1;
	}}}ba2bi=0;

	for(i=0;i<256;++i){
	for(j=0;j<50000;++j){
		bab[i][0][j]=-1;
		bab[i][1][j]=0;
	}}babi=0;

	for(i=0;i<256;++i){
	for(j=0;j<50000;++j){
		bab_[i][j]=0;
	}}babi_=0;

	f1=fopen(infn,"rb");

	b1_=getc(f1);
	b2_=getc(f1);
	b3_=getc(f1);
	b4_=getc(f1);
	b5_=getc(f1);

	while(!feof(f1))
	{

	if(
	ba2b[(b1_&0xff)][(b2_&0xff)][(b3_&0xff)][0]==-1
	)
	{
	ba2b[(b1_&0xff)][(b2_&0xff)][(b3_&0xff)][0]=ba2bi;	
	if(bab[(b4_&0xff)][0][ba2b[(b1_&0xff)][(b2_&0xff)][(b3_&0xff)][0]]==-1)
	{
	bab[(b4_&0xff)][0][ba2b[(b1_&0xff)][(b2_&0xff)][(b3_&0xff)][0]]=babi;
	++bab_[(b5_&0xff)][babi];
	++babi;
	++bab[(b4_&0xff)][1][ba2b[(b1_&0xff)][(b2_&0xff)][(b3_&0xff)][0]];
	}else{++bab[(b4_&0xff)][1][ba2b[(b1_&0xff)][(b2_&0xff)][(b3_&0xff)][0]];++bab_[(b5_&0xff)][bab[(b4_&0xff)][0][ba2b[(b1_&0xff)][(b2_&0xff)][(b3_&0xff)][0]]];}
	++ba2bi;
	ba2b[(b1_&0xff)][(b2_&0xff)][(b3_&0xff)][1]=1;
	}else{
	++ba2b[(b1_&0xff)][(b2_&0xff)][(b3_&0xff)][1];
	if(bab[(b4_&0xff)][0][ba2b[(b1_&0xff)][(b2_&0xff)][(b3_&0xff)][0]]==-1)
	{
	bab[(b4_&0xff)][0][ba2b[(b1_&0xff)][(b2_&0xff)][(b3_&0xff)][0]]=babi;
	++bab_[(b5_&0xff)][babi];
	++babi;
	++bab[(b4_&0xff)][1][ba2b[(b1_&0xff)][(b2_&0xff)][(b3_&0xff)][0]];
	}else{++bab[(b4_&0xff)][1][ba2b[(b1_&0xff)][(b2_&0xff)][(b3_&0xff)][0]];++bab_[(b5_&0xff)][bab[(b4_&0xff)][0][ba2b[(b1_&0xff)][(b2_&0xff)][(b3_&0xff)][0]]];}
	}

	b1_=b2_;
	b2_=b3_;
	b3_=b4_;
	b4_=b5_;
	b5_=getc(f1);

	}
	fclose(f1);

	f2=fopen(outfn,"wb");

	for(i=0;i<256;++i){
	if((char)i==' '||i==13||i==10){continue;}
	for(j=0;j<256;++j){
	if((char)j==' '||j==13||j==10){continue;}
	for(k=0;k<256;++k){
	if((char)k==' '||k==13||k==10){continue;}
	if(ba2b[i][j][k][0]!=-1)
	{
	fprintf(f2,"%d\t%c%c%c\t",ba2b[i][j][k][1],i,j,k);
	for(l=0;l<256;++l){
	if((char)l==' '||l==13||l==10){continue;}
	if(bab[l][1][ba2b[i][j][k][0]]>=1){fprintf(f2,"%c\t%d\t",l,bab[l][1][ba2b[i][j][k][0]]);}}fprintf(f2,"\n");
	}}}}

	fprintf(f2,"\n");
	fprintf(f2,"\n");

	for(i=0;i<256;++i){
	if((char)i==' '||i==13||i==10){continue;}
	for(j=0;j<256;++j){
	if((char)j==' '||j==13||j==10){continue;}
	for(k=0;k<256;++k){
	if((char)k==' '||k==13||k==10){continue;}
	if(ba2b[i][j][k][0]!=-1)
	{
	fprintf(f2,"%d\t%c%c%c\t",ba2b[i][j][k][1],i,j,k);
	for(l=0;l<256;++l){
	if((char)l==' '||l==13||l==10){continue;}
	if(bab[l][1][ba2b[i][j][k][0]]>=1)
	{
		fprintf(f2,"%c\t%d\t",l,bab[l][1][ba2b[i][j][k][0]]);
		for(m=0;m<256;++m){
		if((char)m==' '||m==13||m==10){continue;}
		if(bab_[m][bab[l][0][ba2b[i][j][k][0]]]>=1)
		{
		fprintf(f2,"%c%c\t%d\t",l,m,bab_[m][bab[l][0][ba2b[i][j][k][0]]]);
		}
		}
	}
	}
	fprintf(f2,"\n");
	}}}}
	fclose(f2);
	printf("ok\n");
	return(0);

}