//	This source and the Similar Text Sequences method are subject to the GNU General Public License.
//			Copyright (C) 2001-2013 Jasenko Dzinleski 

//		This program is free software; you can redistribute it
//	and/or modify it under the terms of the GNU General Public License as
//	published by the Free Software Foundation; either version 2 of the
//	License, or (at your option) any later version. 

//	This program is distributed in the hope that it will be useful, but
//	WITHOUT ANY WARRANTY; without even the implied warranty of
//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//	General Public License for more details. 

//	You should have received a copy of the GNU General Public License along
//	with this program; if not, write to the Free Software Foundation, Inc.,
//	51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

//  Similar Text Sequences 
//	written by Dzinleski Jasenko  March 2014


#include <stdio.h>
#include <string.h>
#include <vector>
#include <iostream>

// Stream shared by all passes; each pass opens and closes it itself.
FILE	*f1;

// Path of the input file that every pass reads.
char	infn[256]="fasta_1.txt";

// ba2b[first byte][third byte][slot], filled by p_1():
//   [0] running index assigned at first sighting (-1 = pair not seen yet)
//   [1] middle byte of the first triple seen for this pair
//   [2] counter, set to 1 at first sighting and bumped on a full signature match
//   [3] stored d3 signature      [4] stored d3_ signature
int	ba2b[256][256][5];
int	ba2bi=0;

// d[b1][b2][b3][slot], filled by p_1():
//   [0] occurrence counter (-1 until the triple is counted)
//   [1] class code 1..3 of the triple
int	d[256][256][256][2];

// Not referenced by the code visible in this file.
int	bc=0;
int	bitc=0;

// Bit masks built by p_1(): b8o = bits 0,2,4,6 and b8e = bits 1,3,5,7.
int	b8o;
int	b8e;

using std::vector;

// h_[entry][row][column]: histogram sets seen so far.
// h_c1 = allocated entries, h_c2 = rows per entry, h_c3 = columns per row,
// h_i  = number of entries in use.
vector<vector<vector<int> > >	h_;
int	h_c1=10000;
int	h_c2=9;
int	h_c3=3;
int	h_i=0;

// ha_[entry][row][column]: histogram sets that were already present in h_
// when they were seen again.  The ha_c1/ha_c2/ha_c3/ha_i fields mirror h_.
vector<vector<vector<int> > >	ha_;
int	ha_c1=1000;
int	ha_c2=9;
int	ha_c3=3;
int	ha_i=0;

int p_1()
{

int	i,j,k;
int	b1,b2,b3;
int	b1_,b2_,b3_;
int	d1,d2,d3;
int	d1_,d2_,d3_;
int	fbyte;
int	mc=0,unmc=0;

	b8o=0;
	b8o|=1<<0;
	b8o|=1<<2;
	b8o|=1<<4;
	b8o|=1<<6;

	b8e=0;
	b8e|=1<<1;
	b8e|=1<<3;
	b8e|=1<<5;
	b8e|=1<<7;

	for(i=0;i<256;++i){for(j=0;j<256;++j){for(k=0;k<256;++k){d[i][j][k][0]=-1;}}}
	for(i=0;i<256;++i){for(j=0;j<256;++j){ba2b[i][j][0]=-1;}}ba2bi=0;
	//for(i=0;i<256;++i){for(j=0;j<256;++j){ba2b_[i][j][0]=-1;}}ba2bi_=0;
	//for(i=0;i<256;++i){for(j=0;j<256;++j){ba2b__[i][j][0]=-1;}}ba2bi__=0;

	f1=fopen(infn,"rb");
	b1_=getc(f1);
	b2_=getc(f1);
	b3_=getc(f1);

	while(!feof(f1))
	{

		b1=b1_<<1;
		b2=b2_<<1;
		b3=b3_<<1;

		d1=((((b1&b8e)>>1)^(b2&b8o))|((b1&b8e)^((b2&b8o)<<1)));d1=d1<<1;	
		d2=((((b1&b8e)>>1)^(b3&b8o))|((b1&b8e)^((b3&b8o)<<1)));d2=d2<<1;
		d3=((((d1&b8e)>>1)^(d2&b8o))|((d1&b8e)^((d2&b8o)<<1)));

		d1_=((((b2&b8e)>>1)^(b1&b8o))|((b2&b8e)^((b1&b8o)<<1)));d1_=d1_<<1;	
		d2_=((((b2&b8e)>>1)^(b3&b8o))|((b2&b8e)^((b3&b8o)<<1)));d2_=d2_<<1;
		d3_=((((d2_&b8e)>>1)^(d1_&b8o))|((d2_&b8e)^((d1_&b8o)<<1)));

		if(ba2b[(b1_&0xff)][(b3_&0xff)][0]==-1)
		{

			ba2b[(b1_&0xff)][(b3_&0xff)][0]=ba2bi;++ba2bi;
			ba2b[(b1_&0xff)][(b3_&0xff)][1]=b2_;
			ba2b[(b1_&0xff)][(b3_&0xff)][2]=1;
			ba2b[(b1_&0xff)][(b3_&0xff)][3]=d3;
			ba2b[(b1_&0xff)][(b3_&0xff)][4]=d3_;

			d[b1_][b2_][b3_][1]=3;
			
		}else{

			if
			(
			(ba2b[(b1_&0xff)][(b3_&0xff)][3]==d3)
			&&
			(ba2b[(b1_&0xff)][(b3_&0xff)][4]==d3_)
			)
			{

			++mc;
			++ba2b[(b1_&0xff)][(b3_&0xff)][2];

			if(d[b1_][b2_][b3_][0]==-1)
			{
			d[b1_][b2_][b3_][0]=1;
			d[b1_][b2_][b3_][1]=1;
			}else{++d[b1_][b2_][b3_][0];}

			}else{

			if
			(
			(ba2b[(b1_&0xff)][(b3_&0xff)][3]!=d3)
			&&
			(ba2b[(b1_&0xff)][(b3_&0xff)][4]!=d3_)
			)
			{

			++unmc;

			if(d[b1_][b2_][b3_][0]==-1)
			{
			d[b1_][b2_][b3_][0]=1;
			d[b1_][b2_][b3_][1]=3;
			}else{++d[b1_][b2_][b3_][0];}

			}else{

			++mc;

			if(d[b1_][b2_][b3_][0]==-1)
			{
			d[b1_][b2_][b3_][0]=1;
			d[b1_][b2_][b3_][1]=2;
			}else{++d[b1_][b2_][b3_][0];}

			}

			}
		}

		b1_=b2_;
		b2_=b3_;
		b3_=getc(f1);

	}
	fclose(f1);

	//printf("%d\n",mc);
	//printf("%d\n",unmc);
	printf("\t Ratio %e \n\n",((double)unmc/(double)mc));

	return(0);

}

int main(int argc,char *argv[])
{

int	i,j;
int	a,b,e;

int	b1_,b2_,b3_;
int	b12,b22,b32;
int	b13,b23,b33;
int	c=0;

char	s_[2*9];	int s_i=0;
char	s1[9][9];
int	h1[9][3];

char	sa[256];	int	sai=0;



	printf("\n\t mdump 5  Routine\n");
	printf("\t written by Dzinleski Jasenko March , 2014\n");
	printf("\n\n\n");

	p_1();

	h_.resize(h_c1);for(i=0;i<h_c1;++i){h_[i].resize(h_c2);for(j=0;j<h_c2;++j){h_[i][j].resize(h_c3);}}
	ha_.resize(ha_c1);for(i=0;i<ha_c1;++i){ha_[i].resize(ha_c2);for(j=0;j<ha_c2;++j){ha_[i][j].resize(ha_c3);}}

//2

	f1=fopen(infn,"rb");

	b1_=getc(f1);
	b2_=getc(f1);
	b3_=getc(f1);
	//printf("%d",d[b1_][b2_][b3_][1]);
	s_[s_i]=48+d[b1_][b2_][b3_][1];++s_i;
	b12=getc(f1);
	b22=getc(f1);
	b32=getc(f1);
	//printf("%d",d[b12][b22][b32][1]);
	s_[s_i]=48+d[b12][b22][b32][1];++s_i;
	b13=getc(f1);
	b23=getc(f1);
	b33=getc(f1);
	//printf("%d",d[b13][b23][b33][1]);
	s_[s_i]=48+d[b13][b23][b33][1];++s_i;
	//++c;

	b1_=getc(f1);
	b2_=getc(f1);
	b3_=getc(f1);
	//printf("%d",d[b1_][b2_][b3_][1]);
	s_[s_i]=48+d[b1_][b2_][b3_][1];++s_i;
	b12=getc(f1);
	b22=getc(f1);
	b32=getc(f1);
	//printf("%d",d[b12][b22][b32][1]);
	s_[s_i]=48+d[b12][b22][b32][1];++s_i;
	b13=getc(f1);
	b23=getc(f1);
	b33=getc(f1);
	//printf("%d",d[b13][b23][b33][1]);
	s_[s_i]=48+d[b13][b23][b33][1];++s_i;
	//++c;

	b1_=getc(f1);
	b2_=getc(f1);
	b3_=getc(f1);
	//printf("%d",d[b1_][b2_][b3_][1]);
	s_[s_i]=48+d[b1_][b2_][b3_][1];++s_i;
	b12=getc(f1);
	b22=getc(f1);
	b32=getc(f1);
	//printf("%d",d[b12][b22][b32][1]);
	s_[s_i]=48+d[b12][b22][b32][1];++s_i;
	b13=getc(f1);
	b23=getc(f1);
	b33=getc(f1);
	//printf("%d",d[b13][b23][b33][1]);
	s_[s_i]=48+d[b13][b23][b33][1];++s_i;
	//++c;

	while(!feof(f1))
	{

	b1_=getc(f1);if(feof(f1)){break;}
	b2_=getc(f1);if(feof(f1)){break;}
	b3_=getc(f1);if(feof(f1)){break;}
	//printf("%d",d[b1_][b2_][b3_][1]);
	s_[s_i]=48+d[b1_][b2_][b3_][1];++s_i;
	b12=getc(f1);if(feof(f1)){break;}
	b22=getc(f1);if(feof(f1)){break;}
	b32=getc(f1);if(feof(f1)){break;}
	//printf("%d",d[b12][b22][b32][1]);
	s_[s_i]=48+d[b12][b22][b32][1];++s_i;
	b13=getc(f1);if(feof(f1)){break;}
	b23=getc(f1);if(feof(f1)){break;}
	b33=getc(f1);if(feof(f1)){break;}
	//printf("%d",d[b13][b23][b33][1]);
	s_[s_i]=48+d[b13][b23][b33][1];++s_i;
	++c;

	if(c==3)
	{
		//printf("\n");
		j=0;for(i=0;i<0+9;++i){s1[0][j]=s_[i];++j;}
		j=0;for(i=1;i<1+9;++i){s1[1][j]=s_[i];++j;}
		j=0;for(i=2;i<2+9;++i){s1[2][j]=s_[i];++j;}
		j=0;for(i=3;i<3+9;++i){s1[3][j]=s_[i];++j;}
		j=0;for(i=4;i<4+9;++i){s1[4][j]=s_[i];++j;}
		j=0;for(i=5;i<5+9;++i){s1[5][j]=s_[i];++j;}
		j=0;for(i=6;i<6+9;++i){s1[6][j]=s_[i];++j;}
		j=0;for(i=7;i<7+9;++i){s1[7][j]=s_[i];++j;}
		j=0;for(i=8;i<8+9;++i){s1[8][j]=s_[i];++j;}

		//for(i=0;i<9;++i){printf("%c%c%c%c%c%c%c%c%c\n",s1[i][0],s1[i][1],s1[i][2],s1[i][3],s1[i][4],s1[i][5],s1[i][6],s1[i][7],s1[i][8]);}

		for(i=0;i<9;++i){h1[i][0]=0;h1[i][1]=0;h1[i][2]=0;}

		for(i=0;i<9;++i)
		{
		++h1[0][-1+(-48+s1[0][i])];
		++h1[1][-1+(-48+s1[1][i])];
		++h1[2][-1+(-48+s1[2][i])];
		++h1[3][-1+(-48+s1[3][i])];
		++h1[4][-1+(-48+s1[4][i])];
		++h1[5][-1+(-48+s1[5][i])];
		++h1[6][-1+(-48+s1[6][i])];
		++h1[7][-1+(-48+s1[7][i])];
		++h1[8][-1+(-48+s1[8][i])];
		}

		//for(i=0;i<9;++i){printf("%d %d %d %d\n",i,h1[i][0],h1[i][1],h1[i][2]);}
		
		if(h_i==0)
		{
		for(i=0;i<9;++i)
		{
		h_[h_i][i][0]=h1[i][0];h_[h_i][i][1]=h1[i][1];h_[h_i][i][2]=h1[i][2];
		}
		++h_i;
		}else{
		for(j=0;j<h_i;++j)
		{
		for(i=0;i<9;++i)
		{
		if((h_[j][i][0]!=h1[i][0])||(h_[j][i][1]!=h1[i][1])||(h_[j][i][2]!=h1[i][2])){break;}
		}
		if(i==9)
		{
		//printf(".");
		for(i=0;i<9;++i)
		{
		ha_[ha_i][i][0]=h1[i][0];ha_[ha_i][i][1]=h1[i][1];ha_[ha_i][i][2]=h1[i][2];
		}
		++ha_i;//printf("ha_i %d %d\n",ha_i,ha_c1);
		if(ha_i>=ha_c1)
		{a=0;while(ha_i>=(a+ha_c1)){a+=32;}e=ha_c1;ha_c1+=a;ha_.resize(ha_c1);for(a=e;a<ha_c1;++a){ha_[a].resize(ha_c2);for(b=0;b<ha_c2;++b){ha_[a][b].resize(ha_c3);}}}
		break;
		}
		}
		if(h_i==j)
		{
		for(i=0;i<9;++i)
		{
		h_[h_i][i][0]=h1[i][0];h_[h_i][i][1]=h1[i][1];h_[h_i][i][2]=h1[i][2];
		}
		++h_i;//printf("h_i %d %d\n",h_i,h_c1);
		if(h_i>=h_c1)
		{a=0;while(h_i>=(a+h_c1)){a+=32;}e=h_c1;h_c1+=a;h_.resize(h_c1);for(a=e;a<h_c1;++a){h_[a].resize(h_c2);for(b=0;b<h_c2;++b){h_[a][b].resize(h_c3);}}}
		}
		}

		s_i=0;
		c=0;
		j=0;for(i=9;i<9+9;++i){s_[j]=s_[i];++j;}s_i=9;

	}
	}
	fclose(f1);

	for(j=0;j<ha_i;++j)
	{
	printf("i %d\n",j);
	for(i=0;i<9;++i){printf("%d %d %d %d\n",i,ha_[j][i][0],ha_[j][i][1],ha_[j][i][2]);}printf("\n");
	}

//2

//3

	s_i=0;c=0;
	f1=fopen(infn,"rb");

	b1_=getc(f1);sa[sai]=b1_;++sai;
	b2_=getc(f1);sa[sai]=b2_;++sai;
	b3_=getc(f1);sa[sai]=b3_;++sai;
	//printf("%d",d[b1_][b2_][b3_][1]);
	s_[s_i]=48+d[b1_][b2_][b3_][1];++s_i;
	b12=getc(f1);sa[sai]=b12;++sai;
	b22=getc(f1);sa[sai]=b22;++sai;
	b32=getc(f1);sa[sai]=b32;++sai;
	//printf("%d",d[b12][b22][b32][1]);
	s_[s_i]=48+d[b12][b22][b32][1];++s_i;
	b13=getc(f1);sa[sai]=b13;++sai;
	b23=getc(f1);sa[sai]=b23;++sai;
	b33=getc(f1);sa[sai]=b33;++sai;
	//printf("%d",d[b13][b23][b33][1]);
	s_[s_i]=48+d[b13][b23][b33][1];++s_i;
	//++c;

	b1_=getc(f1);sa[sai]=b1_;++sai;
	b2_=getc(f1);sa[sai]=b2_;++sai;
	b3_=getc(f1);sa[sai]=b3_;++sai;
	//printf("%d",d[b1_][b2_][b3_][1]);
	s_[s_i]=48+d[b1_][b2_][b3_][1];++s_i;
	b12=getc(f1);sa[sai]=b12;++sai;
	b22=getc(f1);sa[sai]=b22;++sai;
	b32=getc(f1);sa[sai]=b32;++sai;
	//printf("%d",d[b12][b22][b32][1]);
	s_[s_i]=48+d[b12][b22][b32][1];++s_i;
	b13=getc(f1);sa[sai]=b13;++sai;
	b23=getc(f1);sa[sai]=b23;++sai;
	b33=getc(f1);sa[sai]=b33;++sai;
	//printf("%d",d[b13][b23][b33][1]);
	s_[s_i]=48+d[b13][b23][b33][1];++s_i;
	//++c;

	b1_=getc(f1);sa[sai]=b1_;++sai;
	b2_=getc(f1);sa[sai]=b2_;++sai;
	b3_=getc(f1);sa[sai]=b3_;++sai;
	//printf("%d",d[b1_][b2_][b3_][1]);
	s_[s_i]=48+d[b1_][b2_][b3_][1];++s_i;
	b12=getc(f1);sa[sai]=b12;++sai;
	b22=getc(f1);sa[sai]=b22;++sai;
	b32=getc(f1);sa[sai]=b32;++sai;
	//printf("%d",d[b12][b22][b32][1]);
	s_[s_i]=48+d[b12][b22][b32][1];++s_i;
	b13=getc(f1);sa[sai]=b13;++sai;
	b23=getc(f1);sa[sai]=b23;++sai;
	b33=getc(f1);sa[sai]=b33;++sai;
	//printf("%d",d[b13][b23][b33][1]);
	s_[s_i]=48+d[b13][b23][b33][1];++s_i;
	//++c;

	while(!feof(f1))
	{
	b1_=getc(f1);if(feof(f1)){break;}sa[sai]=b1_;++sai;
	b2_=getc(f1);if(feof(f1)){break;}sa[sai]=b2_;++sai;
	b3_=getc(f1);if(feof(f1)){break;}sa[sai]=b3_;++sai;
	//printf("%d",d[b1_][b2_][b3_][1]);
	s_[s_i]=48+d[b1_][b2_][b3_][1];++s_i;
	b12=getc(f1);if(feof(f1)){break;}sa[sai]=b12;++sai;
	b22=getc(f1);if(feof(f1)){break;}sa[sai]=b22;++sai;
	b32=getc(f1);if(feof(f1)){break;}sa[sai]=b32;++sai;
	//printf("%d",d[b12][b22][b32][1]);
	s_[s_i]=48+d[b12][b22][b32][1];++s_i;
	b13=getc(f1);if(feof(f1)){break;}sa[sai]=b13;++sai;
	b23=getc(f1);if(feof(f1)){break;}sa[sai]=b23;++sai;
	b33=getc(f1);if(feof(f1)){break;}sa[sai]=b33;++sai;
	//printf("%d",d[b13][b23][b33][1]);
	s_[s_i]=48+d[b13][b23][b33][1];++s_i;
	++c;

	if(c==3)
	{
		//printf("\n");
		j=0;for(i=0;i<0+9;++i){s1[0][j]=s_[i];++j;}
		j=0;for(i=1;i<1+9;++i){s1[1][j]=s_[i];++j;}
		j=0;for(i=2;i<2+9;++i){s1[2][j]=s_[i];++j;}
		j=0;for(i=3;i<3+9;++i){s1[3][j]=s_[i];++j;}
		j=0;for(i=4;i<4+9;++i){s1[4][j]=s_[i];++j;}
		j=0;for(i=5;i<5+9;++i){s1[5][j]=s_[i];++j;}
		j=0;for(i=6;i<6+9;++i){s1[6][j]=s_[i];++j;}
		j=0;for(i=7;i<7+9;++i){s1[7][j]=s_[i];++j;}
		j=0;for(i=8;i<8+9;++i){s1[8][j]=s_[i];++j;}

		//for(i=0;i<9;++i){printf("%c%c%c%c%c%c%c%c%c\n",s1[i][0],s1[i][1],s1[i][2],s1[i][3],s1[i][4],s1[i][5],s1[i][6],s1[i][7],s1[i][8]);}

		for(i=0;i<9;++i){h1[i][0]=0;h1[i][1]=0;h1[i][2]=0;}

		for(i=0;i<9;++i)
		{
		++h1[0][-1+(-48+s1[0][i])];
		++h1[1][-1+(-48+s1[1][i])];
		++h1[2][-1+(-48+s1[2][i])];
		++h1[3][-1+(-48+s1[3][i])];
		++h1[4][-1+(-48+s1[4][i])];
		++h1[5][-1+(-48+s1[5][i])];
		++h1[6][-1+(-48+s1[6][i])];
		++h1[7][-1+(-48+s1[7][i])];
		++h1[8][-1+(-48+s1[8][i])];
		}

		//for(i=0;i<9;++i){printf("%d %d %d %d\n",i,h1[i][0],h1[i][1],h1[i][2]);}
		for(j=0;j<ha_i;++j)
		{
		for(i=0;i<9;++i)
		{
		if((ha_[j][i][0]!=h1[i][0])||(ha_[j][i][1]!=h1[i][1])||(ha_[j][i][2]!=h1[i][2])){break;}
		}
		if(i==9)
		{
		printf("%d\t",j);
		for(i=0;i<sai;++i){if(sa[i]!=13&&sa[i]!=10){printf("%c",sa[i]);}}printf("\t%d\n",j);
		break;
		}
		}
		
		s_i=0;
		c=0;
		sai=0;
		j=0;for(i=9;i<9+9;++i){s_[j]=s_[i];++j;}s_i=9;

	}
	}
	fclose(f1);

//3

	return(0);

}