//	This source and similar text sequences method apply to GNU General Public License. 
//			Copyright (C) 2001-2013 Jasenko Dzinleski 

//		This program is free software; you can redistribute it
//	and/or modify it under the terms of the GNU General Public License as
//	published by the Free Software Foundation; either version 2 of the
//	License, or (at your option) any later version. 

//	This program is distributed in the hope that it will be useful, but
//	WITHOUT ANY WARRANTY; without even the implied warranty of
//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//	General Public License for more details. 

//	You should have received a copy of the GNU General Public License along
//	with this program; if not, write to the Free Software Foundation, Inc.,
//	51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

//  Similar Text Sequences 
//	written by Dzinleski Jasenko  February 2013

#include 	<math.h>
#include 	<stdbool.h>
#include 	<stdio.h>
#include 	<string.h>

// Name of the input file read by m_c(); the commented-out lines below are
// alternative input names kept from the original source.
char		infn[256]  = "fasta_.txt";
//char		infn[256]  = "War_and_Peace_NT.txt";
//char		infn[256]  = "out.wav";
//char		infn[256]  = "mdr.pdf";
//char		infn[256]  = "bit_parity_compression_73_111.zip";

// Output file names:
//   outfn  - first-pass listing written by m_c() (line counter, characters, bit pattern)
//   out1fn - pattern table dumped by main() after the first pass
//   out2fn - first-pass rows (characters, tab, pattern value); re-read by main()
//   out3fn - second-pass rows written by main()
//   out4fn - pattern table dumped by main() after the second pass
char		outfn[256] = "out.txt";
char		out1fn[256] = "1.txt";
char		out2fn[256] = "2.txt";
char		out3fn[256] = "3.txt";
char		out4fn[256] = "4.txt";

// Shared stream handles, reassigned by both m_c() and main().
FILE		*f1,*f2,*f3;

// Alternating-bit masks; main() fills them in before any use
// (b8o/b16o select bits 0,2,4,...; b8e/b16e select bits 1,3,5,...).
int			b8o,b8e;
int			b16o,b16e;

// fb_  : buffered characters waiting to be written out, fb_i = number stored
// b_   : one entry per processed triple holding its three combined values, b_i = number stored
// da_  : table of distinct patterns; [n][0] = pattern value, [n][1] = occurrence count, da_i = entries used
int			fb_[256];			int	fb_i=0;
long 		b_[256][3];			int	b_i=0;
long 		da_[10000][2];		int	da_i=0;

// When true, the processing loops also echo their output to stdout.
bool		df=false;
// Not referenced anywhere in the visible code.
long		pos,ppos;

// Incremented by m_c() once for every run of CR/LF characters it skips.
int			lc=0;

/*
 *	m_c - first pass over the input file named by infn.
 *
 *	Starting at byte offset spos, the input is consumed three characters at
 *	a time (CR and LF are skipped; every skipped run increments lc).  For
 *	each triple the three pairwise bit-interleave combinations are stored
 *	in b_[], and the raw characters in fb_[].  Four triples form one row;
 *	once 12 rows (48 triples) are buffered they are written out:
 *
 *	  outfn  : TAB lc TAB <12 characters> TAB <12 pattern bits> CR LF
 *	  out2fn : <12 characters> TAB <pattern as decimal> CR LF
 *
 *	Each row's 12-bit pattern (one bit per pairwise equality test, most
 *	significant bit first) is also counted in the da_[] table.
 *
 *	Parameters:
 *	  spos - byte offset in the input at which reading starts.
 *
 *	Returns 0 in all cases.  If any of the three files cannot be opened,
 *	whatever was opened is closed again and nothing is processed.
 *
 *	NOTE(review): getc() results are stored without testing for EOF, so the
 *	last block may contain EOF filler, and a trailing block of fewer than
 *	12 rows is never written.  That matches the previous behaviour; confirm
 *	whether it is intended.
 */
int	m_c(long spos)
{
	/* index pairs compared inside one b_ entry, in output order */
	static const int	pr[3][2]={{0,1},{0,2},{1,2}};

	int		tri[3];
	int		i,j,k,l,q;
	int		b;
	int		x,y,z;
	int		c,d,e;
	long	bp;

	/* Outputs are opened first so that they exist (empty) even when the
	   input is missing, exactly as before. */
	f3=fopen(out2fn,"wb");
	f2=fopen(outfn,"wb");
	f1=fopen(infn,"rb");
	if(f1==NULL||f2==NULL||f3==NULL)
	{
		/* do not leak the streams that did open */
		if(f1!=NULL){fclose(f1);}
		if(f2!=NULL){fclose(f2);}
		if(f3!=NULL){fclose(f3);}
		return(0);
	}
	fseek(f1,spos,SEEK_SET);

	while(!feof(f1))
	{
		/* read one row: four triples of characters */
		for(b=0;b<4;++b)
		{
			for(k=0;k<3;++k)
			{
				tri[k]=getc(f1);
				i=0;while(tri[k]==13||tri[k]==10){tri[k]=getc(f1);++i;}if(i!=0){++lc;}
				fb_[fb_i]=tri[k];++fb_i;
			}

			/* Mask to 8 bits before shifting: getc() may have returned EOF
			   (negative) and left-shifting a negative int is undefined.
			   Only bits selected by b8e/b8o are used below, so the masked
			   value yields the same c/d/e as the unmasked shift did. */
			x=(tri[0]&0xFF)<<1;
			y=(tri[1]&0xFF)<<1;
			z=(tri[2]&0xFF)<<1;

			c=((((x&b8e)>>1)^(y&b8o))|((x&b8e)^((y&b8o)<<1)));
			d=((((x&b8e)>>1)^(z&b8o))|((x&b8e)^((z&b8o)<<1)));
			e=((((y&b8e)>>1)^(z&b8o))|((y&b8e)^((z&b8o)<<1)));

			b_[b_i][0]=c;
			b_[b_i][1]=d;
			b_[b_i][2]=e;
			++b_i;
		}

		/* flush once 12 rows (48 triples) are buffered */
		if(b_i>=(4*3)*4)
		{
			l=0;
			for(i=0;i<b_i;i+=4)
			{
				if(df){printf("\t%d\t",lc);}
				fprintf(f2,"%c%d%c",(char)9,lc,(char)9);

				/* the 12 characters belonging to this row */
				for(j=l;j<l+(3*4);j+=3)
				{
					if(df){printf("%c%c%c",fb_[j],fb_[j+1],fb_[j+2]);}
					fprintf(f2,"%c%c%c",fb_[j],fb_[j+1],fb_[j+2]);
					fprintf(f3,"%c%c%c",fb_[j],fb_[j+1],fb_[j+2]);
				}
				l=j;

				if(df){printf("\t");}
				fprintf(f2,"%c",(char)9);fprintf(f3,"%c",(char)9);

				/* three equality bits per triple, bit 11 first */
				bp=0;
				for(k=0;k<4;++k)
				{
					for(q=0;q<3;++q)
					{
						if(b_[i+k][pr[q][0]]==b_[i+k][pr[q][1]])
						{
							if(df){printf("1");}
							fprintf(f2,"1");
							bp|=1L<<(11-(3*k+q));
						}
						else
						{
							if(df){printf("0");}
							fprintf(f2,"0");
						}
					}
				}

				/* count the pattern; append it to da_ when seen for the first time */
				for(j=0;j<da_i;++j)
				{
					if(da_[j][0]==bp){++da_[j][1];if(df){printf("\t%d",j);}break;}
				}
				if(j==da_i){da_[da_i][0]=bp;da_[da_i][1]=1;if(df){printf("\t%d",da_i);}++da_i;}

				/* bp is a long, so it needs %ld */
				fprintf(f3,"%ld%c%c",bp,(char)13,(char)10);
				if(df){printf("\n");}
				fprintf(f2,"%c%c",(char)13,(char)10);
			}

			b_i=0;
			for(i=0;i<256;++i){fb_[i]=0;}fb_i=0;
		}
	}

	fclose(f1);
	fclose(f2);
	fclose(f3);

	return(0);
}

int main(int argc,char *argv[])
{
	
	int		i,j,k,l,m;
			
	b8o=0;
	b8o|=1<<0;
	b8o|=1<<2;
	b8o|=1<<4;
	b8o|=1<<6;

	b8e=0;
	b8e|=1<<1;
	b8e|=1<<3;
	b8e|=1<<5;
	b8e|=1<<7;

	b16o=0;
	b16o|=1<<0;
	b16o|=1<<2;
	b16o|=1<<4;
	b16o|=1<<6;
	b16o|=1<<8;
	b16o|=1<<10;
	b16o|=1<<12;
	b16o|=1<<14;
	
	b16e=0;
	b16e|=1<<1;
	b16e|=1<<3;
	b16e|=1<<5;
	b16e|=1<<7;
	b16e|=1<<9;
	b16e|=1<<11;
	b16e|=1<<13;
	b16e|=1<<15;


	m_c(0);

	f2=fopen(out1fn,"wb");
	for(i=0;i<da_i;++i)
	{
		
		//fprintf(f2,"%d\t%d\n",da_[i][0],da_[i][1]);
		
		if(((da_[i][0]<<0)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}
		if(((da_[i][0]<<1)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}
		if(((da_[i][0]<<2)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}
		if(((da_[i][0]<<3)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}
		if(((da_[i][0]<<4)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}
		if(((da_[i][0]<<5)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}
		if(((da_[i][0]<<6)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}
		if(((da_[i][0]<<7)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}
		if(((da_[i][0]<<8)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}
		if(((da_[i][0]<<9)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}
		if(((da_[i][0]<<10)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}																				
		if(((da_[i][0]<<11)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}																						
		fprintf(f2,"\t");
		fprintf(f2,"%d\t%d\n",da_[i][0],da_[i][1]);
	}
	fclose(f2);


	char	fa[3][10][256];
	int		fl[3][10];
	int		fi[3];
	int		fr=0;
	
	int		fbyte;
	int		fbyte1,fbyte2,fbyte3;
	int		c,d,e;
	long	bp;
	
	df=false;
	b_i=0;da_i=0;
	for(i=0;i<256;++i){fb_[i]=0;}fb_i=0;			
	
	for(l=0;l<3;++l){for(i=0;i<10;++i){fl[l][i]=0;for(j=0;j<156;++j){fa[l][i][j]='\0';}}fi[l]=0;}fr=0;
	
	f1=fopen(out2fn,"rb");	
	f2=fopen(out3fn,"wb");		
	fbyte=getc(f1);
	while(!feof(f1))
	{
		while(fbyte!=13&&fbyte!=10){if(fbyte==9){++fi[fr];}else{fa[fr][fi[fr]][fl[fr][fi[fr]]]=fbyte;++fl[fr][fi[fr]];}fbyte=getc(f1);}
		while(fbyte==10||fbyte==13){fbyte=getc(f1);}
		++fr;
		if(fr==3)
		{
			/*
			printf("1.\t");for(i=0;i<fl[0][1];++i){printf("%c",fa[0][1][i]);}printf("\n");
			printf("2.\t");for(i=0;i<fl[1][1];++i){printf("%c",fa[1][1][i]);}printf("\n");
			printf("3.\t");for(i=0;i<fl[2][1];++i){printf("%c",fa[2][1][i]);}printf("\n");
			*/
			
			fbyte1=0;
			l=0;for(i=-1+fl[0][1];i>=0;--i){fbyte1+=(fa[0][1][i]-48)*pow(10,l);++l;}
			//printf("%d\n",fbyte1);
			fbyte2=0;
			l=0;for(i=-1+fl[1][1];i>=0;--i){fbyte2+=(fa[1][1][i]-48)*pow(10,l);++l;}
			//printf("%d\n",fbyte2);
			fbyte3=0;
			l=0;for(i=-1+fl[2][1];i>=0;--i){fbyte3+=(fa[2][1][i]-48)*pow(10,l);++l;}
			//printf("%d\n",fbyte3);

			fbyte1=fbyte1<<1;
			fbyte2=fbyte2<<1;
			fbyte3=fbyte3<<1;
		
			c=((((fbyte1&b16e)>>1)^(fbyte2&b16o))|((fbyte1&b16e)^((fbyte2&b16o)<<1)));		
			d=((((fbyte1&b16e)>>1)^(fbyte3&b16o))|((fbyte1&b16e)^((fbyte3&b16o)<<1)));		
			e=((((fbyte2&b16e)>>1)^(fbyte3&b16o))|((fbyte2&b16e)^((fbyte3&b16o)<<1)));				

			b_[b_i][0]=c;
			b_[b_i][1]=d;
			b_[b_i][2]=e;
			++b_i;	
				
			for(i=0;i<fl[0][0];++i){fb_[fb_i]=fa[0][0][i];++fb_i;}
			for(i=0;i<fl[1][0];++i){fb_[fb_i]=fa[1][0][i];++fb_i;}
			for(i=0;i<fl[2][0];++i){fb_[fb_i]=fa[2][0][i];++fb_i;}
			

			if(b_i>=4*3)
			{
				
				l=0;				
				for(i=0;i<b_i;i+=4)
				{

				for(j=l;j<l+4*fl[0][0];++j)
				{
					if(df){printf("%c",fb_[j]);}
					fprintf(f2,"%c",fb_[j]);
				}
				l=j;

				if(df){printf("\t");}
				fprintf(f2,"%c",(char)9);

				bp=0;
				if(b_[i][0]==b_[i][1]){if(df){printf("1");}fprintf(f2,"1");bp|=1<<11;}else{
				if(df){printf("0");}fprintf(f2,"0");}
				if(b_[i][0]==b_[i][2]){if(df){printf("1");}fprintf(f2,"1");bp|=1<<10;}else{
				if(df){printf("0");}fprintf(f2,"0");}			
				if(b_[i][1]==b_[i][2]){if(df){printf("1");}fprintf(f2,"1");bp|=1<<9; }else{
				if(df){printf("0");}fprintf(f2,"0");}							
				
				if(b_[i+1][0]==b_[i+1][1]){if(df){printf("1");}fprintf(f2,"1");bp|=1<<8;}else{
				if(df){printf("0");}fprintf(f2,"0");}
				if(b_[i+1][0]==b_[i+1][2]){if(df){printf("1");}fprintf(f2,"1");bp|=1<<7;}else{
				if(df){printf("0");}fprintf(f2,"0");}			
				if(b_[i+1][1]==b_[i+1][2]){if(df){printf("1");}fprintf(f2,"1");bp|=1<<6;}else{
				if(df){printf("0");}fprintf(f2,"0");}							
				
				if(b_[i+2][0]==b_[i+2][1]){if(df){printf("1");}fprintf(f2,"1");bp|=1<<5;}else{
				if(df){printf("0");}fprintf(f2,"0");}
				if(b_[i+2][0]==b_[i+2][2]){if(df){printf("1");}fprintf(f2,"1");bp|=1<<4;}else{
				if(df){printf("0");}fprintf(f2,"0");}			
				if(b_[i+2][1]==b_[i+2][2]){if(df){printf("1");}fprintf(f2,"1");bp|=1<<3;}else{
				if(df){printf("0");}fprintf(f2,"0");}															
				
				if(b_[i+3][0]==b_[i+3][1]){if(df){printf("1");}fprintf(f2,"1");bp|=1<<2;}else{
				if(df){printf("0");}fprintf(f2,"0");}
				if(b_[i+3][0]==b_[i+3][2]){if(df){printf("1");}fprintf(f2,"1");bp|=1<<1;}else{
				if(df){printf("0");}fprintf(f2,"0");}			
				if(b_[i+3][1]==b_[i+3][2]){if(df){printf("1");}fprintf(f2,"1");bp|=1<<0;}else{
				if(df){printf("0");}fprintf(f2,"0");}															

				if(da_i==0){da_[da_i][0]=bp;da_[da_i][1]=1;if(df){printf("\t%d",da_i);}++da_i;}else{
					for(j=0;j<da_i;++j)
					{
						if(da_[j][0]==bp){++da_[j][1];if(df){printf("\t%d",j);}break;}
					}if(j==da_i){da_[da_i][0]=bp;da_[da_i][1]=1;if(df){printf("\t%d",da_i);}++da_i;}	
				}

				if(df){printf("\n");}
				fprintf(f2,"%c%c",(char)13,(char)10);				
								
				}
				
				b_i=0;
				for(i=0;i<256;++i){fb_[i]=0;}fb_i=0;			
			}
						
			for(l=0;l<3;++l){for(i=0;i<10;++i){fl[l][i]=0;for(j=0;j<156;++j){fa[l][i][j]='\0';}}fi[l]=0;}fr=0;
			
		}
	
	}
	fclose(f1);
	fclose(f2);

	f2=fopen(out4fn,"wb");
	for(i=0;i<da_i;++i)
	{
		
		//fprintf(f2,"%d\t%d\n",da_[i][0],da_[i][1]);
		
		if(((da_[i][0]<<0)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}
		if(((da_[i][0]<<1)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}
		if(((da_[i][0]<<2)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}
		if(((da_[i][0]<<3)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}
		if(((da_[i][0]<<4)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}
		if(((da_[i][0]<<5)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}
		if(((da_[i][0]<<6)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}
		if(((da_[i][0]<<7)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}
		if(((da_[i][0]<<8)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}
		if(((da_[i][0]<<9)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}
		if(((da_[i][0]<<10)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}																				
		if(((da_[i][0]<<11)>>11)&0x1){fprintf(f2,"1");}else{fprintf(f2,"0");}																						
		fprintf(f2,"\t");
		fprintf(f2,"%d\t%d\n",da_[i][0],da_[i][1]);
	}
	fclose(f2);
	
	return(0);
	
}