//	This source and the similar text sequences method are subject to the GNU General Public License.
//			Copyright (C) 2001-2016 Jasenko Dzinleski 

//		This program is free software; you can redistribute it
//	and/or modify it under the terms of the GNU General Public License as
//	published by the Free Software Foundation; either version 2 of the
//	License, or (at your option) any later version. 

//	This program is distributed in the hope that it will be useful, but
//	WITHOUT ANY WARRANTY; without even the implied warranty of
//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//	General Public License for more details. 

//	You should have received a copy of the GNU General Public License along
//	with this program; if not, write to the Free Software Foundation, Inc.,
//	51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

//  		Text Context Similar Sequences 
//		written by Dzinleski Jasenko, November 2016



#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

/* Shared stream handles. In the visible code f1 is the input stream of
 * p_1/p_2, f2 is the output stream of p_2 and f3 the output stream of p_1;
 * f4 and f5 are not referenced in the visible code. */
FILE	*f1,*f2,*f3,*f4,*f5;

/* Name of the input file that p_1 reads. */
char	infn[256]="fasta__.txt";

/* Per-signature tables indexed by the two values bp_ produces for a window.
 * [..][..][0] is the id given to the pair on first appearance (-1 = not seen),
 * [..][..][1] counts later appearances. baNi is the next id to hand out. */
int    	ba1[256][256][2];	int ba1i=0;
int    	ba2[256][256][2];	int ba2i=0;
int    	ba3[256][256][2];	int ba3i=0;

/* Table indexed by the 24-bit key (b2<<16)|(b3<<8)|b4 built from three bytes.
 * Columns 0..2 hold the ba1/ba2/ba3 ids recorded for the key, column 3 counts
 * occurrences. The key can take every value 0..0xffffff, so the table needs
 * 0x1000000 rows (0xffffff rows left the all-0xff key one row out of bounds). */
int 	bac[0x1000000][4];

/* Working buffer of p_1: row 0 holds input bytes, with [0][0] used as the
 * fill count / next free index; row 1 holds the per-position keep flags;
 * row 2 is only zeroed in the visible code. */
int 	fbuff[3][4096];

/* Results of the last bp_ call. d3, d3_ and d3__ are not assigned in the
 * visible code. */
int 	d1,d2,d3,d1_,d2_,d3_,d1__,d2__,d3__;
/* Bit masks set by bp_: b8e = bits 1,3,5,7 and b8o = bits 0,2,4,6. */
int	b8e,b8o;
/* Not referenced in the visible code. */
char 	sq_[4096];

/* File-name pieces: output prefix, second-output prefix, common suffix. */
char 	ps[256]="out_d";
char 	ips[256]="iout_d";
char 	ss[256]=".txt";

/* p_2 scratch: current line, its '_'-separated tokens, and the write cursor
 * (sv_j = token row, sv_i = column inside that row). */
char	sv[256];
char	sv_[256][256];int sv_i,sv_j;
/* Decimal text used as a file-name component. */
char 	ssl[256];

/*
 * p_2 - extract '_'-delimited tokens of a given length from a text file.
 *
 * Reads infn_ line by line (lines end at CR and/or LF). Each line is split at
 * '_' characters; every token that is closed by a following '_' and whose
 * length equals sl is written to the output file as
 *     <5-digit zero-padded length> TAB <token> CR LF
 * The output file name is outfn_ + "_l" + ssl_ + ss (ss is the global suffix).
 *
 * Uses the globals f1/f2 as stream handles and sv, sv_, sv_i, sv_j as scratch.
 *
 * Returns 0 on success, non-zero if the output name does not fit, a file
 * cannot be opened, or closing the output fails.
 *
 * NOTE(review): a token that runs to the end of the line without a trailing
 * '_' is collected but never written (only rows below sv_j are emitted).
 * This matches the previous behaviour; confirm that it is intended.
 */
int p_2(char infn_[256], char outfn_[256], int sl, char ssl_[256])
{
	char	outfn__[256];
	int	fb;
	int	i;
	int	n;
	size_t	len;

	/* Bounded replacement for the chain of strcat calls. */
	n = snprintf(outfn__, sizeof outfn__, "%s_l%s%s", outfn_, ssl_, ss);
	if (n < 0 || (size_t)n >= sizeof outfn__) {
		return 1;
	}

	f1 = fopen(infn_, "rb");
	if (f1 == NULL) {
		return 1;
	}
	f2 = fopen(outfn__, "wb");
	if (f2 == NULL) {
		fclose(f1);
		return 1;
	}

	fb = getc(f1);
	while (fb != EOF) {
		/* Collect one line. Bytes beyond the capacity of sv are read and
		 * dropped so that sv always stays NUL-terminated. */
		memset(sv, 0, sizeof sv);
		i = 0;
		while (fb != 10 && fb != 13 && fb != EOF) {
			if (i < (int)sizeof sv - 1) {
				sv[i] = (char)fb;
				++i;
			}
			fb = getc(f1);
		}
		/* Skip the line terminator(s), including empty lines. */
		while (fb == 10 || fb == 13) {
			fb = getc(f1);
		}

		/* Split the line: a '_' closes the current token if it is non-empty. */
		memset(sv_, 0, sizeof sv_);
		sv_i = 0;
		sv_j = 0;
		len = strlen(sv);
		for (i = 0; (size_t)i < len; ++i) {
			if (sv[i] == '_') {
				if (sv_i) {
					sv_i = 0;
					++sv_j;
				}
			} else {
				sv_[sv_j][sv_i] = sv[i];
				++sv_i;
			}
		}

		/* Emit the closed tokens of the requested length. */
		for (i = 0; i < sv_j; ++i) {
			size_t tlen = strlen(sv_[i]);

			if (sl >= 0 && tlen == (size_t)sl) {
				fprintf(f2, "%05d\t%s\r\n", (int)tlen, sv_[i]);
			}
		}
	}

	fclose(f1);
	if (fclose(f2) != 0) {
		return 1;
	}
	return(0);
}

int bp_(int b1 , int b2 , int b3, int b4, int b5)
{
	int b1_,b2_,b3_,b4_,b5_;
	
	b8o=0;
	b8o|=1<<0;
	b8o|=1<<2;
	b8o|=1<<4;
	b8o|=1<<6;

	b8e=0;
	b8e|=1<<1;
	b8e|=1<<3;
	b8e|=1<<5;
	b8e|=1<<7;

	b1_=b1<<1;
	b2_=b2<<1;
	b3_=b3<<1;
	b4_=b4<<1;
	b5_=b5<<1;	
	
	d1=((((b1_&b8e)>>1)^(b2_&b8o))|((b1_&b8e)^((b2_&b8o)<<1)));	
	d2=((((b1_&b8e)>>1)^(b3_&b8o))|((b1_&b8e)^((b3_&b8o)<<1)));
  	//d3=((((d1&b8e)>>1)^(d2&b8o))|((d1&b8e)^((d2&b8o)<<1)));

	d1_=((((b2_&b8e)>>1)^(b3_&b8o))|((b2_&b8e)^((b3_&b8o)<<1)));	
	d2_=((((b2_&b8e)>>1)^(b4_&b8o))|((b2_&b8e)^((b4_&b8o)<<1)));
  	//d3_=((((d2_&b8e)>>1)^(d1_&b8o))|((d2_&b8e)^((d1_&b8o)<<1)));

	d1__=((((b3_&b8e)>>1)^(b4_&b8o))|((b3_&b8e)^((b4_&b8o)<<1)));	
	d2__=((((b3_&b8e)>>1)^(b5_&b8o))|((b3_&b8e)^((b5_&b8o)<<1)));
  	//d3__=((((d2__&b8e)>>1)^(d1__&b8o))|((d2__&b8e)^((d1__&b8o)<<1)));

	return(0);
}

/*
 * p_1 - mask the global input file (infn) and write the result to outfn.
 *
 * The input is processed through fbuff in chunks. For every chunk:
 *   1. windows of five bytes starting l positions ahead are passed to bp_;
 *      each signature pair gets an id on first appearance (ba1/ba2/ba3) and
 *      the ids are recorded in bac under the key built from the three middle
 *      bytes of the window;
 *   2. windows starting at the beginning of the chunk are passed to bp_ and
 *      their ids are compared with the ids recorded in bac for the same key;
 *      the three comparison results become keep flags in fbuff[1];
 *   3. each byte whose flag is set is written unchanged, CR and LF are always
 *      written unchanged, every other byte is written as '_';
 *   4. the last l+5 entries are moved to the front for the next chunk.
 * The final l+5 buffered entries are never written.
 *
 * Parameters:
 *   l      - distance between the window used to fill the tables and the
 *            window used to look them up; must satisfy 0 <= l and
 *            l + 6 < capacity of fbuff so that every chunk makes progress.
 *   outfn  - name of the output file.
 *   outifn - not used by this function.
 *            NOTE(review): main passes a second file name here and later
 *            reads that file, but nothing is ever written under this name;
 *            confirm what the second output was meant to contain.
 *
 * Returns 0 on success, non-zero if l is out of range, a file cannot be
 * opened, or closing the output fails.
 */
int p_1(int l,char outfn[256],char outifn[256])
{
	const int	cap = (int)(sizeof fbuff[0] / sizeof fbuff[0][0]);
	const size_t	bac_rows = sizeof bac / sizeof bac[0];
	size_t	r, key;
	int	i, j;
	int	b1, b2, b3, b4, b5;
	int	a, b, c;
	int	limit, tail;

	(void)outifn;

	if (l < 0 || l + 6 >= cap) {
		return 1;
	}

	f1 = fopen(infn, "rb");
	if (f1 == NULL) {
		return 1;
	}
	f3 = fopen(outfn, "wb");
	if (f3 == NULL) {
		fclose(f1);
		return 1;
	}

	/* Reset every table. Columns 1 and 2 of bac are cleared as well so that a
	 * key not seen in this call reads 0 there (the value of a fresh static
	 * table) instead of ids left behind by an earlier call. */
	ba1i = 0; ba2i = 0; ba3i = 0;
	for (r = 0; r < bac_rows; ++r) {
		bac[r][0] = -1;
		bac[r][1] = 0;
		bac[r][2] = 0;
		bac[r][3] = 0;
	}
	/* The inner dimension has two slots only: [0] = id, [1] = repeat count. */
	for (i = 0; i < 256; ++i) {
		for (j = 0; j < 256; ++j) {
			ba1[i][j][0] = -1; ba1[i][j][1] = 0;
			ba2[i][j][0] = -1; ba2[i][j][1] = 0;
			ba3[i][j][0] = -1; ba3[i][j][1] = 0;
		}
	}

	fbuff[0][0] = 1;	/* slot 0 is the fill count; data starts at slot 1 */
	while (!feof(f1) && !ferror(f1)) {
		/* Fill the buffer. The EOF value returned by the last getc is
		 * stored too, exactly as before; it never reaches a window. */
		while (!feof(f1) && !ferror(f1) && fbuff[0][0] < cap) {
			int pos = fbuff[0][0];

			fbuff[0][pos] = getc(f1);
			fbuff[1][pos] = 0;
			fbuff[2][pos] = 0;
			fbuff[0][0] = pos + 1;
		}
		limit = fbuff[0][0] - l - 5;

		/* Pass 1: fill the tables from the windows l positions ahead. */
		i = 1;
		b1 = fbuff[0][l + i];
		b2 = fbuff[0][l + i + 1];
		b3 = fbuff[0][l + i + 2];
		b4 = fbuff[0][l + i + 3];
		b5 = fbuff[0][l + i + 4];
		++i;
		while (i < limit) {
			bp_(b1, b2, b3, b4, b5);
			if (ba1[d1][d2][0] == -1) { ba1[d1][d2][0] = ba1i; ++ba1i; } else { ++ba1[d1][d2][1]; }
			if (ba2[d1_][d2_][0] == -1) { ba2[d1_][d2_][0] = ba2i; ++ba2i; } else { ++ba2[d1_][d2_][1]; }
			if (ba3[d1__][d2__][0] == -1) { ba3[d1__][d2__][0] = ba3i; ++ba3i; } else { ++ba3[d1__][d2__][1]; }

			key = ((size_t)b2 << 16) | ((size_t)b3 << 8) | (size_t)b4;
			if (key < bac_rows) {	/* never index past the table */
				if (bac[key][0] == -1) {
					bac[key][0] = ba1[d1][d2][0];
					bac[key][1] = ba2[d1_][d2_][0];
					bac[key][2] = ba3[d1__][d2__][0];
				}
				++bac[key][3];
			}

			b1 = fbuff[0][l + i];
			b2 = fbuff[0][l + i + 1];
			b3 = fbuff[0][l + i + 2];
			b4 = fbuff[0][l + i + 3];
			b5 = fbuff[0][l + i + 4];
			++i;
		}

		/* Pass 2: look the windows at the start of the buffer up again and
		 * turn the three id comparisons into keep flags for the three
		 * middle bytes of the window. */
		b1 = fbuff[0][1];
		b2 = fbuff[0][2];
		b3 = fbuff[0][3];
		b4 = fbuff[0][4];
		b5 = fbuff[0][5];
		i = 5;
		while (i < limit) {
			bp_(b1, b2, b3, b4, b5);
			a = 0; b = 0; c = 0;
			key = ((size_t)b2 << 16) | ((size_t)b3 << 8) | (size_t)b4;
			if (key < bac_rows) {
				a = (ba1[d1][d2][0] == bac[key][0]);
				b = (ba2[d1_][d2_][0] == bac[key][1]);
				c = (ba3[d1__][d2__][0] == bac[key][2]);
			}
			fbuff[1][i - 3] = a;
			fbuff[1][i - 2] = b;
			fbuff[1][i - 1] = c;
			b1 = b2; b2 = b3; b3 = b4; b4 = b5;
			b5 = fbuff[0][++i];
		}

		/* Pass 3: write kept bytes and line breaks as-is, '_' for the rest. */
		for (i = 1; i < limit; ++i) {
			int ch = fbuff[0][i];

			if (fbuff[1][i] || ch == 13 || ch == 10) {
				fprintf(f3, "%c", ch);
			} else {
				fprintf(f3, "_");
			}
		}

		/* Carry the unprocessed tail over to the front. When the buffer holds
		 * fewer than l+6 entries the start is clamped to the first data slot
		 * so that slot 0 and negative indices are never read. */
		tail = (limit < 1) ? 1 : limit;
		j = 1;
		for (i = tail; i < fbuff[0][0]; ++i) {
			fbuff[0][j] = fbuff[0][i];
			++j;
		}
		fbuff[0][0] = j;
	}

	fclose(f1);
	if (fclose(f3) != 0) {
		return 1;
	}

	return(0);
}

int main(int argc,char *argv[])
{
int 	i,j;
char 	fn[256];
char 	fn_[256];
char 	ifn[256];
char 	ifn_[256];
	
	for(i=1;i<=500;++i)
	{
		for(j=0;j<256;++j){fn[j]='\0';fn_[j]='\0';ifn[j]='\0';ifn_[j]='\0';ssl[j]='\0';}
		if(i<=9)
		{
		strcat(fn,ps);strcat(ifn,ips);
		ssl[strlen(ssl)]=(char)(48+i);
		strcat(fn,ssl);strcat(ifn,ssl);
		strcat(fn,ss);strcat(ifn,ss);
		strcat(fn_,ps);strcat(ifn_,ips);
		strcat(fn_,ssl);strcat(ifn_,ssl);
		}else{
		if(i<=99)
		{
		strcat(fn,ps);strcat(ifn,ips);
		ssl[strlen(ssl)]=(char)(48+((int)i/(int)10));
		ssl[strlen(ssl)]=(char)(48+i-(int)10*((int)i/(int)10));
		strcat(fn,ssl);strcat(ifn,ssl);
		strcat(fn,ss);strcat(ifn,ss);
		strcat(fn_,ps);strcat(ifn_,ips);
		strcat(fn_,ssl);strcat(ifn_,ssl);
		}else{
		if(i<=999)
		{
		strcat(fn,ps);strcat(ifn,ips);
		ssl[strlen(ssl)]=(char)(48+((int)i/(int)100));
		ssl[strlen(ssl)]=(char)(48+(i-(int)100*((int)i/(int)100))/(int)10);
		ssl[strlen(ssl)]=(char)(48+((int)i-(int)100*((int)i/(int)100))-(int)10*(((int)i-(int)100*((int)i/(int)100))/(int)10));
		strcat(fn,ssl);strcat(ifn,ssl);
		strcat(fn,ss);strcat(ifn,ss);
		strcat(fn_,ps);strcat(ifn_,ips);
		strcat(fn_,ssl);strcat(ifn_,ssl);
		}else{
		strcat(fn,ps);strcat(ifn,ips);
		ssl[strlen(ssl)]=(char)(48+((int)i/(int)1000));
		ssl[strlen(ssl)]=(char)(48+(i-(int)1000*((int)i/(int)1000))/(int)100);
		ssl[strlen(ssl)]=(char)(48+((((int)i-(int)1000*((int)i/(int)1000))-(int)100*(((int)i-(int)1000*((int)i/(int)1000))/(int)100))/10));
		ssl[strlen(ssl)]=(char)(48+((int)i-(int)1000*((int)i/(int)1000))-(int)100*(((int)i-(int)1000*((int)i/(int)1000))/(int)100)
		-(int)10*((((int)i-(int)1000*((int)i/(int)1000))-(int)100*(((int)i-(int)1000*((int)i/(int)1000))/(int)100))/10));		
		strcat(fn,ssl);strcat(ifn,ssl);
		strcat(fn,ss);strcat(ifn,ss);
		strcat(fn_,ps);strcat(ifn_,ips);
		strcat(fn_,ssl);strcat(ifn_,ssl);
		}
		}
		}
		printf("%s\n",fn);p_1(i,fn,ifn);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+4);p_2(fn,fn_,4,ssl);p_2(ifn,ifn_,4,ssl);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+5);p_2(fn,fn_,5,ssl);p_2(ifn,ifn_,5,ssl);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+6);p_2(fn,fn_,6,ssl);p_2(ifn,ifn_,6,ssl);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+7);p_2(fn,fn_,7,ssl);p_2(ifn,ifn_,7,ssl);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+8);p_2(fn,fn_,8,ssl);p_2(ifn,ifn_,8,ssl);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+9);p_2(fn,fn_,9,ssl);p_2(ifn,ifn_,9,ssl);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+((int)10/(int)10));
		ssl[strlen(ssl)]=(char)(48+10-(int)10*((int)10/(int)10));p_2(fn,fn_,10,ssl);p_2(ifn,ifn_,10,ssl);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+((int)11/(int)10));
		ssl[strlen(ssl)]=(char)(48+11-(int)10*((int)11/(int)10));p_2(fn,fn_,11,ssl);p_2(ifn,ifn_,11,ssl);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+((int)12/(int)10));
		ssl[strlen(ssl)]=(char)(48+12-(int)10*((int)12/(int)10));p_2(fn,fn_,12,ssl);p_2(ifn,ifn_,12,ssl);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+((int)13/(int)10));
		ssl[strlen(ssl)]=(char)(48+13-(int)10*((int)13/(int)10));p_2(fn,fn_,13,ssl);p_2(ifn,ifn_,13,ssl);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+((int)14/(int)10));
		ssl[strlen(ssl)]=(char)(48+14-(int)10*((int)14/(int)10));p_2(fn,fn_,14,ssl);p_2(ifn,ifn_,14,ssl);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+((int)15/(int)10));
		ssl[strlen(ssl)]=(char)(48+15-(int)10*((int)15/(int)10));p_2(fn,fn_,15,ssl);p_2(ifn,ifn_,15,ssl);
        	remove(fn);remove(ifn);

	}
	return(0);
}