//	This source and similar text sequences method apply to GNU General Public License. 
//			Copyright (C) 2001-2016 Jasenko Dzinleski 

//		This program is free software; you can redistribute it
//	and/or modify it under the terms of the GNU General Public License as
//	published by the Free Software Foundation; either version 2 of the
//	License, or (at your option) any later version. 

//	This program is distributed in the hope that it will be useful, but
//	WITHOUT ANY WARRANTY; without even the implied warranty of
//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//	General Public License for more details. 

//	You should have received a copy of the GNU General Public License along
//	with this program; if not, write to the Free Software Foundation, Inc.,
//	51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

//  		Text Context Similar Sequences 
//		written by Dzinleski Jasenko  April , 2017



#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

// Shared stream handles. In the functions below f1 is the input stream of
// p_1 and p_2, f2 is the output stream of p_2 and f3 is the output stream of
// p_1; f4 and f5 are not referenced by the functions below.
FILE	*f1,*f2,*f3,*f4,*f5;

// Name of the input file opened by p_1.
char	infn[256]="fasta__.txt";

// Work buffer of p_1. Row 0 holds the bytes read from the input, with
// fbuff[0][0] used as the index of the next free slot (data starts at index 1).
// Row 1 holds one flag per byte: non-zero means the byte is written unchanged.
// Row 2 is zeroed when a byte is read and is otherwise unused by p_1.
int 	fbuff[3][4096];
// Not referenced by the functions below.
char 	sq_[4096];

// Pieces of the generated file names: main builds "<ps><number><ss>" and
// "<ips><number><ss>".
char 	ps[256]="out_d";
char 	ips[256]="iout_d";
char 	ss[256]=".txt";

// Scratch storage of p_2: sv is the current input line, sv_ holds the
// '_'-separated tokens of that line, sv_j counts the finished tokens and sv_i
// is the write position inside the token being collected.
char	sv[256];
char	sv_[256][256];int sv_i,sv_j;
// Decimal number text built in main and used when composing file names.
char 	ssl[256];

// Not referenced by the functions below.
int		b1_,b2_,b3_,b4_,b5_;
int		b6_,b7_,b8_,b9_,b10_;
int		b11_,b12_,b13_,b14_,b15_;
int		b16_,b17_,b18_;
	
// Packed 64-bit words built by p_1 and handed to bp___ (bp___'s parameters of
// the same names shadow these inside bp___).
long long	b1,b2,b3;

// Bit masks used by bp___. main builds b64o from bit positions 0,2,4,... and
// b64e from bit positions 1,3,5,... before any call to bp___.
long long	b64o,b64e;
// Results of bp___: d1, d2, d1_ and d2_ are written on every call; d3 and d3_
// are not written by the functions below.
long 		d1,d2,d3;
long 		d1_,d2_,d3_;

// Copies of the bp___ results for the window at the current position in p_1
// (dc3 and dc3_ are not referenced by the functions below).
long 		dc1,dc2,dc3;
long 		dc1_,dc2_,dc3_;

// Copies of the bp___ results for the window l positions further on in p_1
// (dd3 and dd3_ are not referenced by the functions below).
long 		dd1,dd2,dd3;
long 		dd1_,dd2_,dd3_;

// Combine two packed words: x is masked with b64e, y with b64o, and the two
// masked values are mixed as ((xe>>1)^yo)|(xe^(yo<<1)). The 64-bit result is
// converted to long.
static long bp_mix(long long x,long long y)
{
	const long long	xe=x&b64e;
	const long long	yo=y&b64o;

	return((long)(((xe>>1)^yo)|(xe^(yo<<1))));
}

// bp___ - compute the four pairwise mix values of three packed words and store
// them in the globals
//	d1  <- mix(b1,b2)	d2  <- mix(b1,b3)
//	d1_ <- mix(b2,b1)	d2_ <- mix(b2,b3)
// d3 and d3_ are left untouched. Always returns 0.
int bp___(long long b1,long long b2,long long b3)
{
	d1=bp_mix(b1,b2);
	d2=bp_mix(b1,b3);

	d1_=bp_mix(b2,b1);
	d2_=bp_mix(b2,b3);

	return(0);
}

// p_2 - copy every '_'-delimited token of exactly sl characters to a new file.
//
//	infn_	input file; read in binary mode, one line at a time. Bytes 10 and
//		13 end a line and runs of them are skipped.
//	outfn_	stem of the output name; the file written is
//		"<outfn_>_l<ssl_><ss>", ss being the global suffix.
//	sl	token length to keep.
//	ssl_	text placed after "_l" in the output name (main passes sl in
//		decimal).
//
// Each kept token is written as "<sl as 5 zero-padded digits>\t<token>\r\n".
// One or more '_' separate tokens. A token is only counted once a '_' follows
// it, so the text after the last '_' of a line is not emitted.
// NOTE(review): that drops a token ending at the line break - confirm this is
// intended.
// At most sizeof sv - 1 characters of a line are examined; the rest of a longer
// line is skipped.
//
// Uses the global streams f1/f2 and the global scratch buffers sv, sv_, sv_i
// and sv_j. Returns 0 on success, -1 when the output name does not fit, a file
// cannot be opened, or closing the output file fails.
int p_2(char infn_[256], char outfn_[256], int sl, char ssl_[256])
{
	char	outfn__[256];
	int	fb;
	int	i,n;
	int	rc=0;

	// Build the output name with a bounded write instead of chained strcat.
	n=snprintf(outfn__,sizeof outfn__,"%s_l%s%s",outfn_,ssl_,ss);
	if(n<0||(size_t)n>=sizeof outfn__){return(-1);}

	f1=fopen(infn_,"rb");
	if(f1==NULL){return(-1);}
	f2=fopen(outfn__,"wb");
	if(f2==NULL){fclose(f1);return(-1);}

	fb=getc(f1);
	// Testing fb against EOF also ends the loop on a read error.
	while(fb!=EOF)
	{
		// Read one line into sv, never writing past its last slot.
		memset(sv,0,sizeof sv);
		n=0;
		while((fb!=10)&&(fb!=13)&&(fb!=EOF))
		{
			if(n<(int)sizeof sv-1){sv[n]=(char)fb;++n;}
			fb=getc(f1);
		}
		while((fb==10)||(fb==13)){fb=getc(f1);}

		// Split the line at '_'; empty tokens are not stored.
		memset(sv_,0,sizeof sv_);sv_i=0;sv_j=0;
		for(i=0;sv[i]!='\0';++i)
		{
			if(sv[i]=='_'){if(sv_i){sv_i=0;++sv_j;}}
			else{sv_[sv_j][sv_i]=sv[i];++sv_i;}
		}

		// Only the sv_j tokens that were closed by a '_' are considered.
		for(i=0;i<sv_j;++i)
		{
			if((sl>=0)&&(strlen(sv_[i])==(size_t)sl))
			{
				fprintf(f2,"%05d\t%s\r\n",sl,sv_[i]);
			}
		}
	}
	fclose(f1);
	if(fclose(f2)!=0){rc=-1;}
	return(rc);

}

int p_1(int l,char outfn[256],char outifn[256])
{

	int	i,j,k,m;
	int	fbyte;
	int	b0_1,b0_2,b0_3,b0_4,b0_5,b0_6,b0_7,b0_8,b0_9,b0_10,b0_11,b0_12,b0_13,b0_14,b0_15,b0_16,b0_17,b0_18;
	int	b1_1,b1_2,b1_3,b1_4,b1_5,b1_6,b1_7,b1_8,b1_9,b1_10,b1_11,b1_12,b1_13,b1_14,b1_15,b1_16,b1_17,b1_18;
	int	a,b,c;

	f1=fopen(infn,"rb");
	f3=fopen(outfn,"wb");
	fbuff[0][0]=1;
	while(!feof(f1))
	{
		
		while(!feof(f1)&&(fbuff[0][0]<4096)){fbuff[0][fbuff[0][0]]=getc(f1);fbuff[1][fbuff[0][0]]=0;fbuff[2][fbuff[0][0]]=0;++fbuff[0][0];}

		i=1;
		while(i<fbuff[0][0]-l-18)
		{

		b0_1=fbuff[0][i];
		b0_2=fbuff[0][1+i];
		b0_3=fbuff[0][2+i];
		b0_4=fbuff[0][3+i];
		b0_5=fbuff[0][4+i];
		b0_6=fbuff[0][5+i];
		b0_7=fbuff[0][6+i];
		b0_8=fbuff[0][7+i];
		b0_9=fbuff[0][8+i];
		b0_10=fbuff[0][9+i];
		b0_11=fbuff[0][10+i];
		b0_12=fbuff[0][11+i];
		b0_13=fbuff[0][12+i];
		b0_14=fbuff[0][13+i];
		b0_15=fbuff[0][14+i];
		b0_16=fbuff[0][15+i];
		b0_17=fbuff[0][16+i];
		b0_18=fbuff[0][17+i];

		b1=((b0_1<<56-1)|(b0_3<<48-1)|(b0_5<<40-1)|(b0_7<<32-1)|(b0_9<<24-1)|(b0_11<<16-1)|(b0_13<<8-1)|(b0_15<<0));
		b2=((b0_2<<56-1)|(b0_4<<48-1)|(b0_6<<40-1)|(b0_8<<32-1)|(b0_10<<24-1)|(b0_12<<16-1)|(b0_14<<8-1)|(b0_16<<0));
		b3=((b0_3<<56-1)|(b0_5<<48-1)|(b0_7<<40-1)|(b0_9<<32-1)|(b0_11<<24-1)|(b0_13<<16-1)|(b0_15<<8-1)|(b0_17<<0));
		b1=b1<<1;b2=b2<<1;b3=b3<<1;
		bp___(b1,b2,b3);
		
		dc1=d1;
		dc2=d2;
		dc1_=d1_;
		dc2_=d2_;

		b1_1=fbuff[0][l+i];
		b1_2=fbuff[0][1+l+i];
		b1_3=fbuff[0][2+l+i];
		b1_4=fbuff[0][3+l+i];
		b1_5=fbuff[0][4+l+i];
		b1_6=fbuff[0][5+l+i];
		b1_7=fbuff[0][6+l+i];
		b1_8=fbuff[0][7+l+i];
		b1_9=fbuff[0][8+l+i];
		b1_10=fbuff[0][9+l+i];
		b1_11=fbuff[0][10+l+i];
		b1_12=fbuff[0][11+l+i];
		b1_13=fbuff[0][12+l+i];
		b1_14=fbuff[0][13+l+i];
		b1_15=fbuff[0][14+l+i];
		b1_16=fbuff[0][15+l+i];
		b1_17=fbuff[0][16+l+i];
		b1_18=fbuff[0][17+l+i];

		b1=((b1_1<<56-1)|(b1_3<<48-1)|(b1_5<<40-1)|(b1_7<<32-1)|(b1_9<<24-1)|(b1_11<<16-1)|(b1_13<<8-1)|(b1_15<<0));
		b2=((b1_2<<56-1)|(b1_4<<48-1)|(b1_6<<40-1)|(b1_8<<32-1)|(b1_10<<24-1)|(b1_12<<16-1)|(b1_14<<8-1)|(b1_16<<0));
		b3=((b1_3<<56-1)|(b1_5<<48-1)|(b1_7<<40-1)|(b1_9<<32-1)|(b1_11<<24-1)|(b1_13<<16-1)|(b1_15<<8-1)|(b1_17<<0));
		b1=b1<<1;b2=b2<<1;b3=b3<<1;
		bp___(b1,b2,b3);

		dd1=d1;
		dd2=d2;
		dd1_=d1_;
		dd2_=d2_;

		if(dc1==dd1)
		{
		fbuff[1][0+i]=1;
		fbuff[1][2+i]=1;
		fbuff[1][4+i]=1;
		fbuff[1][6+i]=1;

		fbuff[1][8+i]=1;
		fbuff[1][10+i]=1;
		fbuff[1][12+i]=1;
		fbuff[1][14+i]=1;
		}

		if(dc2==dd2)
		{
		fbuff[1][1+i]=1;
		fbuff[1][3+i]=1;
		fbuff[1][5+i]=1;
		fbuff[1][7+i]=1;

		fbuff[1][9+i]=1;
		fbuff[1][11+i]=1;
		fbuff[1][13+i]=1;
		fbuff[1][15+i]=1;
		}

		if(dc1_==dd1_)
		{
		fbuff[1][2+i]=1;
		fbuff[1][4+i]=1;
		fbuff[1][6+i]=1;
		fbuff[1][8+i]=1;

		fbuff[1][10+i]=1;
		fbuff[1][12+i]=1;
		fbuff[1][14+i]=1;
		fbuff[1][16+i]=1;
		}

		if(dc2_==dd2_)
		{
		fbuff[1][3+i]=1;
		fbuff[1][5+i]=1;
		fbuff[1][7+i]=1;
		fbuff[1][9+i]=1;

		fbuff[1][11+i]=1;
		fbuff[1][13+i]=1;
		fbuff[1][15+i]=1;
		fbuff[1][17+i]=1;
		}

		++i;

		}
		for(i=1;i<fbuff[0][0]-l-18;++i)
		{
			if(fbuff[1][i]){fprintf(f3,"%c",fbuff[0][i]);}else{if(fbuff[0][i]==13||fbuff[0][i]==10){fprintf(f3,"%c",fbuff[0][i]);}else{fprintf(f3,"_");}}
		}
		j=1;for(i=fbuff[0][0]-l-18;i<fbuff[0][0];++i){fbuff[0][j]=fbuff[0][i];++j;}fbuff[0][0]=j;
	}
	fclose(f1);
	fclose(f3);

	return(0);
}

int main(int argc,char *argv[])
{
int 	i,j;
char 	fn[256];
char 	fn_[256];
char 	ifn[256];
char 	ifn_[256];

	b64o=0;for(i=0;i<64;i+=2){b64o|=1<<i;}
	b64e=0;for(i=1;i<64;i+=2){b64e|=1<<i;}		
	
	for(i=167;i<=500;++i)
	{
		for(j=0;j<256;++j){fn[j]='\0';fn_[j]='\0';ifn[j]='\0';ifn_[j]='\0';ssl[j]='\0';}
		if(i<=9)
		{
		strcat(fn,ps);strcat(ifn,ips);
		ssl[strlen(ssl)]=(char)(48+i);
		strcat(fn,ssl);strcat(ifn,ssl);
		strcat(fn,ss);strcat(ifn,ss);
		strcat(fn_,ps);strcat(ifn_,ips);
		strcat(fn_,ssl);strcat(ifn_,ssl);
		}else{
		if(i<=99)
		{
		strcat(fn,ps);strcat(ifn,ips);
		ssl[strlen(ssl)]=(char)(48+((int)i/(int)10));
		ssl[strlen(ssl)]=(char)(48+i-(int)10*((int)i/(int)10));
		strcat(fn,ssl);strcat(ifn,ssl);
		strcat(fn,ss);strcat(ifn,ss);
		strcat(fn_,ps);strcat(ifn_,ips);
		strcat(fn_,ssl);strcat(ifn_,ssl);
		}else{
		if(i<=999)
		{
		strcat(fn,ps);strcat(ifn,ips);
		ssl[strlen(ssl)]=(char)(48+((int)i/(int)100));
		ssl[strlen(ssl)]=(char)(48+(i-(int)100*((int)i/(int)100))/(int)10);
		ssl[strlen(ssl)]=(char)(48+((int)i-(int)100*((int)i/(int)100))-(int)10*(((int)i-(int)100*((int)i/(int)100))/(int)10));
		strcat(fn,ssl);strcat(ifn,ssl);
		strcat(fn,ss);strcat(ifn,ss);
		strcat(fn_,ps);strcat(ifn_,ips);
		strcat(fn_,ssl);strcat(ifn_,ssl);
		}else{
		strcat(fn,ps);strcat(ifn,ips);
		ssl[strlen(ssl)]=(char)(48+((int)i/(int)1000));
		ssl[strlen(ssl)]=(char)(48+(i-(int)1000*((int)i/(int)1000))/(int)100);
		ssl[strlen(ssl)]=(char)(48+((((int)i-(int)1000*((int)i/(int)1000))-(int)100*(((int)i-(int)1000*((int)i/(int)1000))/(int)100))/10));
		ssl[strlen(ssl)]=(char)(48+((int)i-(int)1000*((int)i/(int)1000))-(int)100*(((int)i-(int)1000*((int)i/(int)1000))/(int)100)
		-(int)10*((((int)i-(int)1000*((int)i/(int)1000))-(int)100*(((int)i-(int)1000*((int)i/(int)1000))/(int)100))/10));		
		strcat(fn,ssl);strcat(ifn,ssl);
		strcat(fn,ss);strcat(ifn,ss);
		strcat(fn_,ps);strcat(ifn_,ips);
		strcat(fn_,ssl);strcat(ifn_,ssl);
		}
		}
		}
		printf("%s\n",fn);p_1(i,fn,ifn);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+4);p_2(fn,fn_,4,ssl);
		//
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+5);p_2(fn,fn_,5,ssl);
		//
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+6);p_2(fn,fn_,6,ssl);
		//
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+7);p_2(fn,fn_,7,ssl);
		//
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+8);p_2(fn,fn_,8,ssl);
		//
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+9);p_2(fn,fn_,9,ssl);
		//
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+((int)10/(int)10));
		ssl[strlen(ssl)]=(char)(48+10-(int)10*((int)10/(int)10));p_2(fn,fn_,10,ssl);
		//
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+((int)11/(int)10));
		ssl[strlen(ssl)]=(char)(48+11-(int)10*((int)11/(int)10));p_2(fn,fn_,11,ssl);
		//
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+((int)12/(int)10));
		ssl[strlen(ssl)]=(char)(48+12-(int)10*((int)12/(int)10));p_2(fn,fn_,12,ssl);
		//
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+((int)13/(int)10));
		ssl[strlen(ssl)]=(char)(48+13-(int)10*((int)13/(int)10));p_2(fn,fn_,13,ssl);
		//
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+((int)14/(int)10));
		ssl[strlen(ssl)]=(char)(48+14-(int)10*((int)14/(int)10));p_2(fn,fn_,14,ssl);
		//
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+((int)15/(int)10));
		ssl[strlen(ssl)]=(char)(48+15-(int)10*((int)15/(int)10));p_2(fn,fn_,15,ssl);
		//
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+((int)16/(int)10));
		ssl[strlen(ssl)]=(char)(48+16-(int)10*((int)16/(int)10));p_2(fn,fn_,16,ssl);
		//
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+((int)17/(int)10));
		ssl[strlen(ssl)]=(char)(48+17-(int)10*((int)17/(int)10));p_2(fn,fn_,17,ssl);
		//
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+((int)18/(int)10));
		ssl[strlen(ssl)]=(char)(48+18-(int)10*((int)18/(int)10));p_2(fn,fn_,18,ssl);
		//
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+((int)19/(int)10));
		ssl[strlen(ssl)]=(char)(48+19-(int)10*((int)19/(int)10));p_2(fn,fn_,19,ssl);
		//
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+((int)20/(int)10));
		ssl[strlen(ssl)]=(char)(48+20-(int)10*((int)20/(int)10));p_2(fn,fn_,20,ssl);
        	//remove(fn);remove(ifn);
		//
	}
	return(0);
}