//	This source and similar text sequences method apply to GNU General Public License. 
//			Copyright (C) 2001-2016 Jasenko Dzinleski 

//		This program is free software; you can redistribute it
//	and/or modify it under the terms of the GNU General Public License as
//	published by the Free Software Foundation; either version 2 of the
//	License, or (at your option) any later version. 

//	This program is distributed in the hope that it will be useful, but
//	WITHOUT ANY WARRANTY; without even the implied warranty of
//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//	General Public License for more details. 

//	You should have received a copy of the GNU General Public License along
//	with this program; if not, write to the Free Software Foundation, Inc.,
//	51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

//	Text Context Similar Sequences
//	written by Dzinleski Jasenko, April 2017


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <math.h>

FILE 	*f1,*f2,*f3,*f4,*f5;
char	infn[256]="fasta__.txt";
char	outfn[256];
char	infn_[256];
int 	ba[256][256][256][2];

char	svl_[4096];
double	da[2][3][4096];
int 	fb[3][4096];

char 	ps[256]="out_d";
char 	ips[256]="iout_d";
char 	ss[256]=".txt";

char	sv[256];
char	sv_[256][256];int sv_i,sv_j;
char 	ssl[256];

char 	sq_[4096];

int p_2(char infn_[256], char outfn_[256], int sl, char ssl_[256])
{

int	a,b;
int	d,e;
int	i,j,k,l,m,n;
int	fb;
char 	outfn__[256];

	for(i=0;i<256;++i){outfn__[i]='\0';}
	f1=fopen(infn_,"rb");
	strcat(outfn__,outfn_);
	outfn__[strlen(outfn__)]='_';outfn__[strlen(outfn__)]='l';
	strcat(outfn__,ssl);
	strcat(outfn__,ss);
	f2=fopen(outfn__,"wb");
	fb=getc(f1);
	while(!feof(f1))
	{
		for(i=0;i<256;++i){sv[i]='\0';}i=0;
		while((fb!=10)&&(fb!=13)&&!feof(f1)){sv[i]=fb;++i;fb=getc(f1);}
		while((fb==10)||(fb==13)&&!feof(f1)){fb=getc(f1);}
		for(i=0;i<256;++i){for(j=0;j<256;++j){sv_[i][j]='\0';}}sv_i=0;sv_j=0;
		for(i=0;i<strlen(sv);++i){if(sv[i]=='_'){if(sv_i){sv_i=0;++sv_j;}}else{sv_[sv_j][sv_i]=sv[i];++sv_i;}}
		for(i=0;i<sv_j;++i){if(strlen(sv_[i])==sl){fprintf(f2,"%05d%c%s%c%c",strlen(sv_[i]),(char)9,sv_[i],(char)13,(char)10);}}
	}
	fclose(f1);
	fclose(f2);
	return(0);

}

int p_1(int l,char outfn[256],char outifn[256])
{

int     i,j,k,m,n;
int     mi,mj,mk,ml,mm;
int     a,b,c,d,mx;

	f1=fopen(infn,"rb");
	f3=fopen(outfn,"wb");
	fb[0][0]=1;
	while(!feof(f1))
	{
		//
		while(!feof(f1)&&(fb[0][0]<4096)){fb[0][fb[0][0]]=getc(f1);fb[1][fb[0][0]]=0;fb[2][fb[0][0]]=0;++fb[0][0];}
		for(i=0;i<256;++i){for(j=0;j<256;++j){for(k=0;k<256;++k){ba[i][j][k][0]=0;ba[i][j][k][1]=0;}}}
		//
		for(a=1;a<fb[0][0]-l-3;++a){++ba[fb[0][l+a]][fb[0][1+l+a]][fb[0][1+1+l+a]][0];}
		ml=0;for(i=0;i<256;++i){for(j=0;j<256;++j){for(k=0;k<256;++k){ml+=ba[i][j][k][0];}}}
		for(a=1;a<fb[0][0]-l-3;++a)
		{
			mi=-1;
			for(i=0;i<256;++i)
			{
				if(ba[i][fb[0][1+l+a]][fb[0][1+1+l+a]][0])
				{
					if(mi==-1){mi=ba[i][fb[0][1+l+a]][fb[0][1+1+l+a]][0];}else{
						if(mi<ba[i][fb[0][1+l+a]][fb[0][1+1+l+a]][0]){mi=ba[i][fb[0][1+l+a]][fb[0][1+1+l+a]][0];}
					}
				}
			}
			mj=-1;
			for(i=0;i<256;++i)
			{
				if(ba[fb[0][l+a]][i][fb[0][1+1+l+a]][0])
				{
					if(mj==-1){mj=ba[fb[0][l+a]][i][fb[0][1+1+l+a]][0];}else{
						if(mj<ba[fb[0][l+a]][i][fb[0][1+1+l+a]][0]){mj=ba[fb[0][l+a]][i][fb[0][1+1+l+a]][0];}
					}
				}
			}
			mk=-1;
			for(i=0;i<256;++i)
			{
				if(ba[fb[0][l+a]][fb[0][1+a]][i][0])
				{
					if(mk==-1){mk=ba[fb[0][l+a]][fb[0][1+l+a]][i][0];}else{
						if(mk<ba[fb[0][l+a]][fb[0][1+l+a]][i][0]){mk=ba[fb[0][a]][fb[0][1+a]][i][0];}}
				}
			}
			//printf("%e\t%e\t%e\n",(double)mi/(double)ml,(double)mj/(double)ml,(double)mk/(double)ml);
			da[0][0][a]=(double)mi/(double)ml;
			da[0][1][a]=(double)mj/(double)ml;
			da[0][2][a]=(double)mk/(double)ml;
		}
		//
		for(a=1;a<fb[0][0]-3-l;++a){++ba[fb[0][a]][fb[0][1+a]][fb[0][1+1+a]][1];}
		ml=0;for(i=0;i<256;++i){for(j=0;j<256;++j){for(k=0;k<256;++k){ml+=ba[i][j][k][1];}}}
		for(a=1;a<fb[0][0]-3-l;++a)
		{
			mi=-1;
			for(i=0;i<256;++i)
			{
				if(ba[i][fb[0][1+a]][fb[0][1+1+a]][1])
				{
					if(mi==-1){mi=ba[i][fb[0][1+a]][fb[0][1+1+a]][1];}else{
						if(mi<ba[i][fb[0][1+a]][fb[0][1+1+a]][1]){mi=ba[i][fb[0][1+a]][fb[0][1+1+a]][1];}
					}
				}
			}
			mj=-1;
			for(i=0;i<256;++i)
			{
				if(ba[fb[0][a]][i][fb[0][1+1+a]][1])
				{
					if(mj==-1){mj=ba[fb[0][a]][i][fb[0][1+1+a]][1];}else{
						if(mj<ba[fb[0][a]][i][fb[0][1+1+a]][1]){mj=ba[fb[0][a]][i][fb[0][1+1+a]][1];}
					}
				}
			}
			mk=-1;
			for(i=0;i<256;++i)
			{
				if(ba[fb[0][a]][fb[0][1+a]][i][1])
				{
					if(mk==-1){mk=ba[fb[0][a]][fb[0][1+a]][i][1];}else{
						if(mk<ba[fb[0][a]][fb[0][1+a]][i][1]){mk=ba[fb[0][a]][fb[0][1+a]][i][1];}
					}
				}
			}
			//printf("%e\t%e\t%e\n",(double)mi/(double)ml,(double)mj/(double)ml,(double)mk/(double)ml);
			da[1][0][a]=(double)mi/(double)ml;
			da[1][1][a]=(double)mj/(double)ml;
			da[1][2][a]=(double)mk/(double)ml;
		}
		//
		for(a=1;a<fb[0][0]-3-l;++a)
		{
			//printf("1.%e\t%e\t%e\n",da[0][0][a],da[0][1][a],da[0][2][a]);
			//printf("2.%e\t%e\t%e\n",da[1][0][a],da[1][1][a],da[1][2][a]);
			if(da[0][0][a]==da[1][0][a]){fb[1][a]=1;}
			if(da[0][1][a]==da[1][1][a]){fb[1][1+a]=1;}
			if(da[0][2][a]==da[1][2][a]){fb[1][1+1+a]=1;}
		}
		//
		for(i=1;i<fb[0][0]-l-3;++i)
		{
			if(fb[1][i]){fprintf(f3,"%c",fb[0][i]);}else{if(fb[0][i]==13||fb[0][i]==10){fprintf(f3,"%c",fb[0][i]);}else{fprintf(f3,"_");}}
		}
		j=1;for(i=fb[0][0]-l-3;i<fb[0][0];++i){fb[0][j]=fb[0][i];++j;}fb[0][0]=j;
	}
	fclose(f1);
	fclose(f3);
	
	return(0);
}

int main(int argc,char *argv[])
{
int 	i,j;
char 	fn[256];
char 	fn_[256];
char 	ifn[256];
char 	ifn_[256];
	
	for(i=1;i<=67;++i)
	{
		for(j=0;j<256;++j){fn[j]='\0';fn_[j]='\0';ifn[j]='\0';ifn_[j]='\0';ssl[j]='\0';}
		if(i<=9)
		{
		strcat(fn,ps);strcat(ifn,ips);
		ssl[strlen(ssl)]=(char)(48+i);
		strcat(fn,ssl);strcat(ifn,ssl);
		strcat(fn,ss);strcat(ifn,ss);
		strcat(fn_,ps);strcat(ifn_,ips);
		strcat(fn_,ssl);strcat(ifn_,ssl);
		}else{
		if(i<=99)
		{
		strcat(fn,ps);strcat(ifn,ips);
		ssl[strlen(ssl)]=(char)(48+((int)i/(int)10));
		ssl[strlen(ssl)]=(char)(48+i-(int)10*((int)i/(int)10));
		strcat(fn,ssl);strcat(ifn,ssl);
		strcat(fn,ss);strcat(ifn,ss);
		strcat(fn_,ps);strcat(ifn_,ips);
		strcat(fn_,ssl);strcat(ifn_,ssl);
		}else{
		if(i<=999)
		{
		strcat(fn,ps);strcat(ifn,ips);
		ssl[strlen(ssl)]=(char)(48+((int)i/(int)100));
		ssl[strlen(ssl)]=(char)(48+(i-(int)100*((int)i/(int)100))/(int)10);
		ssl[strlen(ssl)]=(char)(48+((int)i-(int)100*((int)i/(int)100))-(int)10*(((int)i-(int)100*((int)i/(int)100))/(int)10));
		strcat(fn,ssl);strcat(ifn,ssl);
		strcat(fn,ss);strcat(ifn,ss);
		strcat(fn_,ps);strcat(ifn_,ips);
		strcat(fn_,ssl);strcat(ifn_,ssl);
		}else{
		strcat(fn,ps);strcat(ifn,ips);
		ssl[strlen(ssl)]=(char)(48+((int)i/(int)1000));
		ssl[strlen(ssl)]=(char)(48+(i-(int)1000*((int)i/(int)1000))/(int)100);
		ssl[strlen(ssl)]=(char)(48+((((int)i-(int)1000*((int)i/(int)1000))-(int)100*(((int)i-(int)1000*((int)i/(int)1000))/(int)100))/10));
		ssl[strlen(ssl)]=(char)(48+((int)i-(int)1000*((int)i/(int)1000))-(int)100*(((int)i-(int)1000*((int)i/(int)1000))/(int)100)
		-(int)10*((((int)i-(int)1000*((int)i/(int)1000))-(int)100*(((int)i-(int)1000*((int)i/(int)1000))/(int)100))/10));		
		strcat(fn,ssl);strcat(ifn,ssl);
		strcat(fn,ss);strcat(ifn,ss);
		strcat(fn_,ps);strcat(ifn_,ips);
		strcat(fn_,ssl);strcat(ifn_,ssl);
		}
		}
		}
		printf("%s\n",fn);p_1(i,fn,ifn);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+4);p_2(fn,fn_,4,ssl);p_2(ifn,ifn_,4,ssl);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+5);p_2(fn,fn_,5,ssl);p_2(ifn,ifn_,5,ssl);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+6);p_2(fn,fn_,6,ssl);p_2(ifn,ifn_,6,ssl);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+7);p_2(fn,fn_,7,ssl);p_2(ifn,ifn_,7,ssl);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+8);p_2(fn,fn_,8,ssl);p_2(ifn,ifn_,8,ssl);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+9);p_2(fn,fn_,9,ssl);p_2(ifn,ifn_,9,ssl);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+((int)10/(int)10));
		ssl[strlen(ssl)]=(char)(48+10-(int)10*((int)10/(int)10));p_2(fn,fn_,10,ssl);p_2(ifn,ifn_,10,ssl);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+((int)11/(int)10));
		ssl[strlen(ssl)]=(char)(48+11-(int)10*((int)11/(int)10));p_2(fn,fn_,11,ssl);p_2(ifn,ifn_,11,ssl);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+((int)12/(int)10));
		ssl[strlen(ssl)]=(char)(48+12-(int)10*((int)12/(int)10));p_2(fn,fn_,12,ssl);p_2(ifn,ifn_,12,ssl);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+((int)13/(int)10));
		ssl[strlen(ssl)]=(char)(48+13-(int)10*((int)13/(int)10));p_2(fn,fn_,13,ssl);p_2(ifn,ifn_,13,ssl);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+((int)14/(int)10));
		ssl[strlen(ssl)]=(char)(48+14-(int)10*((int)14/(int)10));p_2(fn,fn_,14,ssl);p_2(ifn,ifn_,14,ssl);
		for(j=0;j<256;++j){ssl[j]='\0';}ssl[strlen(ssl)]=(char)(48+((int)15/(int)10));
		ssl[strlen(ssl)]=(char)(48+15-(int)10*((int)15/(int)10));p_2(fn,fn_,15,ssl);p_2(ifn,ifn_,15,ssl);
        	remove(fn);remove(ifn);

	}
	return(0);
}