//	This source and Longest text sequence from Text Subset Sequence Length Count method apply to GNU General Public License.
//			Copyright (C) 2001-2017 Jasenko Dzinleski 

//		This program is free software; you can redistribute it
//	and/or modify it under the terms of the GNU General Public License as
//	published by the Free Software Foundation; either version 2 of the
//	License, or (at your option) any later version. 

//	This program is distributed in the hope that it will be useful, but
//	WITHOUT ANY WARRANTY; without even the implied warranty of
//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//	General Public License for more details. 

//	You should have received a copy of the GNU General Public License along
//	with this program; if not, write to the Free Software Foundation, Inc.,
//	51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

//  	Longest text sequence from Text Subset Sequence Length Count
//	written by Dzinleski Jasenko  December 2016 , July 2017

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Shared stream handles: f1 is the input file currently being read (opened in
// main, p_1 and p_3); f2 is the output file currently being written (opened in
// main and p_3, written to by p_1).
FILE		*f1,*f2;

// Input file name, copied from argv[1] in main.
char		infn[256];
// Name of the output file that main opens as f2.
char		outfn[256]="out.txt";

// Current input line: filled by p_1, scanned by p_2.
char		sv[256];
// Table of the distinct encoded windows collected by p_2; laHi is the number
// of entries in use.
long long 	laH[1000000];int laHi=0;
// lac[i] is the number of times the window stored in laH[i] has been seen.
int       	lac[1000000]; 
// Scratch: decoded text of one table entry (used by p_1).
char    	svc[256];
// Scratch: encoded window being looked up (p_2) or a single-bit mask (p_1).
long long 	lb;
// Scratch: '0'/'1' character form of an encoded window.
char    	lbs[256];
// Scratch: index into lbs.
int 		sc;

// Raises d to the integer power a by repeated multiplication.
//
//   d  base
//   a  exponent
//
// Returns 1 when a is 0 and d itself when a is 1 or negative (negative
// exponents are not inverted); otherwise d multiplied by itself a times,
// accumulated as a double.
double mpow(int d,int a)
{
	double	product;
	int	step;

	if(a==0){return(1);}

	product=d;
	for(step=2;step<=a;++step)
	{
		product*=d;
	}
	return(product);
}

// Reads the next non-empty line from `in` into line[0..cap-1].
// CR (13) and LF (10) both end a line; runs of them are skipped, so blank
// lines never reach the caller. Characters that do not fit in the buffer are
// read and discarded, and the result is always NUL-terminated.
// Returns 1 when a line was stored, 0 when the end of the stream was reached.
static int p_3_read_line(FILE *in,char *line,size_t cap)
{
	size_t	len=0;
	int	ch=getc(in);

	while(ch==10||ch==13){ch=getc(in);}
	if(ch==EOF){line[0]='\0';return(0);}

	while(ch!=EOF&&ch!=10&&ch!=13)
	{
		if(len+1<cap){line[len]=(char)ch;++len;}
		ch=getc(in);
	}
	line[len]='\0';
	return(1);
}

// Tells whether `next` continues `prev` shifted left by one character:
// prev without its first character must be a prefix of next, and next must be
// at least as long as prev so that it contributes at least one new character.
static int p_3_extends(const char *prev,const char *next)
{
	size_t	prev_len=strlen(prev);

	if(prev_len<1||strlen(next)<prev_len){return(0);}
	return(memcmp(prev+1,next,prev_len-1)==0);
}

// Merges runs of overlapping lines into single longer lines.
//
//   infn_   path of the input text file
//   outfn_  path of the output text file (created or truncated)
//
// Consecutive input lines form a chain while each line equals the previous
// one shifted left by one character (see p_3_extends). For every chain one
// output line is written: the first line of the chain followed by the new
// trailing characters each later line adds. A line that does not continue the
// chain starts a new output line. The chain still pending at end of input is
// written as well, and chains may be arbitrarily long because the merged text
// is streamed to the output instead of being buffered.
//
// Input lines longer than 255 characters are truncated to 255.
// The streams are local to this function; the globals f1/f2 are not touched.
//
// Returns 0 on success, -1 when a file cannot be opened or the output cannot
// be flushed.
int p_3(char infn_[256],char outfn_[256])
{
	char	prev[256];
	char	cur[256];
	FILE	*in;
	FILE	*out;

	in=fopen(infn_,"rb");
	if(in==NULL){return(-1);}
	out=fopen(outfn_,"w");
	if(out==NULL){fclose(in);return(-1);}

	if(p_3_read_line(in,prev,sizeof prev))
	{
		fputs(prev,out);
		while(p_3_read_line(in,cur,sizeof cur))
		{
			if(p_3_extends(prev,cur))
			{
				// cur repeats prev[1..]; only what lies beyond that is new.
				fputs(cur+strlen(prev)-1,out);
			}else{
				fputc('\n',out);
				fputs(cur,out);
			}
			memcpy(prev,cur,sizeof prev);
		}
		// Terminate the chain that was still open at end of input.
		fputc('\n',out);
	}

	fclose(in);
	if(fclose(out)!=0){return(-1);}

	return(0);
}

// Counts the windows of length sl found in the current line (global sv).
//
//   sl  window length in characters
//
// Every window is packed into a 64-bit key, three bits per character, first
// character in the lowest bits: 'A'=1, 'C'=2, 'G'=3, 'T'=4, anything else=5.
// Only the low 64 bits are kept, so windows longer than 21 characters are not
// told apart beyond that point. The key is looked up in the table laH: a
// known key has its counter in lac incremented, a new key is appended with a
// count of 1. When the table is full, new keys are ignored while known keys
// are still counted. The global lb is left holding the last key examined.
//
// Lines that are not longer than sl (and sl <= 0) contribute nothing.
// NOTE(review): window start positions run from 0 to strlen(sv)-sl-1, so the
// last window of the line (start == strlen(sv)-sl) is not counted; this is
// kept as found - confirm whether it is intended.
//
// Always returns 0.
int p_2(int sl)
{
	const size_t	table_cap=sizeof laH/sizeof laH[0];
	const size_t	line_len=strlen(sv);
	size_t		start,pos;
	int		slot;

	// Guard before subtracting: strlen() is unsigned, so a short line must
	// not be allowed to wrap the window count around.
	if(sl<=0||line_len<=(size_t)sl){return(0);}

	for(start=0;start+(size_t)sl<line_len;++start)
	{
		unsigned long long	key=0;

		for(pos=0;pos<(size_t)sl;++pos)
		{
			unsigned long long	code;

			switch(sv[start+pos])
			{
				case 'A': code=1;break;
				case 'C': code=2;break;
				case 'G': code=3;break;
				case 'T': code=4;break;
				default:  code=5;break;
			}
			// Bits past position 63 do not fit in the key and are dropped.
			if(3*pos<64){key|=code<<(3*pos);}
		}
		lb=(long long)key;

		for(slot=0;slot<laHi;++slot)
		{
			if(laH[slot]==lb){++lac[slot];break;}
		}
		if(slot==laHi&&(size_t)laHi<table_cap)
		{
			laH[laHi]=lb;lac[laHi]=1;++laHi;
		}
	}
	return(0);
}

// Scans the input file once and writes every window of length sl that was
// counted more than once.
//
//   infn_  path of the input file; CR (13) and LF (10) both end a line and
//          runs of them count as a single line break
//   sl     window length handed to p_2()
//   st     number of the first line (counting from 1) that is passed to
//          p_2(); earlier lines are read and skipped
//   al     the scan stops after the first processed line that leaves
//          laHi >= al
//
// The table (laH/lac/laHi) is emptied first. Each line from st onwards is
// copied into the global sv (at most 255 characters, the rest of a longer
// line is discarded) and counted by p_2(). Afterwards every table entry with
// a count above 1 is decoded, three bits per character (1='A', 2='C', 3='G',
// 4='T', any other code is dropped), and written as one line to the global
// stream f2, which the caller must have opened. Entries that decode to no
// character at all are not written.
//
// Returns the number of the line at which the scan stopped, or -1 when the
// end of the file was reached. -1 is also returned, without writing anything,
// when f2 is not open or the input file cannot be opened.
int p_1(char infn_[256],int sl,int st,int al)
{
	int	i,g,len,cc=0;
	int	fb;
	char	text[22];

	laHi=0;

	if(f2==NULL){return(-1);}
	f1=fopen(infn_,"rb");
	if(f1==NULL){return(-1);}

	fb=getc(f1);
	while(fb!=EOF)
	{
		memset(sv,'\0',sizeof sv);i=0;
		// Stop at EOF too, so a last line without a line break ends the scan.
		while((fb!=10)&&(fb!=13)&&(fb!=EOF))
		{
			if(i<(int)sizeof sv-1){sv[i]=(char)fb;++i;}
			fb=getc(f1);
		}
		while((fb==10)||(fb==13)){fb=getc(f1);}
		++cc;
		if(cc>=st)
		{
			p_2(sl);
			if(laHi>=al){printf("%d - Exiting at %d\n",sl,cc);break;}
		}
	}
	if(fb==EOF){cc=-1;}
	fclose(f1);

	for(i=0;i<laHi;++i)
	{
		unsigned long long	key;

		if(lac[i]<=1){continue;}

		key=(unsigned long long)laH[i];
		len=0;
		// 21 complete three-bit groups fit into the 64-bit key.
		for(g=0;g<21;++g)
		{
			switch((unsigned)((key>>(3*g))&7u))
			{
				case 1: text[len]='A';++len;break;
				case 2: text[len]='C';++len;break;
				case 3: text[len]='G';++len;break;
				case 4: text[len]='T';++len;break;
				default: break;
			}
		}
		text[len]='\0';
		if(len>0){fprintf(f2,"%s\n",text);}
	}

	return(cc);
}

// Converts a command-line argument to a non-negative int.
// Returns the value, or -1 when the text is not a complete decimal number,
// is negative, or does not fit in an int.
static int main_parse_count(const char *text)
{
	char	*end=NULL;
	long	value;

	errno=0;
	value=strtol(text,&end,10);
	if(end==text||*end!='\0'||errno==ERANGE||value<0||value>INT_MAX){return(-1);}
	return((int)value);
}

// Program entry point.
//
//   argv[1]  input file
//   argv[2]  number of the first line to process (passed to p_1 as st)
//   argv[3]  table size at which p_1 ends a pass (passed to p_1 as al)
//
// For every window length from 10 to 21 the input is processed by repeated
// calls to p_1(), each continuing at the line where the previous call
// stopped, until p_1() reports the end of the file. All results are written
// to the file named by outfn; p_3() then merges overlapping lines of that
// file into "out_.txt".
//
// Returns 0 on success and EXIT_FAILURE when the arguments are missing or
// invalid or a file cannot be opened.
int main(int argc,char *argv[])
{
	int	first_line,table_limit;
	int	sl,from,next;
	FILE	*probe;

	if(argc<4)
	{
		fprintf(stderr,"usage: %s <input-file> <start-line> <table-limit>\n",argc>0?argv[0]:"program");
		return(EXIT_FAILURE);
	}

	if(strlen(argv[1])>=sizeof infn)
	{
		fprintf(stderr,"input file name too long\n");
		return(EXIT_FAILURE);
	}
	strcpy(infn,argv[1]);
	probe=fopen(infn,"rb");
	if(probe==NULL){perror(infn);return(EXIT_FAILURE);}
	fclose(probe);

	first_line=main_parse_count(argv[2]);
	table_limit=main_parse_count(argv[3]);
	if(first_line<0||table_limit<0)
	{
		fprintf(stderr,"start line and table limit must be non-negative integers\n");
		return(EXIT_FAILURE);
	}

	f2=fopen(outfn,"wb");
	if(f2==NULL){perror(outfn);return(EXIT_FAILURE);}

	for(sl=10;sl<=21;++sl)
	{
		from=first_line;
		for(;;)
		{
			next=p_1(infn,sl,from,table_limit);
			if(next==-1){break;}
			// p_1 stops on the line it started with when that single line
			// already reaches the limit; step past it so the pass ends.
			from=(next>from)?next:from+1;
		}
	}
	fclose(f2);

	p_3(outfn,"out_.txt");

	return(0);
}