//	This source and Text Sequence Browsing by their Complexity method apply to GNU General Public License. 
//			Copyright (C) 2001-2017 Jasenko Dzinleski 

//		This program is free software; you can redistribute it
//	and/or modify it under the terms of the GNU General Public License as
//	published by the Free Software Foundation; either version 2 of the
//	License, or (at your option) any later version. 

//	This program is distributed in the hope that it will be useful, but
//	WITHOUT ANY WARRANTY; without even the implied warranty of
//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//	General Public License for more details. 

//	You should have received a copy of the GNU General Public License along
//	with this program; if not, write to the Free Software Foundation, Inc.,
//	51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

//  	Text Sequence Browsing by their Complexity  
//	written by Dzinleski Jasenko  December 2016 , Jun 2017


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

// Stream handles opened in main: f1 is the input file named by infn;
// f2, f3, f4 and f5 are the output files named by out1fn, out2fn, out3fn
// and out4fn respectively. f4 and f5 are reopened for reading later in main.
FILE		*f1,*f2,*f3,*f4,*f5;

// Input file name (copied from argv[1] in main) and the fixed output file names.
char	infn[256];
char	out1fn[256]="out1.txt";
char	out2fn[256]="out2.txt";
char	out3fn[256]="out3.txt";
char	out4fn[256]="out4.txt";
// sv and sv_ are 256-byte text line buffers; svc is a 256-byte buffer that
// receives the letters decoded from a packed signature.
char	sv[256];
char	sv_[256];
char    svc[256];
// Run table for the current line: ca[len][0] is the number of runs of length
// len (-1 when there are none) and ca[len][k], k>=1, is the start index of the
// k-th such run.
int	ca[100][100];

// Scratch grid that main resets for every line. Element [..][..][0] is -1 while
// the cell is unused and otherwise holds the order index taken from ba16i;
// element [..][..][1] counts hits. main indexes it first by
// (start position, run length) and afterwards by (run length, character code).
int	ba16[256][256][2];int ba16i;
// NOTE(review): df is not referenced anywhere in the code visible in this file.
int	df=0;

// laH holds the distinct 64-bit line signatures seen so far (laHi of them);
// lac[i] is the number of input lines whose signature equals laH[i].
long long laH[100000];int laHi=0;
int       lac[100000]; 

// Raises d to the power a by repeated multiplication in double precision.
// Returns 1 when a is 0. For every other exponent below 2 (negative values
// included) no multiplication takes place and d itself is returned.
double mpow(int d,int a)
{
	double result;
	int step;

	if(a==0){return(1);}

	result=d;
	for(step=2;step<=a;++step){result*=d;}
	return(result);
}

int main(int argc,char *argv[])
{

int	i,j,k,l,m,n;
int	a,b,c,cc=0;
int	fb,fb_;

long long lb;
char    lbs[256];
int sc;

	//if(argc<2){return(0);}
	if(strlen(argv[1])==0){return(0);}
	//if(strlen(argv[2])==0){return(0);}

	strcpy(infn,argv[1]);
	//strcpy(outfn,argv[2]);

	f1=fopen(infn,"rb");
	f2=fopen(out1fn,"wb");
	f3=fopen(out2fn,"wb");
	f4=fopen(out3fn,"wb");
    f5=fopen(out4fn,"wb");
	fb=getc(f1);
	while(!feof(f1))
	{
		for(i=0;i<256;++i){sv[i]='\0';}i=0;
		while((fb!=10)&&(fb!=13)){sv[i]=fb;++i;fb=getc(f1);}
		while((fb==10)||(fb==13)){fb=getc(f1);}++cc;

		for(i=0;i<100;++i){for(j=0;j<100;++j){ca[i][j]=-1;}}
		a=0;for(i=0;i<strlen(sv)-1;++i)
		{
			if(sv[i]==sv[i+1])
			{
				c=i;a=1;b=1+i;
				while(sv[c]==sv[b]&&b<strlen(sv)){++a;++b;}i=b-1;
				if(ca[a][0]==-1){ca[a][0]=1;}else{++ca[a][0];}ca[a][ca[a][0]]=c;}
		}
		for(i=0;i<256;++i){for(j=0;j<256;++j){ba16[i][j][0]=-1;ba16[i][j][1]=0;}}ba16i=0;
		for(i=0;i<100;++i)
		{
		if(ca[i][0]!=-1){for(j=1;j<=ca[i][0];++j){
			if(ba16[ca[i][j]][i][0]==-1)
			{ba16[ca[i][j]][i][0]=ba16i;++ba16i;++ba16[ca[i][j]][i][1];}else{++ba16[ca[i][j]][i][1];}
            
		}}
		}
		fprintf(f4,"%s",sv);fprintf(f4,"\t%09d\n",cc);
        lb=0;sc=0;for(i=0;i<256;++i){lbs[i]='\0';}
		for(i=0;i<256;++i){for(j=0;j<256;++j){if(ba16[i][j][0]!=-1){fprintf(f2,"%d!%c!%d\t",j,sv[i],i);fprintf(f5,"%d%c",j,sv[i]);
            if(sv[i]=='A'){lbs[sc]='1';lbs[++sc]='0';lbs[++sc]='0';}else{
                if(sv[i]=='C'){lbs[sc]='0';lbs[++sc]='1';lbs[++sc]='0';}else{
                    if(sv[i]=='G'){lbs[sc]='1';lbs[++sc]='1';lbs[++sc]='0';}else{
                        if(sv[i]=='T'){lbs[sc]='0';lbs[++sc]='0';lbs[++sc]='1';}else{lbs[sc]='1';lbs[++sc]='0';lbs[++sc]='1';}
            }}}++sc;
        }}}//printf("%d %s\n",strlen(lbs),lbs);
        lb=0;for(i=0;i<strlen(lbs)&&i<64;++i){if(lbs[i]=='1'){lb|=(long long)1<<i;}}
        if(!laHi){laH[laHi]=lb;lac[laHi]=1;++laHi;}else{
            for(i=0;i<laHi;++i){if(laH[i]==lb){++lac[i];break;}}if(i==laHi){laH[laHi]=lb;lac[laHi]=1;++laHi;}
        }
        fprintf(f5,"\t%09d\n",cc);fprintf(f2,"\t%09d\n",cc);
        
		for(i=0;i<256;++i){for(j=0;j<256;++j){ba16[i][j][0]=-1;ba16[i][j][1]=0;}}ba16i=0;
		for(i=0;i<100;++i)
		{
		if(ca[i][0]!=-1){for(j=1;j<=ca[i][0];++j){
			if(ba16[i][(int)sv[ca[i][j]]][0]==-1)
			{ba16[i][(int)sv[ca[i][j]]][0]=ba16i;++ba16i;++ba16[i][(int)sv[ca[i][j]]][1];}else{++ba16[i][(int)sv[ca[i][j]]][1];}
            
		}}
		}
		for(i=0;i<256;++i){for(j=0;j<256;++j){if(ba16[i][j][0]!=-1){fprintf(f3,"%d!%c!%d\t",i,j,ba16[i][j][1]);}}}fprintf(f3,"\t%09d\n",cc);


	}
	fclose(f1);
	fclose(f2);
    fclose(f3);
    fclose(f4);
    fclose(f5);

    //printf("%d\n",laHi);
    
    for(i=0;i<laHi;++i)
    {
        for(j=0;j<256;++j){lbs[j]='\0';svc[j]='\0';}
        k=0;for(j=0;j<64;++j){lb=(long long)1<<j;if(((laH[i]&lb)>>j)==1){lbs[k]='1';}else{lbs[k]='0';}++k;}
        //printf("%s\n",lbs);
        l=0;sc=0;while(sc<strlen(lbs))
        {
        if(lbs[sc]=='1'&&lbs[1+sc]=='0'&&lbs[1+1+sc]=='0'){printf("A");svc[l]='A';++l;}else{
            if(lbs[sc]=='0'&&lbs[1+sc]=='1'&&lbs[1+1+sc]=='0'){printf("C");svc[l]='C';++l;}else{
                if(lbs[sc]=='1'&&lbs[1+sc]=='1'&&lbs[1+1+sc]=='0'){printf("G");svc[l]='G';++l;}else{
                    if(lbs[sc]=='0'&&lbs[1+sc]=='0'&&lbs[1+1+sc]=='1'){printf("T");svc[l]='T';++l;}else{printf("_");}
        }}}sc+=3;
        }printf("\t%d\n",lac[i]);
        if(lac[i]>1)
        {
            f5=fopen(out4fn,"rb");
        	fb=getc(f5);
        	while(!feof(f5))
        	{
        		for(j=0;j<256;++j){sv[j]='\0';}j=0;l=0;k=0;
        		while((fb!=10)&&(fb!=13)){sv[j]=fb;++j;if(fb!='\t'&&!(fb>='0'&&fb<='9')){if(sv[j-1]==svc[l]){++k;}++l;}fb=getc(f5);}
        		while((fb==10)||(fb==13)){fb=getc(f5);}
                if(k==l)
                {
                    printf("%s\n",sv);
                    f4=fopen(out3fn,"rb");
                	fb_=getc(f4);
                	while(!feof(f4))
                	{
                		for(j=0;j<256;++j){sv_[j]='\0';}j=0;
                		while((fb_!=10)&&(fb_!=13)){sv_[j]=fb_;++j;if(fb_=='\t'){l=j-1;}fb_=getc(f4);}
                		while((fb_==10)||(fb_==13)){fb_=getc(f4);}
                        k=strlen(sv)-1;for(j=strlen(sv_)-1;j>l;--j){if(sv_[j]!=sv[k]){break;}else{--k;}}
                        if(j==l){printf("%s\n",sv_);}//printf("%s\n",sv_);
                    }
                    fclose(f4);
                }
            }
            fclose(f5);
        }
    }

    return(0);

}