//	This source and the Similar Text Sequences method are subject to the GNU General Public License. 
//			Copyright (C) 2001-2014 Jasenko Dzinleski 

//		This program is free software; you can redistribute it
//	and/or modify it under the terms of the GNU General Public License as
//	published by the Free Software Foundation; either version 2 of the
//	License, or (at your option) any later version. 

//	This program is distributed in the hope that it will be useful, but
//	WITHOUT ANY WARRANTY; without even the implied warranty of
//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//	General Public License for more details. 

//	You should have received a copy of the GNU General Public License along
//	with this program; if not, write to the Free Software Foundation, Inc.,
//	51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

//  		Similar Text Sequences 
//		written by Dzinleski Jasenko  November 2016

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <vector>
#include <iostream>


// Input stream (f1) and output stream (f2); both are opened in main().
FILE		*f1,*f2;

// Input and output file names. main() overwrites these defaults with
// the first and fourth command-line arguments.
char		infn[256]="out_s2_s16.txt";
char		outfn[256]="cout_s2_s16.txt";
// Characters of the input line currently being examined, and how many are stored.
char		cv[256];int cvi=0;

// Positions (1..32, counted from the left) of the '1' digits of the pattern
// being written, and how many of them there are.
int		oa[32];int oai=0;
// Lookup tables from a 16-bit half-pattern to a compact index into na:
// naH maps the upper half to a row, naL maps the lower half to a column;
// -1 marks a half-pattern that has not been seen. naHi / naLi are the next
// unused row / column index.
// A 16-bit half-pattern takes every value in 0..0xFFFF inclusive, so each
// table needs 0x10000 slots; with only 0xffff slots the half-pattern 0xFFFF
// indexed one element past the end of the array.
long 		naH[0x10000];int naHi=0;
long 		naL[0x10000];int naLi=0;

// Occurrence counts indexed as na[row][column]; -1 marks an unused cell.
using std::vector; vector< vector<int> > na;
// Current number of rows (nac1) and columns (nac2) of na. nac1_ and nac2_
// hold the enlarged dimension while main() grows the table.
int	nac1=32,nac1_;
int	nac2=32,nac2_;
// Not referenced by main() in this file.
int	nai=0;

int main(int argc,char *argv[])
{

int	a,b,cL;
int	i,j,k,l,m,n;
int	fb;
int	nc=0;
int	nc_=0;
int	v;
char	sv[256];
char	ev[256];

int c[32];
int cn=1;

	if(argc<5){return(0);}
	if(strlen(argv[1])==0){return(0);}
	strcpy(infn,argv[1]);
	j=0;strcpy(sv,argv[2]);for(i=-1+strlen(sv);i>=0;--i){nc+=(-48+sv[i])*pow(10,j);++j;}//printf("%d\n",nc);
	j=0;strcpy(ev,argv[3]);for(i=-1+strlen(ev);i>=0;--i){nc_+=(-48+ev[i])*pow(10,j);++j;}//printf("%d\n",nc_);
	if(strlen(argv[4])==0){return(0);}
	strcpy(outfn,argv[4]);
	na.resize(nac1);for(i=0;i<nac1;++i){na[i].resize(nac2);}
	for(i=0;i<32;++i){for(j=0;j<32;++j){na[i][j]=-1;}}
	for(i=0;i<0xffff;++i){naH[i]=-1;naL[i]=-1;}	
	
	f1=fopen(infn,"rb");
	fb=getc(f1);
	while(!feof(f1)&&nc<nc_){while((fb!=10)&&(fb!=13)){fb=getc(f1);}while((fb==10)||(fb==13)){fb=getc(f1);++nc;}}
	while(!feof(f1))
	{
		cvi=0;while((fb!=10)&&(fb!=13)){cv[cvi]=fb;++cvi;fb=getc(f1);}
		while((fb==10)||(fb==13)){fb=getc(f1);}

		for(i=0;i<cvi&&i<64;++i){if(cv[i]>='0'&&cv[i]<='9'){}else{break;}}
		if(i!=64){continue;}++nc;
		//printf("%s\n",cv);

		i=0;
		v=10*(-48+(int)cv[0] )+(-48+(int)cv[1] );if(v){i|=1<<(32-1);}
		v=10*(-48+(int)cv[2] )+(-48+(int)cv[3] );if(v){i|=1<<(32-2);}
		v=10*(-48+(int)cv[4] )+(-48+(int)cv[5] );if(v){i|=1<<(32-3);}
		v=10*(-48+(int)cv[6] )+(-48+(int)cv[7] );if(v){i|=1<<(32-4);}
		v=10*(-48+(int)cv[8] )+(-48+(int)cv[9] );if(v){i|=1<<(32-5);}
		v=10*(-48+(int)cv[10])+(-48+(int)cv[11]);if(v){i|=1<<(32-6);}
		v=10*(-48+(int)cv[12])+(-48+(int)cv[13]);if(v){i|=1<<(32-7);}
		v=10*(-48+(int)cv[14])+(-48+(int)cv[15]);if(v){i|=1<<(32-8);}
		v=10*(-48+(int)cv[16])+(-48+(int)cv[17]);if(v){i|=1<<(32-9);}
		v=10*(-48+(int)cv[18])+(-48+(int)cv[19]);if(v){i|=1<<(32-10);}
		v=10*(-48+(int)cv[20])+(-48+(int)cv[21]);if(v){i|=1<<(32-11);}
		v=10*(-48+(int)cv[22])+(-48+(int)cv[23]);if(v){i|=1<<(32-12);}
		v=10*(-48+(int)cv[24])+(-48+(int)cv[25]);if(v){i|=1<<(32-13);}
		v=10*(-48+(int)cv[26])+(-48+(int)cv[27]);if(v){i|=1<<(32-14);}
		v=10*(-48+(int)cv[28])+(-48+(int)cv[29]);if(v){i|=1<<(32-15);}
		v=10*(-48+(int)cv[30])+(-48+(int)cv[31]);if(v){i|=1<<(32-16);}

		v=10*(-48+(int)cv[32] )+(-48+(int)cv[33] );if(v){i|=1<<(32-17);}
		v=10*(-48+(int)cv[34] )+(-48+(int)cv[35] );if(v){i|=1<<(32-18);}
		v=10*(-48+(int)cv[36] )+(-48+(int)cv[37] );if(v){i|=1<<(32-19);}
		v=10*(-48+(int)cv[38] )+(-48+(int)cv[39] );if(v){i|=1<<(32-20);}
		v=10*(-48+(int)cv[40] )+(-48+(int)cv[41] );if(v){i|=1<<(32-21);}
		v=10*(-48+(int)cv[42])+(-48+(int)cv[43]);if(v){i|=1<<(32-22);}
		v=10*(-48+(int)cv[44])+(-48+(int)cv[45]);if(v){i|=1<<(32-23);}
		v=10*(-48+(int)cv[46])+(-48+(int)cv[47]);if(v){i|=1<<(32-24);}
		v=10*(-48+(int)cv[48])+(-48+(int)cv[49]);if(v){i|=1<<(32-25);}
		v=10*(-48+(int)cv[50])+(-48+(int)cv[51]);if(v){i|=1<<(32-26);}
		v=10*(-48+(int)cv[52])+(-48+(int)cv[53]);if(v){i|=1<<(32-27);}
		v=10*(-48+(int)cv[54])+(-48+(int)cv[55]);if(v){i|=1<<(32-28);}
		v=10*(-48+(int)cv[56])+(-48+(int)cv[57]);if(v){i|=1<<(32-29);}
		v=10*(-48+(int)cv[58])+(-48+(int)cv[59]);if(v){i|=1<<(32-30);}
		v=10*(-48+(int)cv[60])+(-48+(int)cv[61]);if(v){i|=1<<(32-31);}
		v=10*(-48+(int)cv[62])+(-48+(int)cv[63]);if(v){i|=1<<(32-32);}

		if(naH[(i&0xffff0000)>>16]==-1){naH[(i&0xffff0000)>>16]=naHi;a=naHi;++naHi;}else{a=naH[(i&0xffff0000)>>16];}
		if(naL[(i&0x0000ffff)>>0]==-1){naL[(i&0x0000ffff)>>0]=naLi;b=naLi;++naLi;}else{b=naL[(i&0x0000ffff)>>0];}

		if((na.max_size()<(a*b))||(a>=nac1)||(b>=nac2))
		{
			if(a>=nac1)
			{
				nac1_=nac1;while(a>=nac1_){nac1_+=32;}
				if(na.max_size()<nac2*nac1_){printf("(Incomplete) exiting line %d\n",nc);break;}
				na.resize(nac1_);for(k=nac1;k<nac1_;++k){na[k].resize(nac2);}
				for(k=nac1;k<nac1_;++k){for(l=0;l<nac2;++l){na[k][l]=-1;}}		
				nac1=nac1_;
			}
			if(b>=nac2)
			{
				nac2_=nac2;while(b>=nac2_){nac2_+=32;}
				if(na.max_size()<nac1*nac2_){printf("(Incomplete) exiting line %d\n",nc);break;}
				for(k=0;k<nac1;++k){na[k].resize(nac2_);}
				for(k=0;k<nac1;++k){for(l=nac2;l<nac2_;++l){na[k][l]=-1;}}
				nac2=nac2_;
			}

		}
		if(na[a][b]==-1){na[a][b]=1;}else{++na[a][b];}
	}
	fclose(f1);
	f2=fopen(outfn,"wb");
	for(a=0;a<nac1;++a){for(b=0;b<nac2;++b){
		if(na[a][b]!=-1)
		{
			fprintf(f2,"%05d\t",na[a][b]);
			fprintf(f2,"\"");
			for(j=0;j<0xffff;++j){if(naH[j]==a){break;}}cL=j<<16;
			for(j=0;j<0xffff;++j){if(naL[j]==b){break;}}cL|=j<<0;
			oai=0;
			for(j=32-1;j>=0;--j)
			{
				k=cL<<((32-1)-j);k=k>>(32-1);
				if(k&0x1){fprintf(f2,"1");oa[oai]=32-j;++oai;}else{fprintf(f2,"0");}
			}fprintf(f2,"\"\t");for(j=0;j<oai;++j){fprintf(f2,"%d\t",oa[j]);}fprintf(f2,"\n");
		}
	}}
	fclose(f2);
	return(0);

}