//	This source and text sequence grouping method apply to GNU General Public License. 
//			Copyright (C) 2017 Jasenko Dzinleski 

//		This program is free software; you can redistribute it
//	and/or modify it under the terms of the GNU General Public License as
//	published by the Free Software Foundation; either version 2 of the
//	License, or (at your option) any later version. 

//	This program is distributed in the hope that it will be useful, but
//	WITHOUT ANY WARRANTY; without even the implied warranty of
//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//	General Public License for more details. 

//	You should have received a copy of the GNU General Public License along
//	with this program; if not, write to the Free Software Foundation, Inc.,
//	51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

//  	Text Sequence Grouping 
//	written by Dzinleski Jasenko  May 2017


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

// Shared stream handles.  In this file f1 is used by p_2_, p_4_ and p_3 for
// the file they scan, f2 by p_3_ and main, and f3 is the result file that
// main opens on outfn and p_3 writes to.
FILE		*f1,*f2,*f3;

// infn: copy of argv[1], opened by p_3.
// lsfn: copy of argv[2], opened by main (one pattern per line).
// outfn: name of the result file main creates.
char		infn[256];
char		lsfn[256];
char		outfn[256]="clist_out_.txt";

// sv1: the pattern line main is currently processing.
// cv: text window filled by p_3; cv[0..cvi) is the current input line and
// cv[cvi..cvi_) is the line that follows it.
char		sv1[256];
char		cv[256];int cvi=0,cvi_=0;

// Results of p_1: n1_ counts the non-'_' characters of the pattern, j1_ the
// number of entries stored in csv1_.  n1/j1 are the copies main hands to p_3.
int		n1_,j1_;
int		n1,j1;

// csv1_: pattern layout written by p_1 (0 = one literal character, otherwise
// the length of a run of '_'; unused slots are -1).  csv1 is main's copy.
// csv is not referenced by any code visible in this file.
// c_1 / c_1_: match counters reset and filled by p_2.
int 		csv1_[256];
int 		csv1[256];
char 		csv[256];
int		c_1,c_1_;

// Rows appended by p_2_: [0] is the 9-digit line key as an integer, [1] the
// count stored for that key; cnti is the number of rows in use.
int		cnt[1200000][2];int cnti=0;

//--------------------------------------------------------------
	#include 	<unistd.h>
	#include 	<wait.h>

	// Run an external command and block until it has finished.
	//
	//   ar   NULL-terminated argument vector; ar[0] is looked up via PATH.
	//   ofn  unused, kept so existing callers keep compiling.
	//
	// Returns 0 when the command ran and exited with status 0, -1 otherwise
	// (bad arguments, fork/wait failure, command not found, non-zero exit).
	int p_1_(char *ar[],char ofn[256])
	{
		pid_t	pid;
		int	status=0;

		(void)ofn;
		if(ar==NULL||ar[0]==NULL){return(-1);}

		pid=fork();
		if(pid<0){return(-1);}
		if(pid==0)
		{
			// Child: replace the process image.  execvp only returns on
			// failure; leave immediately without running the parent's
			// atexit handlers or flushing its stdio buffers.
			execvp(ar[0],ar);
			_exit(127);
		}

		// Parent: the caller reads the command's output file right after
		// this call, so the child must be reaped before returning.
		if(waitpid(pid,&status,0)<0){return(-1);}
		if(!WIFEXITED(status)||WEXITSTATUS(status)!=0){return(-1);}
		return(0);
	}
//--------------------------------------------------------------
// Integer power helper: returns d raised to a for a >= 1 and 1 for a == 0.
// For a negative exponent no multiplication takes place and d is returned.
double mpow(int d,int a)
{
	double	result;
	int	step;

	if(a==0){return(1);}

	result=d;
	for(step=1;step<a;++step){result*=d;}
	return(result);
}
//--------------------------------------------------------------

// Read one line from fp into buf (at most cap-1 bytes are stored, the rest of
// an over-long line is consumed and dropped; buf is always NUL-terminated).
// The CR/LF characters that follow the line are skipped as well.
// Returns the stored length, or -1 when the stream is already exhausted.
static int p3_read_line(FILE *fp,char *buf,size_t cap)
{
	size_t	len=0;
	int	c=getc(fp);

	if(c==EOF){return(-1);}
	while(c!=EOF&&c!='\r'&&c!='\n')
	{
		if(len+1<cap){buf[len++]=(char)c;}
		c=getc(fp);
	}
	buf[len]='\0';
	while(c=='\r'||c=='\n'){c=getc(fp);}
	if(c!=EOF){ungetc(c,fp);}
	return((int)len);
}

// Print every line of file inf whose first 9 characters equal the first 9
// characters of ls.  Matching lines are expected to be adjacent (the caller
// passes a sorted file), so scanning stops at the first non-matching line
// that follows a match.
//
//   inf  path of the file to scan
//   ls   key; only its first 9 characters are compared
//
// Always returns 0, also when inf cannot be opened.
int p_3_(char inf[256],char ls[256])
{
	enum { KEY_LEN=9 };
	char	line[256];
	int	len,hits=0;
	FILE	*fp;

	// Plain fopen: fopen64 is only declared when _LARGEFILE64_SOURCE is
	// defined; build with -D_FILE_OFFSET_BITS=64 if large files on a 32-bit
	// target are needed.
	fp=fopen(inf,"rb");
	if(fp==NULL){return(0);}

	while((len=p3_read_line(fp,line,sizeof line))>=0)
	{
		if(len>=KEY_LEN&&strncmp(line,ls,KEY_LEN)==0)
		{
			printf("%s\n",line);
			++hits;
		}
		else if(hits)
		{
			break;
		}
	}
	fclose(fp);

	return(0);
}

// Read one line from fp into buf (at most cap-1 bytes are stored, the rest of
// an over-long line is consumed and dropped; buf is always NUL-terminated).
// The CR/LF characters that follow the line are skipped as well.
// Returns the stored length, or -1 when the stream is already exhausted.
static int p4_read_line(FILE *fp,char *buf,size_t cap)
{
	size_t	len=0;
	int	c=getc(fp);

	if(c==EOF){return(-1);}
	while(c!=EOF&&c!='\r'&&c!='\n')
	{
		if(len+1<cap){buf[len++]=(char)c;}
		c=getc(fp);
	}
	buf[len]='\0';
	while(c=='\r'||c=='\n'){c=getc(fp);}
	if(c!=EOF){ungetc(c,fp);}
	return((int)len);
}

// For every line of file inf whose first 5 characters equal the first 5
// characters of ls: take the last 9 characters of that line as a key, print
// the key and call p_3_(inf_,key).  Matching lines are expected to be
// adjacent, so scanning stops at the first non-matching line after a match.
//
//   inf   path of the file to scan
//   inf_  path handed on to p_3_
//   ls    prefix; only its first 5 characters are compared
//
// The file is opened through the global f1, which stays open while p_3_
// runs.  Always returns 0, also when inf cannot be opened.
int p_4_(char inf[256],char inf_[256],char ls[256])
{
	enum { PREFIX_LEN=5, KEY_LEN=9 };
	char	line[256];
	char	key[256];
	int	len,hits=0;

	// Plain fopen: fopen64 is only declared when _LARGEFILE64_SOURCE is
	// defined, and an implicit declaration would truncate the pointer.
	f1=fopen(inf,"rb");
	if(f1==NULL){return(0);}

	while((len=p4_read_line(f1,line,sizeof line))>=0)
	{
		if(len<PREFIX_LEN||strncmp(line,ls,PREFIX_LEN)!=0)
		{
			if(hits){break;}
			continue;
		}
		++hits;

		// A matching line shorter than the key width has no complete key;
		// skip it instead of reading in front of the buffer.
		if(len<KEY_LEN){continue;}

		memset(key,0,sizeof key);
		memcpy(key,line+len-KEY_LEN,KEY_LEN);
		printf("%s\n",key);
		p_3_(inf_,key);
	}
	fclose(f1);

	return(0);
}

// Read one line from fp into buf (at most cap-1 bytes are stored, the rest of
// an over-long line is consumed and dropped; buf is always NUL-terminated).
// The CR/LF characters that follow the line are skipped as well.
// Returns the stored length, or -1 when the stream is already exhausted.
static int p2_read_line(FILE *fp,char *buf,size_t cap)
{
	size_t	len=0;
	int	c=getc(fp);

	if(c==EOF){return(-1);}
	while(c!=EOF&&c!='\r'&&c!='\n')
	{
		if(len+1<cap){buf[len++]=(char)c;}
		c=getc(fp);
	}
	buf[len]='\0';
	while(c=='\r'||c=='\n'){c=getc(fp);}
	if(c!=EOF){ungetc(c,fp);}
	return((int)len);
}

// Append one row (numeric value of the 9-digit key, count) to cnt.
// A key that does not start with 9 decimal digits is skipped.
// Returns 0 when cnt is full, 1 otherwise.
static int p2_record_group(const char *key,int count)
{
	enum { KEY_LEN=9 };
	int	value=0,i;

	for(i=0;i<KEY_LEN;++i)
	{
		if(!isdigit((unsigned char)key[i])){return(1);}
		value=value*10+(key[i]-'0');
	}
	if(cnti>=(int)(sizeof cnt/sizeof cnt[0])){return(0);}
	cnt[cnti][0]=value;
	cnt[cnti][1]=count;
	++cnti;
	return(1);
}

// Group the adjacent lines of file inf by their first 9 characters and append
// one row per group to the global table cnt/cnti.
//
// The count stored for a group is 0 when the key occurs on a single line and
// the number of lines when it occurs on two or more.  Every group is
// recorded, including the first and the last one in the file.
//
//   inf  path of the (sorted) file to scan
//
// The file is opened through the global f1.  Always returns 0, also when inf
// cannot be opened.
int p_2_(char inf[256])
{
	enum { KEY_LEN=9 };
	char	cur[256];
	char	next[256];
	int	len,run=0;

	// Plain fopen: fopen64 is only declared when _LARGEFILE64_SOURCE is
	// defined, and an implicit declaration would truncate the pointer.
	f1=fopen(inf,"rb");
	if(f1==NULL){return(0);}

	if(p2_read_line(f1,cur,sizeof cur)>=0)
	{
		for(;;)
		{
			len=p2_read_line(f1,next,sizeof next);
			if(len>=KEY_LEN&&strncmp(cur,next,KEY_LEN)==0)
			{
				// Same key as the group in progress: the first repeat
				// brings the count to 2, later ones add 1.
				run=run?run+1:2;
				continue;
			}

			// Key changed or end of file: close the current group.
			if(!p2_record_group(cur,run))
			{
				fprintf(stderr,"p_2_: group table full, remaining groups ignored\n");
				break;
			}
			if(len<0){break;}
			memcpy(cur,next,sizeof cur);
			run=0;
		}
	}
	fclose(f1);

	return(0);
}

// Break pattern sv1_ into a layout table stored in the globals csv1_, j1_
// and n1_: each non-'_' character becomes one entry with value 0 (and is
// counted in n1_), each run of '_' becomes one entry holding the run length.
// j1_ receives the number of entries; unused slots of csv1_ are -1.
//
// Returns 1 on success, or -1 (globals untouched) when the pattern has fewer
// than two characters.
int p_1(char sv1_[256])
{
	size_t	len=strlen(sv1_);
	size_t	pos=0;
	int	slot;

	if(len<=1){return(-1);}

	for(slot=0;slot<256;++slot){csv1_[slot]=-1;}
	n1_=0;
	j1_=0;

	while(pos<len)
	{
		if(sv1_[pos]=='_')
		{
			int	run=0;

			do{++run;++pos;}while(sv1_[pos]=='_');
			csv1_[j1_]=run;
		}
		else
		{
			csv1_[j1_]=0;
			++n1_;
			++pos;
		}
		++j1_;
	}
	return(1);
}

// Count the positions of the global text window cv at which a pattern
// matches.
//
//   sv1_   pattern text
//   j1_    number of layout entries in csv1_
//   n1_    number of literal (non-'_') characters in the pattern
//   csv1_  layout: 0 = one literal character that must equal the text,
//          otherwise the number of characters to skip
//
// Every start offset 0..cvi_-1 is tried.  A match starting before cvi is
// added to the global c_1, any other match to c_1_; both counters are reset
// first.  Always returns 0.
int p_2(char sv1_[256],int j1_,int n1_,int csv1_[256])
{
	const int	cap=(int)sizeof cv;
	int	start,entry,pos,hit;

	c_1=0;c_1_=0;
	for(start=0;start<cvi_;++start)
	{
		hit=0;pos=0;
		for(entry=0;entry<j1_;++entry)
		{
			if(csv1_[entry])
			{
				pos+=csv1_[entry];
			}
			else
			{
				// A literal that would fall outside cv cannot match.
				if(start+pos>=cap||sv1_[pos]!=cv[start+pos]){break;}
				++hit;
				++pos;
			}
		}
		// The match position is the start offset itself, so a pattern
		// without literal characters is classified by a defined value too.
		if(hit==n1_){if(start<cvi){++c_1;}else{++c_1_;}}
	}
	return(0);
}

int p_3(int cl_)
{

int	a,b,cc;
int	c1=0,c1_=0;
int	d,e;
int	i,j,k,l,m,n;
int	fb;

	cvi=0;
	f1=fopen(infn,"rb");
	fb=getc(f1);
	for(i=0;i<256;++i){cv[i]='\0';}cvi=0;
	while((fb!=10)&&(fb!=13)){cv[cvi]=fb;++cvi;fb=getc(f1);}cvi_=cvi;
	while((fb==10)||(fb==13)){fb=getc(f1);}cc=1;
	while(!feof(f1))
	{
		while((fb!=10)&&(fb!=13)){cv[cvi_]=fb;++cvi_;fb=getc(f1);}
		while((fb==10)||(fb==13)){fb=getc(f1);}
		if(strlen(sv1)>1){i=p_2(sv1,j1,n1,csv1);
		if(c_1){
		/*fprintf(f3,"%09d\t%03d\n",cc,cl_);*/
		fprintf(f3,"%09d\t",cc);for(i=0;i<cvi;++i){fprintf(f3,"%c",cv[i]);}fprintf(f3,"\t%s\t%03d\n",sv1,cl_);
		}c1+=c_1;c1_=c_1_;}
		j=0;for(i=cvi;i<cvi_;++i){cv[j]=cv[i];++j;}cvi=j;cvi_=j;++cc;
		for(i=cvi_;i<256;++i){cv[i]='\0';}
	}
	fclose(f1);
	//printf("%s",sv1);printf("\t%d\n",c1);

	return(0);

}

int main(int argc,char *argv[])
{

int	a,b,cc=0;
int	d,e;
int	i,j,k,l,m,n;
int	fb;

	if(argc<2){return(0);}
	if(strlen(argv[1])==0){return(0);}
	if(strlen(argv[2])==0){return(0);}

	strcpy(infn,argv[1]);
	strcpy(lsfn,argv[2]);

	f2=fopen(lsfn,"rb");
	f3=fopen(outfn,"wb");
	fb=getc(f2);
	while(!feof(f2))
	{
		for(i=0;i<256;++i){sv1[i]='\0';}i=0;
		while((fb!=10)&&(fb!=13)){sv1[i]=fb;++i;fb=getc(f2);}
		while((fb==10)||(fb==13)){fb=getc(f2);}
		i=p_1(sv1);n1=n1_;j1=j1_;for(i=0;i<256;++i){csv1[i]=csv1_[i];}p_3(cc);
		++cc;
	}
	fclose(f2);
	fclose(f3);

	char *cm[]={"sort","clist_out_.txt","-osclist_out_.txt",NULL};p_1_(cm,"out.txt");

	p_2_("sclist_out_.txt");
	f2=fopen("csclist_out_l3.txt","wb");
	for(i=0;i<cnti;++i){fprintf(f2,"%05d\t%09d\n",cnt[i][1],cnt[i][0]);}
	fclose(f2);
	char *cm1[]={"sort","csclist_out_l3.txt","-oscsclist_out_l3.txt",NULL};p_1_(cm1,"out.txt");
	p_4_("scsclist_out_l3.txt","sclist_out_l3.txt","00004");

	return(0);

}