//	This source and the similar text sequences method are subject to the GNU General Public License. 
//			Copyright (C) 2001-2014 Jasenko Dzinleski 

//		This program is free software; you can redistribute it
//	and/or modify it under the terms of the GNU General Public License as
//	published by the Free Software Foundation; either version 2 of the
//	License, or (at your option) any later version. 

//	This program is distributed in the hope that it will be useful, but
//	WITHOUT ANY WARRANTY; without even the implied warranty of
//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//	General Public License for more details. 

//	You should have received a copy of the GNU General Public License along
//	with this program; if not, write to the Free Software Foundation, Inc.,
//	51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

//  Similar Text Sequences 
//	written by Dzinleski Jasenko  March 2014


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

//	File handles shared at file scope.
FILE	*f1;
FILE	*f2;

//	Default input file names: infn is the text that gets searched,
//	infn_ is the list of patterns (one per line).
char	infn[256] = "fasta__.txt";
char	infn_[256] = "sl1.txt";

//	Pattern table: up to 1000 NUL-terminated patterns of at most 255
//	characters each. svi is the number of entries in use and nc_[i] is
//	the match counter that belongs to sv[i].
char	sv[1000][256];
int		svi = 0;
int		nc_[1000];

//	Scratch buffer for a single pattern, with its length.
char	sv1[256];
int		sv1i;

//	Scratch buffer for the characters collected during one match
//	attempt, with its fill level.
char	csv[256];
int		csvi = 0;

//	Buffer for one line of the searched text, with its length.
char	cv[256];
int		cvi = 0;

//	Buffer for a line-terminator byte sequence, with its length.
char	nl[256];
int		nli = 0;

int main(int argc,char *argv[])
{

int	a,b,c1,c2,c3,c4,c5,c6,c7,c8,d,e;
int	i,j,k,l,m,n;
int	fb;
int	nc=1;
int	mc,n_;

	for(i=0;i<1000;++i){for(j=0;j<256;++j){sv[i][j]='\0';}}

	f1=fopen(infn_,"rb");
	fb=getc(f1);
	while(!feof(f1))
	{
		j=0;while((fb!=10)&&(fb!=13)){sv[svi][j]=fb;++j;fb=getc(f1);}
		while((fb==10)||(fb==13)){fb=getc(f1);}
		//printf("%s\n",sv[svi]);
		nc_[svi]=0;++svi;
	}
	fclose(f1);printf("%d\n",svi);

	f1=fopen(infn,"rb");
	fb=getc(f1);
	while(!feof(f1))
	{
		cvi=0;while((fb!=10)&&(fb!=13)){cv[cvi]=fb;++cvi;fb=getc(f1);}
		if(nli==0){while((fb==10)||(fb==13)){nl[nli]=fb;++nli;fb=getc(f1);}}else{
			j=0;while((fb==10)||(fb==13)){if(nl[j]==fb){++j;if(j==nli){j=0;++nc;}}fb=getc(f1);}
		}

		c1=0;
		for(n=0;n<svi;++n)
		{

			for(j=0;j<strlen(sv[n]);++j){sv1[j]=sv[n][j];}
			for(j=strlen(sv[n]);j<256;++j){sv1[j]='\0';}
			sv1i=strlen(sv1);

			for(i=0;i<256;++i){csv[i]='\0';}csvi=0;
			for(k=0;k<cvi;++k)
			{
				m=0;for(j=0;j<strlen(sv1);++j){if(sv1[j]!='_'){if(sv1[j]!=cv[j+k]){break;}else{++m;csv[csvi]=cv[j+k];++csvi;}}else{++m;csv[csvi]=cv[j+k];++csvi;}}
				if(strlen(csv)==strlen(sv1)){++nc_[n];++c1;if(c1==1){/*printf("%s\t%s\t",sv1,cv);*//*printf("%s\t",sv1);*/}else{/*printf("%s\t",sv1);*/}}
				for(i=0;i<256;++i){csv[i]='\0';}csvi=0;
			}
		}
		//printf("%d\t%d\n",c1,nc);
		//if(c1>=35&&c1<=45){printf("%s\t%d\t%d\n",cv,c1,nc);}
	}
	for(n=0;n<svi;++n){if(nc_[n]!=0){printf("%d\t%s\n",nc_[n],sv[n]);}}

	return(0);

}