//	This source and the similar text sequences method are subject to the GNU General Public License.
//			Copyright (C) 2001-2014 Jasenko Dzinleski 

//		This program is free software; you can redistribute it
//	and/or modify it under the terms of the GNU General Public License as
//	published by the Free Software Foundation; either version 2 of the
//	License, or (at your option) any later version. 

//	This program is distributed in the hope that it will be useful, but
//	WITHOUT ANY WARRANTY; without even the implied warranty of
//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//	General Public License for more details. 

//	You should have received a copy of the GNU General Public License along
//	with this program; if not, write to the Free Software Foundation, Inc.,
//	51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

//  Similar Text Sequences 
//	written by Dzinleski Jasenko  March 2014


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

// f1 is the input stream opened by main; f2 is declared but not used in this file.
FILE		*f1,*f2;

// Name of the input file read by main.
char		infn[256]="fasta_.txt";

// Search patterns. main compares them against each input line and treats
// '_' as a wildcard position.
// The pattern text lives in a macro so that the companion length can be
// computed with sizeof: a file-scope initializer must be a constant
// expression in C, which a call to strlen() is not.
#define SV1_PATTERN	"T__G__CAAG__"
#define SV2_PATTERN	"T__T__CAAG__"
#define SV3_PATTERN	"G__T__CAAG__"
#define SV4_PATTERN	"T__C__CAAG__"

char		sv1[256]=SV1_PATTERN;int sv1i=(int)(sizeof(SV1_PATTERN)-1);
char		sv2[256]=SV2_PATTERN;int sv2i=(int)(sizeof(SV2_PATTERN)-1);
char		sv3[256]=SV3_PATTERN;int sv3i=(int)(sizeof(SV3_PATTERN)-1);
char		sv4[256]=SV4_PATTERN;int sv4i=(int)(sizeof(SV4_PATTERN)-1);

// Scratch buffer for the text of a candidate match, and its fill count.
char		csv[256];		int csvi=0;

// cv holds the bytes of the current input line (not NUL-terminated), cvi its length.
char		cv[256];int cvi=0;
// nl holds the line-break byte sequence observed in the input, nli its length.
char		nl[256];int nli=0;

int main(int argc,char *argv[])
{

int	a,b,c1,c2,c3,c4,d,e;
int	i,j,k,l,m,n;
int	fb;
int	nc=1;

	f1=fopen(infn,"rb");
	fb=getc(f1);
	while(!feof(f1))
	{
		cvi=0;while((fb!=10)&&(fb!=13)){cv[cvi]=fb;++cvi;fb=getc(f1);}
		if(nli==0){while((fb==10)||(fb==13)){nl[nli]=fb;++nli;fb=getc(f1);}}else{
			j=0;while((fb==10)||(fb==13)){if(nl[j]==fb){++j;if(j==nli){j=0;++nc;}}fb=getc(f1);}
		}

		//printf("%s\n",cv);

		c1=0;for(i=0;i<256;++i){csv[i]='\0';}csvi=0;
		for(k=0;k<cvi;++k)
		{
			m=0;for(j=0;j<strlen(sv1);++j){if(sv1[j]!='_'){if(sv1[j]!=cv[j+k]){break;}else{++m;csv[csvi]=cv[j+k];++csvi;}}else{++m;csv[csvi]=cv[j+k];++csvi;}}
			if(strlen(csv)==strlen(sv1)){++c1;/*break;*/}
			for(i=0;i<256;++i){csv[i]='\0';}csvi=0;
		}

		c2=0;for(i=0;i<256;++i){csv[i]='\0';}csvi=0;
		for(k=0;k<cvi;++k)
		{
			m=0;for(j=0;j<strlen(sv2);++j){if(sv2[j]!='_'){if(sv2[j]!=cv[j+k]){break;}else{++m;csv[csvi]=cv[j+k];++csvi;}}else{++m;csv[csvi]=cv[j+k];++csvi;}}
			if(strlen(csv)==strlen(sv2)){++c2;/*break;*/}
			for(i=0;i<256;++i){csv[i]='\0';}csvi=0;
		}

		c3=0;for(i=0;i<256;++i){csv[i]='\0';}csvi=0;
		for(k=0;k<cvi;++k)
		{
			m=0;for(j=0;j<strlen(sv3);++j){if(sv3[j]!='_'){if(sv3[j]!=cv[j+k]){break;}else{++m;csv[csvi]=cv[j+k];++csvi;}}else{++m;csv[csvi]=cv[j+k];++csvi;}}
			if(strlen(csv)==strlen(sv3)){++c3;/*break;*/}
			for(i=0;i<256;++i){csv[i]='\0';}csvi=0;
		}

		c4=0;for(i=0;i<256;++i){csv[i]='\0';}csvi=0;
		for(k=0;k<cvi;++k)
		{
			m=0;for(j=0;j<strlen(sv4);++j){if(sv4[j]!='_'){if(sv4[j]!=cv[j+k]){break;}else{++m;csv[csvi]=cv[j+k];++csvi;}}else{++m;csv[csvi]=cv[j+k];++csvi;}}
			if(strlen(csv)==strlen(sv4)){++c4;/*break;*/}
			for(i=0;i<256;++i){csv[i]='\0';}csvi=0;
		}

		if(c1>=1||c2>=1||c3>=1||c4>=1)
		{
			printf("%02d%02d%02d%02d\t",c1,c2,c3,c4);
			for(j=0;j<cvi;++j){printf("%c",cv[j]);}
			printf("\t%s\t%s",csv,sv1);printf("\t%d\n",nc);
		}
	}
	return(0);

}