//	This source and Text sequence patterns method apply to GNU General Public License. 
//			Copyright (C) 2013  Jasenko Dzinleski 

//	This source applies to the GNU General Public License as
//	published by the Free Software Foundation 
//	and can not be used, copied, sold, redistributed or 
//	used in any other way but only by written permission by Jasenko Dzinleski . 
//	Copyright (C) from 2001 - 2012 and later by Jasenko Dzinleski 

//	This program is distributed in the hope that it will be useful, but
//	WITHOUT ANY WARRANTY; without even the implied warranty of
//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//	General Public License for more details. 

//	You should have received a copy of the GNU General Public License along
//	with this program; if not, write to the Free Software Foundation, Inc.,
//	51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

//
//		Text sequence patterns
//		written by Dzinleski Jasenko  October , 2013
//


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

/* Stream for the input file; opened and closed by main and p_1. */
FILE	*f1;

/* Name of the input file; main overwrites the default with argv[1]. */
char	infn[256]="fasta_.txt";

/*
 * Per byte-pair table indexed [first byte][second byte][slot].
 * Five slots per pair so that every slot index from 0 to 4 is in bounds
 * (with only four slots, slot index 4 lands on slot 0 of the next pair).
 */
long 	ba2b[256][256][5];
long 	ba2bi=0;	/* next "order of first appearance" value for ba2b */

/*
 * Per byte-triple table indexed [24-bit key][slot].  One row for every
 * possible key, 0x000000 through 0xffffff inclusive.
 */
long	r_[0x1000000][4];
long	ri_=0;		/* next "order of first appearance" value for r_ */

/* Bit masks filled in by main: b8o = bits 0,2,4,6 and b8e = bits 1,3,5,7. */
int	b8o,b8e;
int	df=0;		/* not referenced in the visible part of this file */

int 	bd[300000][3];	/* not referenced in the visible part of this file */
int	bdi=0;		/* not referenced in the visible part of this file */

long 	ca=0;		/* not referenced in the visible part of this file */
long 	cb=0;		/* not referenced in the visible part of this file */

/* Counters updated by p_1: the plain pair is for r_, the "_" pair for ba2b. */
long	mc=0,unmc=0;
long	mc_=0,unmc_=0;
long	fc=1;		/* file offset p_1 seeks to when it starts a new window */

int p_1()
{

int	a,b,c,d,e;
int	i,j,k,l,m;

int	b1_,b2_,b3_;
int	b1,b2,b3;
int	b11_,b21_,b31_;

int	b2c_,b2i_=0;

int	d1,d2,d3;
int	d1_,d2_,d3_;

int	fbyte;
char	nl[256];int nli=0;

	for(i=0;i<256;++i){for(j=0;j<256;++j){ba2b[i][j][0]=-1;}}ba2bi=0;
	for(ri_=0;ri_<0xffffff;++ri_){r_[ri_][0]=-1;}ri_=0;

	f1=fopen(infn,"rb");

	b1_=getc(f1);
	b2_=getc(f1);
	b3_=getc(f1);

	while(!feof(f1))
	{

		b1=b1_<<1;
		b2=b2_<<1;
		b3=b3_<<1;

		d1=((((b1&b8e)>>1)^(b2&b8o))|((b1&b8e)^((b2&b8o)<<1)));	
		d2=((((b1&b8e)>>1)^(b3&b8o))|((b1&b8e)^((b3&b8o)<<1)));
		d3=((((d1&b8e)>>1)^(d2&b8o))|((d1&b8e)^((d2&b8o)<<1)));

		d1_=((((b2&b8e)>>1)^(b1&b8o))|((b2&b8e)^((b1&b8o)<<1)));	
		d2_=((((b2&b8e)>>1)^(b3&b8o))|((b2&b8e)^((b3&b8o)<<1)));
		d3_=((((d2_&b8e)>>1)^(d1_&b8o))|((d2_&b8e)^((d1_&b8o)<<1)));

		if((ba2b[(b1_&0xff)][(b3_&0xff)][3]!=d3)||(ba2b[(b1_&0xff)][(b3_&0xff)][4]!=d3_)){++unmc_;}
		if((ba2b[(b1_&0xff)][(b3_&0xff)][3]!=d3)&&(ba2b[(b1_&0xff)][(b3_&0xff)][4]!=d3_)){++mc_;}

		if(ba2b[(b1_&0xff)][(b3_&0xff)][0]==-1)
		{
			ba2b[(b1_&0xff)][(b3_&0xff)][0]=ba2bi;++ba2bi;
			ba2b[(b1_&0xff)][(b3_&0xff)][1]=1;
			ba2b[(b1_&0xff)][(b3_&0xff)][3]=d3;
			ba2b[(b1_&0xff)][(b3_&0xff)][4]=d3_;

		}else{
			++ba2b[(b1_&0xff)][(b3_&0xff)][1];
		}

		if(b2i_==0)
		{
			b2c_=0;b2c_|=b2_<<16;++b2i_;
		}else{
			if(b2i_==1){b2c_|=b2_<<8;++b2i_;}else{
				b2c_|=b2_<<0;b2i_=0;

				b1=(((b2c_&0xff0000)>>16)<<1);
				b2=(((b2c_&0x00ff00)>>8)<<1);
				b3=(((b2c_&0x0000ff)>>0)<<1);

				d1=((((b1&b8e)>>1)^(b2&b8o))|((b1&b8e)^((b2&b8o)<<1)));	
				d2=((((b1&b8e)>>1)^(b3&b8o))|((b1&b8e)^((b3&b8o)<<1)));
				d3=((((d1&b8e)>>1)^(d2&b8o))|((d1&b8e)^((d2&b8o)<<1)));

				d1_=((((b2&b8e)>>1)^(b1&b8o))|((b2&b8e)^((b1&b8o)<<1)));	
				d2_=((((b2&b8e)>>1)^(b3&b8o))|((b2&b8e)^((b3&b8o)<<1)));
				d3_=((((d2_&b8e)>>1)^(d1_&b8o))|((d2_&b8e)^((d1_&b8o)<<1)));

				if((r_[b2c_][2]!=d3)||(r_[b2c_][3]!=d3_)){++unmc;}	
				if((r_[b2c_][2]!=d3)&&(r_[b2c_][3]!=d3_)){++mc;}

				if(r_[b2c_][0]==-1)
				{
					r_[b2c_][0]=ri_;++ri_;
					r_[b2c_][1]=1;
					r_[b2c_][2]=d3;
					r_[b2c_][3]=d3_;
				}else{
					++r_[b2c_][1];
				}
			}	
		}			

		if(b2i_==0)
		{
			++fc;fseek(f1,fc,SEEK_SET);

			b1_=getc(f1);
			b2_=getc(f1);
			b3_=getc(f1);
		}else{
			b2_=b3_;
			b3_=getc(f1);
		}
	}
	fclose(f1);
	
	printf("o %e\n",100*(double)mc_/(double)unmc_);
	printf("i %e\n",100*(double)mc/(double)unmc);

	f1=fopen(infn,"rb");
	fbyte=getc(f1);
	while((fbyte!=10)&&(fbyte!=13)){fbyte=getc(f1);}
	if(nli==0){while((fbyte==10)||(fbyte==13)){nl[nli]=fbyte;++nli;fbyte=getc(f1);}}
	fclose(f1);

	i=0;j=0;k=0;

	f1=fopen(infn,"rb");

	b1_=getc(f1);
	b11_=getc(f1);
	b21_=getc(f1);
	b31_=getc(f1);
	b3_=getc(f1);

	while(!feof(f1))
	{
		
		if(nl[i]==b1_){++i;}if(!(i<nli)){++j;i=0;k=0;/*printf("\n%d\n",j);*/}
		if(nl[i]==b11_){++i;}if(!(i<nli)){++j;i=0;k=0;/*printf("\n%d\n",j);*/}
		if(nl[i]==b21_){++i;}if(!(i<nli)){++j;i=0;k=0;/*printf("\n%d\n",j);*/}
		if(nl[i]==b31_){++i;}if(!(i<nli)){++j;i=0;k=0;/*printf("\n%d\n",j);*/}
		if(nl[i]==b3_){++i;}if(!(i<nli)){++j;i=0;k=0;/*printf("\n%d\n",j);*/}

		if(ba2b[(b1_&0xff)][(b3_&0xff)][0]==-1){printf("Error 1...\n");return(0);}
		if(r_[((b11_<<16)|(b21_<<8)|(b31_<<0))][0]==-1){printf("Error 2...\n");return(0);}

		printf("%c%c%c%c%c\t",
		b1_,b11_,b21_,b31_,b3_);
		printf("!%e!\n",
		((double)ba2b[(b1_&0xff)][(b3_&0xff)][1]/(double)r_[((b11_<<16)|(b21_<<8)|(b31_<<0))][1]));

		b1_=getc(f1);
		b11_=getc(f1);
		b21_=getc(f1);
		b31_=getc(f1);
		b3_=getc(f1);

	}
	fclose(f1);
	
	return(0);
}

/*
 * Program entry point.
 *
 * Prints the banner, takes the input file name from argv[1], verifies that
 * the file can be opened, sets up the bit masks b8o and b8e and runs p_1().
 *
 * Returns 0 in every case, including when no file name is given, when the
 * name does not fit in infn, or when the file cannot be opened; those cases
 * additionally report the reason on stderr.
 */
int main(int argc,char *argv[])
{
	size_t	name_len;

	printf("\n\tText sequence patterns\n");
	printf("\twritten by Dzinleski Jasenko October , 2013\n");

	if(argc<2||argv[1]==NULL){
		fprintf(stderr,"usage: <program> <input-file>\n");
		return(0);
	}

	/* infn is a fixed-size buffer: refuse names that would not fit
	   together with the terminating NUL instead of overrunning it. */
	name_len=strlen(argv[1]);
	if(name_len>=sizeof infn){
		fprintf(stderr,"input file name too long (limit %lu characters)\n",
			(unsigned long)(sizeof infn-1));
		return(0);
	}
	memcpy(infn,argv[1],name_len+1);

	/* Only a readability probe; p_1 opens the file again itself. */
	f1=fopen(infn,"rb");
	if(f1==NULL){
		perror(infn);
		return(0);
	}
	fclose(f1);

	/* Bits 0, 2, 4 and 6. */
	b8o=(1<<0)|(1<<2)|(1<<4)|(1<<6);

	/* Bits 1, 3, 5 and 7. */
	b8e=(1<<1)|(1<<3)|(1<<5)|(1<<7);

	printf("\n\tFilename: %s\n\n",infn);
	(void)p_1();

	return(0);

}