//mybpred.c
//Michael Black, 2006
//
//handles the branch target buffer and branch prediction

#include <math.h>

//number of PC bits used to index the BTB;
//the BTB holds 2^BTB_bits entries and is indexed by (PC/4) mod 2^BTB_bits
int BTB_bits=16;

//branch predictor type, selects the behaviour of get_branch_prediction
//and train_branch_predictor:
//0=no predictor (stall on branches)
//1=predict the same as the last global branch
//2=predict the same as the last local branch
//3=always predict not taken
//4=always predict taken
//5=bimodal
int branch_predictor_type=5;

//used in type 1 predictor
//holds direction of the last trained branch: 0 = not taken, 1 = taken
int lastbranch=0;

//branch target buffer - indexed by PC
//for jr and jalr, holds the last destination address
//for conditional branches, holds the taken destination address
//allocated by BTBinit
unsigned int* branch_target_buffer;

//branch predictor table used by predictor types 2 and 5, allocated by BTBinit
//type 2: each entry holds the last direction of the branches that map to it
//type 5: each entry is a counter kept in the range 0..3; 2 or more predicts taken
char* branch_predictor_table;
//number of PC bits used to index the predictor;
//the table holds 2^btbits entries and is indexed by (PC>>3) mod 2^btbits
int btbits=10;

//BTBinit allocates the branch target buffer (2^BTB_bits entries) and, for the
//predictor types that use a table (2 and 5), the branch predictor table
//(2^btbits entries). Both start out zero-filled.
//Calls fatal() if an allocation fails.
void BTBinit()
{
	//the sizes are exact powers of two, so an integer shift replaces pow()
	unsigned int btb_entries=1u<<BTB_bits;

	//calloc hands back zeroed memory, so no separate clearing loop is needed;
	//the element size comes from the pointer type instead of a literal 4
	branch_target_buffer=(unsigned int*)calloc(btb_entries,sizeof(*branch_target_buffer));
	if (branch_target_buffer==0)
		fatal("Cannot allocate space for BTB");

	if (branch_predictor_type==2 || branch_predictor_type==5)
	{
		unsigned int predictor_entries=1u<<btbits;

		branch_predictor_table=(char*)calloc(predictor_entries,sizeof(*branch_predictor_table));
		if (branch_predictor_table==0)
			fatal("Cannot allocate space for branch predictor");
	}
}

//getBTB returns the destination address stored in the BTB for the branch at PC.
//The slot is the word address (PC/4) modulo the 2^BTB_bits table entries.
unsigned int getBTB(unsigned int PC)
{
	unsigned int word_address = PC/4;
	int btb_entries = (int)pow(2,BTB_bits);
	int slot = word_address % btb_entries;

	return branch_target_buffer[slot];
}

//updateBTB records destPC as the destination of the branch at PC.
//The slot is the word address (PC/4) modulo the 2^BTB_bits table entries.
void updateBTB(unsigned int PC, unsigned int destPC)
{
	int btb_entries = (int)pow(2,BTB_bits);
	int slot = (PC/4) % btb_entries;

	branch_target_buffer[slot] = destPC;
}

//get_branch_prediction returns the prediction for the branch at PC:
//0 = make no prediction, 1 = predict not taken, 2 = predict taken
//The answer depends on branch_predictor_type; unknown types make no prediction.
int get_branch_prediction(unsigned int PC)
{
	int slot;

	switch (branch_predictor_type)
	{
	//same direction as the last branch seen anywhere
	case 1:
		return lastbranch+1;

	//same direction as the last branch that mapped to this table entry
	case 2:
		slot=(PC>>3)%(int)pow(2,btbits);
		return (branch_predictor_table[slot]==0) ? 1 : 2;

	//always not taken
	case 3:
		return 1;

	//always taken
	case 4:
		return 2;

	//bimodal: counter values 2 and 3 predict taken
	case 5:
		slot=(PC>>3)%(int)pow(2,btbits);
		return (branch_predictor_table[slot]>=2) ? 2 : 1;

	//type 0 (no predictor) and anything unrecognized
	default:
		return 0;
	}
}

//train_branch_predictor uses the outcome of the last branch to update the predictor state
//PC=last branch PC, taken=whether the last branch was taken (1) or not (0)
//type 1 remembers the direction globally, type 2 remembers it per table entry,
//type 5 moves a per-entry counter one step and saturates it at 0 and 3.
//The other predictor types keep no state, so nothing is done for them.
void train_branch_predictor(unsigned int PC, int taken)
{
	int entry;
	int count;

	if (branch_predictor_type==1)
	{
		lastbranch=taken;
	}
	else if (branch_predictor_type==2)
	{
		entry=(PC>>3)%(int)pow(2,btbits);

		branch_predictor_table[entry]=taken;
	}
	else if (branch_predictor_type==5)
	{
		entry=(PC>>3)%(int)pow(2,btbits);

		//do the arithmetic in an int: the table is plain char, and where char
		//is unsigned a decrement of 0 would wrap to 255 and then be clamped
		//to 3 (strongly taken) instead of staying at 0
		count=branch_predictor_table[entry];
		if (taken==0)
			count--;
		else
			count++;

		if (count<0)
			count=0;
		else if (count>3)
			count=3;

		branch_predictor_table[entry]=(char)count;
	}
}
//mycache.c
//Michael Black, 2006

//NOTE(review): not referenced by the cache code visible in this file - confirm it is still needed here
extern unsigned int counter;

//if 0, don't bother with cache, just read/write straight to memory
//the read/write wrappers only use the cache when this is exactly 1
unsigned int cache_exists=0;

//cache geometry (can be redefined with simulator flags)
//full size of each cache in bytes
unsigned int il1_cache_size=65536;
unsigned int il2_cache_size=1048576;
unsigned int dl1_cache_size=65536;
unsigned int dl2_cache_size=1048576;
//size of each cache block in bytes (shared by all four caches)
unsigned int cache_block_size=64;
//number of cache sets
unsigned int il1_cache_sets=512;
unsigned int il2_cache_sets=8192;
unsigned int dl1_cache_sets=512;
unsigned int dl2_cache_sets=8192;
//number of cache ways: cache_size / (cache_block_size * cache_sets)
//computed by cache_init
unsigned int il1_cache_ways;
unsigned int il2_cache_ways;
unsigned int dl1_cache_ways;
unsigned int dl2_cache_ways;

//initial latencies, copied into the simulated latencies by cache_init
int init_il1_cache_hit_latency=1;
int init_il2_cache_hit_latency=6;
int init_il2_cache_miss_latency=6;
int init_dl1_cache_hit_latency=1;
int init_dl2_cache_hit_latency=6;
int init_dl2_cache_miss_latency=6;

//simulated latencies, based on clock speed
//these are the values returned by icache_access_latency / dcache_access_latency
int il1_cache_hit_latency;
int il2_cache_hit_latency;
int il2_cache_miss_latency;
int dl1_cache_hit_latency;
int dl2_cache_hit_latency;
int dl2_cache_miss_latency;

//cache stats
//reads, writes and misses are counted per byte access to the level 1 caches;
//replacements count blocks evicted to make room for a new block
unsigned int d_cache_write_misses=0;
unsigned int i_cache_read_misses=0;
unsigned int d_cache_read_misses=0;
unsigned int il1_cache_replacements=0;
unsigned int il2_cache_replacements=0;
unsigned int dl1_cache_replacements=0;
unsigned int dl2_cache_replacements=0;
unsigned int d_cache_writes=0;
unsigned int i_cache_reads=0;
unsigned int d_cache_reads=0;

//actual cache contents, allocated by cache_init
//the block in way w of set s has block index s*ways+w and
//occupies cache_block_size bytes starting at (s*ways+w)*cache_block_size
unsigned char* il1_cache;
unsigned char* il2_cache;
unsigned char* dl1_cache;
unsigned char* dl2_cache;

//tag for each block, indexed by block index
//tag = address / (cache_block_size * number of sets)
unsigned int* il1_cache_tag;
unsigned int* il2_cache_tag;
unsigned int* dl1_cache_tag;
unsigned int* dl2_cache_tag;

//whether cache block is valid (1) or not (0), indexed by block index
char* il1_cache_valid;
char* il2_cache_valid;
char* dl1_cache_valid;
char* dl2_cache_valid;

//whether cache block is dirty (differs from the next level / memory), indexed by block index
char* il1_cache_dirty;
char* il2_cache_dirty;
char* dl1_cache_dirty;
char* dl2_cache_dirty;

//replacement (LRU) information for each cache block, indexed by block index
//0 = most recently used; the block with the highest value in a set is evicted first
char* il1_cache_replace;
char* il2_cache_replace;
char* dl1_cache_replace;
char* dl2_cache_replace;

//cache_alloc_level allocates the zero-filled storage of one cache: size data
//bytes plus one tag, valid, dirty and replace entry for each of its
//size/cache_block_size blocks. Zero-filled means every block starts out
//invalid and clean.
//Returns 1 if every allocation succeeded, 0 otherwise.
static int cache_alloc_level(unsigned int size, unsigned char** data, unsigned int** tag,
	char** valid, char** dirty, char** replace)
{
	unsigned int blocks=size/cache_block_size;

	*data=(unsigned char*)calloc(size,1);
	//element sizes come from the pointer types instead of literal byte counts
	*tag=(unsigned int*)calloc(blocks,sizeof(**tag));
	*valid=(char*)calloc(blocks,sizeof(**valid));
	*dirty=(char*)calloc(blocks,sizeof(**dirty));
	*replace=(char*)calloc(blocks,sizeof(**replace));

	return (*data!=0 && *tag!=0 && *valid!=0 && *dirty!=0 && *replace!=0);
}

//cache_init sets up memory for the four caches & initializes block info
//For each cache it derives the number of ways from the configured size,
//block size and set count, and allocates the data and per-block arrays.
//It then resets the simulated latencies to their initial values.
//Calls fatal() if memory for a cache cannot be allocated.
void cache_init()
{
	//level 1 instruction cache
	il1_cache_ways=il1_cache_size/(cache_block_size*il1_cache_sets);
	if (!cache_alloc_level(il1_cache_size,&il1_cache,&il1_cache_tag,
			&il1_cache_valid,&il1_cache_dirty,&il1_cache_replace))
		fatal("Not enough memory to store level 1 instruction cache");

	//level 2 instruction cache
	il2_cache_ways=il2_cache_size/(cache_block_size*il2_cache_sets);
	if (!cache_alloc_level(il2_cache_size,&il2_cache,&il2_cache_tag,
			&il2_cache_valid,&il2_cache_dirty,&il2_cache_replace))
		fatal("Not enough memory to store level 2 instruction cache");

	//level 1 data cache
	dl1_cache_ways=dl1_cache_size/(cache_block_size*dl1_cache_sets);
	if (!cache_alloc_level(dl1_cache_size,&dl1_cache,&dl1_cache_tag,
			&dl1_cache_valid,&dl1_cache_dirty,&dl1_cache_replace))
		fatal("Not enough memory to store level 1 data cache");

	//level 2 data cache
	dl2_cache_ways=dl2_cache_size/(cache_block_size*dl2_cache_sets);
	if (!cache_alloc_level(dl2_cache_size,&dl2_cache,&dl2_cache_tag,
			&dl2_cache_valid,&dl2_cache_dirty,&dl2_cache_replace))
		fatal("Not enough memory to store level 2 data cache");

	//set all the latencies to initial values
	il1_cache_hit_latency=init_il1_cache_hit_latency;
	il2_cache_hit_latency=init_il2_cache_hit_latency;
	il2_cache_miss_latency=init_il2_cache_miss_latency;
	dl1_cache_hit_latency=init_dl1_cache_hit_latency;
	dl2_cache_hit_latency=init_dl2_cache_hit_latency;
	dl2_cache_miss_latency=init_dl2_cache_miss_latency;
}

//il1_cache_evict removes the block with index cache_block from the level 1
//instruction cache. A dirty block is first copied into the level 2 block that
//holds the same address, and that level 2 block is marked dirty; it is a
//fatal error if level 2 does not hold it.
void il1_cache_evict(unsigned int cache_block)
{
	int byte,way,address;
	int l2_tag,l2_set;

	//only a dirty block needs to be written back to level 2
	if (il1_cache_dirty[cache_block]==1)
	{
		//rebuild the block's base address from its tag and its set number
		address=il1_cache_tag[cache_block]*il1_cache_sets*cache_block_size;
		address+=(cache_block/il1_cache_ways)*cache_block_size;

		//split that address into the level 2 tag and set
		l2_tag = address / (cache_block_size * il2_cache_sets);
		l2_set = (address % (cache_block_size * il2_cache_sets)) / (cache_block_size);

		//look through the level 2 set for a valid way holding that tag
		way=0;
		while (way<il2_cache_ways)
		{
			if (il2_cache_valid[l2_set*il2_cache_ways+way]==1 && il2_cache_tag[l2_set*il2_cache_ways+way]==l2_tag)
				break;
			way++;
		}
		if (way==il2_cache_ways)
			fatal("Block in level 1 instruction cache not found in level 2 instruction cache");

		//write the block's bytes into level 2 and flag the level 2 copy as dirty
		for (byte=0; byte<cache_block_size; byte++)
			il2_cache[(l2_set*il2_cache_ways+way)*cache_block_size+byte] = il1_cache[cache_block*cache_block_size+byte];
		il2_cache_dirty[l2_set*il2_cache_ways+way]=1;
	}

	//the level 1 slot is now free: invalid & clean
	il1_cache_valid[cache_block]=0;
	il1_cache_dirty[cache_block]=0;
}

//dl1_cache_evict removes the block with index cache_block from the level 1
//data cache. A dirty block is first copied into the level 2 block that holds
//the same address, and that level 2 block is marked dirty; it is a fatal
//error if level 2 does not hold it.
void dl1_cache_evict(unsigned int cache_block)
{
	int byte,way,address;
	int l2_tag,l2_set;

	//only a dirty block needs to be written back to level 2
	if (dl1_cache_dirty[cache_block]==1)
	{
		//rebuild the block's base address from its tag and its set number
		address=dl1_cache_tag[cache_block]*dl1_cache_sets*cache_block_size;
		address+=(cache_block/dl1_cache_ways)*cache_block_size;

		//split that address into the level 2 tag and set
		l2_tag = address / (cache_block_size * dl2_cache_sets);
		l2_set = (address % (cache_block_size * dl2_cache_sets)) / (cache_block_size);

		//look through the level 2 set for a valid way holding that tag
		way=0;
		while (way<dl2_cache_ways)
		{
			if (dl2_cache_valid[l2_set*dl2_cache_ways+way]==1 && dl2_cache_tag[l2_set*dl2_cache_ways+way]==l2_tag)
				break;
			way++;
		}
		if (way==dl2_cache_ways)
			fatal("Block in level 1 data cache not found in level 2 data cache");

		//write the block's bytes into level 2 and flag the level 2 copy as dirty
		for (byte=0; byte<cache_block_size; byte++)
			dl2_cache[(l2_set*dl2_cache_ways+way)*cache_block_size+byte] = dl1_cache[cache_block*cache_block_size+byte];
		dl2_cache_dirty[l2_set*dl2_cache_ways+way]=1;
	}

	//the level 1 slot is now free: invalid & clean
	dl1_cache_valid[cache_block]=0;
	dl1_cache_dirty[cache_block]=0;
}

//il2_cache_evict removes the block with index cache_block from the level 2
//instruction cache. Any valid level 1 copy of the same block is evicted first
//(which writes it back into this level 2 block if it is dirty); a dirty
//level 2 block is then written to memory byte by byte.
void il2_cache_evict(unsigned int cache_block)
{
	int n,address;
	int l1_set,l1_tag;

	//rebuild the block's base address from its tag and its set number
	address=il2_cache_tag[cache_block]*il2_cache_sets*cache_block_size;
	address+=(cache_block/il2_cache_ways)*cache_block_size;

	//evict every valid level 1 way that holds the same block
	l1_tag = address / (cache_block_size * il1_cache_sets);
	l1_set = (address % (cache_block_size * il1_cache_sets)) / (cache_block_size);
	for (n=0; n<il1_cache_ways; n++)
	{
		if (il1_cache_tag[l1_set*il1_cache_ways+n]!=l1_tag)
			continue;
		if (il1_cache_valid[l1_set*il1_cache_ways+n]!=1)
			continue;
		il1_cache_evict(l1_set*il1_cache_ways+n);
	}

	//a dirty block goes back to memory
	if (il2_cache_dirty[cache_block]==1)
	{
		n=0;
		while (n<cache_block_size)
		{
			memory_write(address+n, il2_cache[cache_block*cache_block_size+n]);
			n++;
		}
	}

	//the level 2 slot is now free: invalid & clean
	il2_cache_valid[cache_block]=0;
	il2_cache_dirty[cache_block]=0;
}

//dl2_cache_evict removes the block with index cache_block from the level 2
//data cache. Any valid level 1 copy of the same block is evicted first
//(which writes it back into this level 2 block if it is dirty); a dirty
//level 2 block is then written to memory byte by byte.
void dl2_cache_evict(unsigned int cache_block)
{
	int n,address;
	int l1_set,l1_tag;

	//rebuild the block's base address from its tag and its set number
	address=dl2_cache_tag[cache_block]*dl2_cache_sets*cache_block_size;
	address+=(cache_block/dl2_cache_ways)*cache_block_size;

	//evict every valid level 1 way that holds the same block
	l1_tag = address / (cache_block_size * dl1_cache_sets);
	l1_set = (address % (cache_block_size * dl1_cache_sets)) / (cache_block_size);
	for (n=0; n<dl1_cache_ways; n++)
	{
		if (dl1_cache_tag[l1_set*dl1_cache_ways+n]!=l1_tag)
			continue;
		if (dl1_cache_valid[l1_set*dl1_cache_ways+n]!=1)
			continue;
		dl1_cache_evict(l1_set*dl1_cache_ways+n);
	}

	//a dirty block goes back to memory
	if (dl2_cache_dirty[cache_block]==1)
	{
		n=0;
		while (n<cache_block_size)
		{
			memory_write(address+n, dl2_cache[cache_block*cache_block_size+n]);
			n++;
		}
	}

	//the level 2 slot is now free: invalid & clean
	dl2_cache_valid[cache_block]=0;
	dl2_cache_dirty[cache_block]=0;
}

//cache_invalidate removes the block containing address from all four caches.
//The caches are visited in the order il1, il2, dl1, dl2; in each one the first
//valid way whose tag matches is evicted through that cache's evict routine.
void cache_invalidate(unsigned int address)
{
	int way;
	int set,tag;

	//level 1 instruction cache
	tag = address / (cache_block_size * il1_cache_sets);
	set = (address % (cache_block_size * il1_cache_sets)) / (cache_block_size);
	for (way=0; way<il1_cache_ways; way++)
	{
		if (il1_cache_valid[set*il1_cache_ways+way]!=1)
			continue;
		if (il1_cache_tag[set*il1_cache_ways+way]!=tag)
			continue;
		il1_cache_evict(set*il1_cache_ways+way);
		break;
	}

	//level 2 instruction cache
	tag = address / (cache_block_size * il2_cache_sets);
	set = (address % (cache_block_size * il2_cache_sets)) / (cache_block_size);
	for (way=0; way<il2_cache_ways; way++)
	{
		if (il2_cache_valid[set*il2_cache_ways+way]!=1)
			continue;
		if (il2_cache_tag[set*il2_cache_ways+way]!=tag)
			continue;
		il2_cache_evict(set*il2_cache_ways+way);
		break;
	}

	//level 1 data cache
	tag = address / (cache_block_size * dl1_cache_sets);
	set = (address % (cache_block_size * dl1_cache_sets)) / (cache_block_size);
	for (way=0; way<dl1_cache_ways; way++)
	{
		if (dl1_cache_valid[set*dl1_cache_ways+way]!=1)
			continue;
		if (dl1_cache_tag[set*dl1_cache_ways+way]!=tag)
			continue;
		dl1_cache_evict(set*dl1_cache_ways+way);
		break;
	}

	//level 2 data cache
	tag = address / (cache_block_size * dl2_cache_sets);
	set = (address % (cache_block_size * dl2_cache_sets)) / (cache_block_size);
	for (way=0; way<dl2_cache_ways; way++)
	{
		if (dl2_cache_valid[set*dl2_cache_ways+way]!=1)
			continue;
		if (dl2_cache_tag[set*dl2_cache_ways+way]!=tag)
			continue;
		dl2_cache_evict(set*dl2_cache_ways+way);
		break;
	}
}

//i_cache_miss loads the block containing address into the level 1 instruction cache
//it also may need to load the block into the level 2 instruction cache as well
//if there is no room for that block, another block is evicted
//The level 2 LRU values are updated here; the level 1 LRU values are not
//(i_cache_doread updates them after calling this function).
void i_cache_miss(unsigned int address)
{
	//i = chosen level 1 way, k = chosen level 2 way, j = byte index, l = scratch way index
	int set,tag,i,j,k,l,il2_set,il2_tag;

	//split the address into tag and set for both cache levels
	tag = address / (cache_block_size * il1_cache_sets);
	set = (address % (cache_block_size * il1_cache_sets)) / (cache_block_size);
	il2_tag = address / (cache_block_size * il2_cache_sets);
	il2_set = (address % (cache_block_size * il2_cache_sets)) / (cache_block_size);
	//find if there is a cache entry that is invalid
	for (i=0; i<il1_cache_ways; i++)
	{
		if (il1_cache_valid[set*il1_cache_ways+i]==0)
			break;
	}
	//if there are no invalid entries, must evict one
	if (i==il1_cache_ways)
	{
		//pick the entry with the highest replace (LRU) value
		//(the lowest-numbered way wins a tie)
		i=0;
		for (l=1; l<il1_cache_ways; l++)
			if (il1_cache_replace[set*il1_cache_ways+l]>il1_cache_replace[set*il1_cache_ways+i])
				i=l;
		il1_cache_evict(set*il1_cache_ways+i);
		il1_cache_replacements++;
	}
	//find out whether the block is in the level 2 instruction cache
	for (k=0; k<il2_cache_ways; k++)
	{
		if (il2_cache_valid[il2_set*il2_cache_ways+k]==1)
			if (il2_cache_tag[il2_set*il2_cache_ways+k]==il2_tag)
				break;
	}
	if (k<il2_cache_ways)
	{
		//block is in level 2 cache; just load it into level 1
		for (j=0; j<cache_block_size; j++)
			il1_cache[(set*il1_cache_ways+i)*cache_block_size+j]=il2_cache[(il2_set*il2_cache_ways+k)*cache_block_size+j];

		//update LRU info: the block containing address gets LRU of 0,
		//valid blocks that were at least as recent as it get ++
		for (l=0; l<il2_cache_ways; l++)
		{
			if (l!=k && il2_cache_valid[il2_set*il2_cache_ways+l]==1)
				if (il2_cache_replace[il2_set*il2_cache_ways+l]<=il2_cache_replace[il2_set*il2_cache_ways+k])
					il2_cache_replace[il2_set*il2_cache_ways+l]++;
		}
		il2_cache_replace[il2_set*il2_cache_ways+k]=0;
	}
	else
	{
		//block isn't in level 2 cache either; need to load it into il2 first

		//find if there is a cache entry that is invalid
		for (k=0; k<il2_cache_ways; k++)
		{
			if (il2_cache_valid[il2_set*il2_cache_ways+k]==0)
				break;
		}
		if (k==il2_cache_ways)
		{
			//pick the entry with the highest replace (LRU) value
			k=0;
			for (l=1; l<il2_cache_ways; l++)
				if (il2_cache_replace[il2_set*il2_cache_ways+l]>il2_cache_replace[il2_set*il2_cache_ways+k])
					k=l;

			//il2_cache_evict also evicts any level 1 copy of the victim block
			il2_cache_evict(il2_set*il2_cache_ways+k);
			il2_cache_replacements++;
		}
		//load the new block into the level 2 cache, starting at the
		//block-aligned address
		for (j=0; j<cache_block_size; j++)
			il2_cache[(il2_set*il2_cache_ways+k)*cache_block_size+j]=memory_read((address-address%cache_block_size)+j);

		//set its tag, dirty, and valid bits
		il2_cache_tag[il2_set*il2_cache_ways+k]=il2_tag;
		il2_cache_dirty[il2_set*il2_cache_ways+k]=0;
		il2_cache_valid[il2_set*il2_cache_ways+k]=1;

		//update LRU info: the block containing address gets LRU of 0, others get ++
		//the comparison uses the replace value left in slot k by its previous occupant
		for (l=0; l<il2_cache_ways; l++)
		{
			if (l!=k && il2_cache_valid[il2_set*il2_cache_ways+l]==1)
				if (il2_cache_replace[il2_set*il2_cache_ways+l]<=il2_cache_replace[il2_set*il2_cache_ways+k])
					il2_cache_replace[il2_set*il2_cache_ways+l]++;
		}
		il2_cache_replace[il2_set*il2_cache_ways+k]=0;

		//now load the block from the level 2 cache into the level 1 cache
		for (j=0; j<cache_block_size; j++)
			il1_cache[(set*il1_cache_ways+i)*cache_block_size+j]=il2_cache[(il2_set*il2_cache_ways+k)*cache_block_size+j];
	}
	//set the level 1 block's tag, dirty, and valid bits
	il1_cache_tag[set*il1_cache_ways+i]=tag;
	il1_cache_dirty[set*il1_cache_ways+i]=0;
	il1_cache_valid[set*il1_cache_ways+i]=1;

	//NOTE(review): power_model is defined elsewhere; presumably this records the
	//energy cost of a cache miss (event 13) - confirm against the power model
	power_model(13,0,0);
}

//d_cache_miss loads the block containing address into the level 1 data cache
//it also may need to load the block into the level 2 data cache as well
//if there is no room for that block, another block is evicted
//The level 2 LRU values are updated here; the level 1 LRU values are not
//(d_cache_doread and d_cache_dowrite update them after calling this function).
void d_cache_miss(unsigned int address)
{
	//i = chosen level 1 way, k = chosen level 2 way, j = byte index, l = scratch way index
	int set,tag,i,j,k,l,dl2_set,dl2_tag;

	//split the address into tag and set for both cache levels
	tag = address / (cache_block_size * dl1_cache_sets);
	set = (address % (cache_block_size * dl1_cache_sets)) / (cache_block_size);
	dl2_tag = address / (cache_block_size * dl2_cache_sets);
	dl2_set = (address % (cache_block_size * dl2_cache_sets)) / (cache_block_size);

	//find if there is a cache entry that is invalid
	for (i=0; i<dl1_cache_ways; i++)
	{
		if (dl1_cache_valid[set*dl1_cache_ways+i]==0)
			break;
	}
	//if there are no invalid entries, must evict one
	if (i==dl1_cache_ways)
	{
		//pick the entry with the highest replace (LRU) value
		//(the lowest-numbered way wins a tie)
		i=0;
		for (l=1; l<dl1_cache_ways; l++)
			if (dl1_cache_replace[set*dl1_cache_ways+l]>dl1_cache_replace[set*dl1_cache_ways+i])
				i=l;

		dl1_cache_evict(set*dl1_cache_ways+i);
		dl1_cache_replacements++;
	}
	//find out whether the block is in the level 2 data cache
	for (k=0; k<dl2_cache_ways; k++)
	{
		if (dl2_cache_valid[dl2_set*dl2_cache_ways+k]==1)
			if (dl2_cache_tag[dl2_set*dl2_cache_ways+k]==dl2_tag)
				break;
	}
	if (k<dl2_cache_ways)
	{
		//block is in level 2 cache; just load it into level 1
		for (j=0; j<cache_block_size; j++)
			dl1_cache[(set*dl1_cache_ways+i)*cache_block_size+j]=dl2_cache[(dl2_set*dl2_cache_ways+k)*cache_block_size+j];

		//update LRU info: the block containing address gets LRU of 0,
		//valid blocks that were at least as recent as it get ++
		for (l=0; l<dl2_cache_ways; l++)
		{
			if (l!=k && dl2_cache_valid[dl2_set*dl2_cache_ways+l]==1)
				if (dl2_cache_replace[dl2_set*dl2_cache_ways+l]<=dl2_cache_replace[dl2_set*dl2_cache_ways+k])
					dl2_cache_replace[dl2_set*dl2_cache_ways+l]++;
		}
		dl2_cache_replace[dl2_set*dl2_cache_ways+k]=0;
	}
	else
	{
		//block isn't in level 2 cache either; need to load it into dl2 first

		//find if there is a cache entry that is invalid
		for (k=0; k<dl2_cache_ways; k++)
		{
			if (dl2_cache_valid[dl2_set*dl2_cache_ways+k]==0)
				break;
		}
		if (k==dl2_cache_ways)
		{
			//pick the entry with the highest replace (LRU) value
			k=0;
			for (l=1; l<dl2_cache_ways; l++)
				if (dl2_cache_replace[dl2_set*dl2_cache_ways+l]>dl2_cache_replace[dl2_set*dl2_cache_ways+k])
					k=l;

			//dl2_cache_evict also evicts any level 1 copy of the victim block
			dl2_cache_evict(dl2_set*dl2_cache_ways+k);
			dl2_cache_replacements++;
		}
		//load the new block into the level 2 cache, starting at the
		//block-aligned address
		for (j=0; j<cache_block_size; j++)
			dl2_cache[(dl2_set*dl2_cache_ways+k)*cache_block_size+j]=memory_read((address-address%cache_block_size)+j);

		//update LRU info: the block containing address gets LRU of 0, others get ++
		//the comparison uses the replace value left in slot k by its previous occupant
		for (l=0; l<dl2_cache_ways; l++)
		{
			if (l!=k && dl2_cache_valid[dl2_set*dl2_cache_ways+l]==1)
				if (dl2_cache_replace[dl2_set*dl2_cache_ways+l]<=dl2_cache_replace[dl2_set*dl2_cache_ways+k])
					dl2_cache_replace[dl2_set*dl2_cache_ways+l]++;
		}
		dl2_cache_replace[dl2_set*dl2_cache_ways+k]=0;

		//set its tag, dirty, and valid bits
		dl2_cache_tag[dl2_set*dl2_cache_ways+k]=dl2_tag;
		dl2_cache_dirty[dl2_set*dl2_cache_ways+k]=0;
		dl2_cache_valid[dl2_set*dl2_cache_ways+k]=1;
		//now load the block from the level 2 cache into the level 1 cache
		for (j=0; j<cache_block_size; j++)
			dl1_cache[(set*dl1_cache_ways+i)*cache_block_size+j]=dl2_cache[(dl2_set*dl2_cache_ways+k)*cache_block_size+j];
	}
	//set the level 1 block's tag, dirty, and valid bits
	dl1_cache_tag[set*dl1_cache_ways+i]=tag;
	dl1_cache_dirty[set*dl1_cache_ways+i]=0;
	dl1_cache_valid[set*dl1_cache_ways+i]=1;

	//NOTE(review): power_model is defined elsewhere; presumably this records the
	//energy cost of a cache miss (event 13) - confirm against the power model
	power_model(13,0,0);
}

//d_cache_dowrite writes one byte to the level 1 data cache.
//If the block is not cached, d_cache_miss loads it and the lookup is repeated.
//Every call counts in d_cache_writes; a miss also counts in d_cache_write_misses.
//The block written to is marked dirty and becomes the most recently used of its set.
void d_cache_dowrite(unsigned int address, char data)
{
	int way,other,attempt;
	int set,tag,offset;

	tag = address / (cache_block_size * dl1_cache_sets);
	set = (address % (cache_block_size * dl1_cache_sets)) / (cache_block_size);
	offset = address % cache_block_size;

	d_cache_writes++;

	//first attempt: plain lookup; second attempt: lookup after the miss was serviced
	for (attempt=0; attempt<2; attempt++)
	{
		for (way=0; way<dl1_cache_ways; way++)
		{
			if (dl1_cache_valid[set*dl1_cache_ways+way]!=1)
				continue;
			if (dl1_cache_tag[set*dl1_cache_ways+way]!=tag)
				continue;

			//hit: store the byte and mark the block as modified
			dl1_cache[(set*dl1_cache_ways+way)*cache_block_size+offset]=data;
			dl1_cache_dirty[set*dl1_cache_ways+way]=1;

			//LRU: valid blocks at least as recent as this one age by one,
			//this block becomes the most recent (0)
			for (other=0; other<dl1_cache_ways; other++)
			{
				if (other!=way && dl1_cache_valid[set*dl1_cache_ways+other]==1)
					if (dl1_cache_replace[set*dl1_cache_ways+other]<=dl1_cache_replace[set*dl1_cache_ways+way])
						dl1_cache_replace[set*dl1_cache_ways+other]++;
			}
			dl1_cache_replace[set*dl1_cache_ways+way]=0;
			return;
		}

		//not in the cache: bring the block in, then look it up once more
		if (attempt==0)
		{
			d_cache_write_misses++;
			d_cache_miss(address);
		}
	}
}

//i_cache_doread reads one byte from the level 1 instruction cache.
//If the block is not cached, i_cache_miss loads it and the lookup is repeated.
//Every call counts in i_cache_reads; a miss also counts in i_cache_read_misses.
//The block that is read becomes the most recently used of its set.
//Calls fatal() if the block is still missing after i_cache_miss.
unsigned char i_cache_doread(unsigned int address)
{
	int way,other,attempt;
	int set,tag,offset;

	tag = address / (cache_block_size * il1_cache_sets);
	set = (address % (cache_block_size * il1_cache_sets)) / (cache_block_size);
	offset = address % cache_block_size;

	i_cache_reads++;

	//first attempt: plain lookup; second attempt: lookup after the miss was serviced
	for (attempt=0; attempt<2; attempt++)
	{
		for (way=0; way<il1_cache_ways; way++)
		{
			if (il1_cache_valid[set*il1_cache_ways+way]!=1)
				continue;
			if (il1_cache_tag[set*il1_cache_ways+way]!=tag)
				continue;

			//LRU: valid blocks at least as recent as this one age by one,
			//this block becomes the most recent (0)
			for (other=0; other<il1_cache_ways; other++)
			{
				//the valid bits must be indexed with il1_cache_ways like every
				//other il1 array; using the data cache's way count here would
				//test the wrong entry whenever the two caches differ in ways
				if (other!=way && il1_cache_valid[set*il1_cache_ways+other]==1)
					if (il1_cache_replace[set*il1_cache_ways+other]<=il1_cache_replace[set*il1_cache_ways+way])
						il1_cache_replace[set*il1_cache_ways+other]++;
			}
			il1_cache_replace[set*il1_cache_ways+way]=0;

			return(il1_cache[(set*il1_cache_ways+way)*cache_block_size+offset]);
		}

		//not in the cache: bring the block in, then look it up once more
		if (attempt==0)
		{
			i_cache_read_misses++;
			i_cache_miss(address);
		}
	}
	//if cache_miss didn't put it into the cache, something wasn't done right
	fatal("Error in i-cache");
	return(0);
}

//d_cache_doread reads one byte from the level 1 data cache.
//If the block is not cached, d_cache_miss loads it and the lookup is repeated.
//Every call counts in d_cache_reads; a miss also counts in d_cache_read_misses.
//The block that is read becomes the most recently used of its set.
//Calls fatal() if the block is still missing after d_cache_miss.
unsigned char d_cache_doread(unsigned int address)
{
	int way,other,attempt;
	int set,tag,offset;

	tag = address / (cache_block_size * dl1_cache_sets);
	set = (address % (cache_block_size * dl1_cache_sets)) / (cache_block_size);
	offset = address % cache_block_size;
	d_cache_reads++;

	//first attempt: plain lookup; second attempt: lookup after the miss was serviced
	for (attempt=0; attempt<2; attempt++)
	{
		for (way=0; way<dl1_cache_ways; way++)
		{
			if (dl1_cache_valid[set*dl1_cache_ways+way]!=1)
				continue;
			if (dl1_cache_tag[set*dl1_cache_ways+way]!=tag)
				continue;

			//LRU: valid blocks at least as recent as this one age by one,
			//this block becomes the most recent (0)
			for (other=0; other<dl1_cache_ways; other++)
			{
				if (other!=way && dl1_cache_valid[set*dl1_cache_ways+other]==1)
					if (dl1_cache_replace[set*dl1_cache_ways+other]<=dl1_cache_replace[set*dl1_cache_ways+way])
						dl1_cache_replace[set*dl1_cache_ways+other]++;
			}
			dl1_cache_replace[set*dl1_cache_ways+way]=0;

			return(dl1_cache[(set*dl1_cache_ways+way)*cache_block_size+offset]);
		}

		//not in the cache: bring the block in, then look it up once more
		if (attempt==0)
		{
			d_cache_read_misses++;
			d_cache_miss(address);
		}
	}
	//if cache_miss didn't put it into the cache, something wasn't done right
	fatal("Error in d-cache");
	return(0);
}

//icache_read returns the instruction byte at address (called by icache_read_word)
//it goes through the instruction cache when cache_exists is 1, otherwise straight to memory
unsigned char icache_read(unsigned int address)
{
	if (cache_exists!=1)
		return memory_read(address);

	return i_cache_doread(address);
}

//dcache_read returns the data byte at address (called by some load instructions)
//it goes through the data cache when cache_exists is 1, otherwise straight to memory
unsigned char dcache_read(unsigned int address)
{
	if (cache_exists!=1)
		return memory_read(address);

	return d_cache_doread(address);
}

//dcache_write stores the byte data at address (called by store instructions)
//it goes through the data cache when cache_exists is 1, otherwise straight to memory
void dcache_write(unsigned int address, char data)
{
	if (cache_exists!=1)
	{
		memory_write(address, data);
		return;
	}

	d_cache_dowrite(address, data);
}

//icache_read_word is called by fetch
//it reads the four bytes at address..address+3 through icache_read, lowest
//address first, and assembles them into a 32-bit little-endian word
unsigned int icache_read_word(unsigned int address)
{
        unsigned int word;

        //each byte is widened to unsigned before shifting: a byte of 0x80 or
        //more shifted left by 24 as a signed int would be undefined behaviour
        word=(unsigned int)icache_read(address);
        word|=(unsigned int)icache_read(address+1)<<8;
        word|=(unsigned int)icache_read(address+2)<<16;
        word|=(unsigned int)icache_read(address+3)<<24;

        return word;
}

//dcache_read_word is called by lw instructions
//it reads the four bytes at address..address+3 through dcache_read, lowest
//address first, and assembles them into a 32-bit little-endian word
unsigned int dcache_read_word(unsigned int address)
{
        unsigned int word;

        //each byte is widened to unsigned before shifting: a byte of 0x80 or
        //more shifted left by 24 as a signed int would be undefined behaviour
        word=(unsigned int)dcache_read(address);
        word|=(unsigned int)dcache_read(address+1)<<8;
        word|=(unsigned int)dcache_read(address+2)<<16;
        word|=(unsigned int)dcache_read(address+3)<<24;

        return word;
}

//icache_access_latency reports the cycle penalty of an instruction access to address
//without changing any cache state:
//1 when cache_exists is 0, otherwise the il1 hit latency, the il2 hit latency
//or the il2 miss latency depending on which levels hold the block.
//A block present in level 1 but not in level 2 is a fatal error.
int icache_access_latency(unsigned int address)
{
	int way;
	int in_l1=0,in_l2=0;
	int il1_tag,il1_set,il2_tag,il2_set;

	if (cache_exists==0)
		return 1;

	il1_tag = address / (cache_block_size * il1_cache_sets);
	il1_set = (address % (cache_block_size * il1_cache_sets)) / (cache_block_size);
	il2_tag = address / (cache_block_size * il2_cache_sets);
	il2_set = (address % (cache_block_size * il2_cache_sets)) / (cache_block_size);

	//is there a valid il1 way with a matching tag?
	for (way=0; way<il1_cache_ways && !in_l1; way++)
	{
		if (il1_cache_valid[il1_set*il1_cache_ways+way]==1 && il1_cache_tag[il1_set*il1_cache_ways+way]==il1_tag)
			in_l1=1;
	}

	//is there a valid il2 way with a matching tag?
	for (way=0; way<il2_cache_ways && !in_l2; way++)
	{
		if (il2_cache_valid[il2_set*il2_cache_ways+way]==1 && il2_cache_tag[il2_set*il2_cache_ways+way]==il2_tag)
			in_l2=1;
	}

	if (in_l2)
		return in_l1 ? il1_cache_hit_latency : il2_cache_hit_latency;
	if (!in_l1)
		return il2_cache_miss_latency;

	//in level 1 without being in level 2
	fatal("Block is in level 1 i-cache but not in level 2");

	return 1;
}

//dcache_access_latency reports the cycle penalty of a data access to address
//without changing any cache state:
//1 when cache_exists is 0, otherwise the dl1 hit latency, the dl2 hit latency
//or the dl2 miss latency depending on which levels hold the block.
//A block present in level 1 but not in level 2 is a fatal error.
int dcache_access_latency(unsigned int address)
{
	int way;
	int in_l1=0,in_l2=0;
	int dl1_tag,dl1_set,dl2_tag,dl2_set;

	if (cache_exists==0)
		return 1;

	dl1_tag = address / (cache_block_size * dl1_cache_sets);
	dl1_set = (address % (cache_block_size * dl1_cache_sets)) / (cache_block_size);
	dl2_tag = address / (cache_block_size * dl2_cache_sets);
	dl2_set = (address % (cache_block_size * dl2_cache_sets)) / (cache_block_size);

	//is there a valid dl1 way with a matching tag?
	for (way=0; way<dl1_cache_ways && !in_l1; way++)
	{
		if (dl1_cache_valid[dl1_set*dl1_cache_ways+way]==1 && dl1_cache_tag[dl1_set*dl1_cache_ways+way]==dl1_tag)
			in_l1=1;
	}

	//is there a valid dl2 way with a matching tag?
	for (way=0; way<dl2_cache_ways && !in_l2; way++)
	{
		if (dl2_cache_valid[dl2_set*dl2_cache_ways+way]==1 && dl2_cache_tag[dl2_set*dl2_cache_ways+way]==dl2_tag)
			in_l2=1;
	}

	if (in_l2)
		return in_l1 ? dl1_cache_hit_latency : dl2_cache_hit_latency;
	if (!in_l1)
		return dl2_cache_miss_latency;

	//in level 1 without being in level 2
	fatal("Block is in level 1 d-cache but not in level 2");

	return 1;
}
//mycritical.c
//Michael Black
//routines for implementing critical instruction predictor

#include "mysim.h"
#include "mysimoutorder.h"

//from mysimoutorder.c
extern Reservation_Station *resstat;
extern Reorder_Buffer_Entry *ROB;
extern int inst_in_rob;
extern int reservation_station_number;
extern int rob_tail;
extern int reorder_buffer_size;

extern int subtractcritical;

//stat counters
//criticality_type==0 makes update_criticality_flags return without doing anything
int criticality_type=0;
unsigned int criticality_predictions=0;
unsigned int criticality_predictions_made=0;

//prediction outcome counters, updated by the train_criticality_* routines:
//  correct_true  - instruction met a criterion and was predicted critical
//  correct_false - instruction met no criterion and was predicted non-critical
//  wrong_true    - instruction met a criterion but was predicted non-critical
//  wrong_false   - instruction met no criterion but was predicted critical
unsigned int criticality_correct_true=0;
unsigned int criticality_correct_false=0;
unsigned int criticality_wrong_true=0;
unsigned int criticality_wrong_false=0;

//number of trained instructions whose QOLD/QOLDDEP/ALOLD/QCONS "everset" flag was 1,
//and the total number of trained instructions
unsigned int criticality_QOLD=0;
unsigned int criticality_QOLDDEP=0;
unsigned int criticality_ALOLD=0;
unsigned int criticality_QCONS=0;
unsigned int criticality_total=0;

//these counters are used for the cycle reduction during execution
//marked_critical / not_marked_critical are incremented by observed_critical,
//should_have_been_marked_critical by didnt_observe_critical
unsigned int marked_critical=0;
unsigned int not_marked_critical=0;
unsigned int should_have_been_marked_critical=0;
int criterion=0;	//0=any, 1=QOLD, 2=QOLDDEP, 3=ALOLD, 4=QCONS

//flags
//criticality_hash_table_size: number of entries allocated for criticality_table
//criticality_history_size: number of entries allocated for criticality_history_table
int criticality_hash_table_size=4096;
int criticality_history_size=256;
int criticality_local_history_size=256;
int perceptron_training_style=0;	//0 = error, 1 = training without cutoff, 2 = training with cutoff
int perceptron_weight_growth=0;		//0 = linear, 1 = exponential
int aliasing_reduction=0;		//0 = no aliasing reduction, 1 = assigned seats, 2 = assigned seats with cancellation
int perceptron_training_threshold=1073741824;	//saturate weights at this value
//when >0, the train routines only train an entry on the call where its
//dont_predict countdown wraps around to this value
int criticality_predict_every=0;

//study statistics
unsigned int first_iteration=0;
unsigned int QOLD_local_nn=0,QOLD_local_np=0,QOLD_local_pn=0,QOLD_local_pp=0;
unsigned int QOLDDEP_local_nn=0,QOLDDEP_local_np=0,QOLDDEP_local_pn=0,QOLDDEP_local_pp=0;
unsigned int ALOLD_local_nn=0,ALOLD_local_np=0,ALOLD_local_pn=0,ALOLD_local_pp=0;
unsigned int QCONS_local_nn=0,QCONS_local_np=0,QCONS_local_pn=0,QCONS_local_pp=0;


//Cycle-reduction hook.
//Looks at the ROB entry that reservation station rs points to and checks the
//"everset" flag(s) selected by `criterion` (0 checks all four).
//Returns 1 and bumps marked_critical when a selected flag is 1;
//otherwise returns 0 and bumps not_marked_critical.
int observed_critical(int rs)
{
	int met;

	met = ((criterion==0 || criterion==1) && ROB[resstat[rs].rob_place].QOLDeverset==1)
		|| ((criterion==0 || criterion==2) && ROB[resstat[rs].rob_place].QOLDDEPeverset==1)
		|| ((criterion==0 || criterion==3) && ROB[resstat[rs].rob_place].ALOLDeverset==1)
		|| ((criterion==0 || criterion==4) && ROB[resstat[rs].rob_place].QCONSeverset==1);

	if (met)
	{
		marked_critical++;
		return 1;
	}

	not_marked_critical++;
	return 0;
}

//Checks whether the instruction in reservation station rs met the selected
//criterion (see `criterion`; 0 checks all four "everset" flags).
//Bumps should_have_been_marked_critical exactly once when it did.
void didnt_observe_critical(int rs)
{
	if ((criterion==0 || criterion==1) && ROB[resstat[rs].rob_place].QOLDeverset==1)
	{
		should_have_been_marked_critical++;
		return;
	}
	if ((criterion==0 || criterion==2) && ROB[resstat[rs].rob_place].QOLDDEPeverset==1)
	{
		should_have_been_marked_critical++;
		return;
	}
	if ((criterion==0 || criterion==3) && ROB[resstat[rs].rob_place].ALOLDeverset==1)
	{
		should_have_been_marked_critical++;
		return;
	}
	if ((criterion==0 || criterion==4) && ROB[resstat[rs].rob_place].QCONSeverset==1)
		should_have_been_marked_critical++;
}

//returns the ROB index that follows i, wrapping to 0 at reorder_buffer_size
static int criticality_next_rob_index(int i)
{
	i++;
	if (i>=reorder_buffer_size)
		i=0;
	return i;
}

//This function sets the criticality indicator flags
//for each instruction in ROB
//it is called at the end of the cycle
//
//Every pass walks inst_in_rob entries starting at rob_tail.  The passes are:
//  1. age counters (cycles_in_ROB, cycles_notready)
//  2. QOLD     - entries whose cycles_notready equals the (non-zero) maximum
//  3. QOLDDEP  - entries that produce an operand a QOLD entry is waiting for
//  4. ALOLD    - entries whose cycles_in_ROB equals the maximum
//  5. QCONS    - entries whose output has the (non-zero) maximum number of waiting consumers
//  6. the sticky "everset" copies of the four flags
//Does nothing when criticality_type==0.
void update_criticality_flags()
{
	int i,j,k;

	if (criticality_type==0)
		return;

	//first ++ anybody who has busy==3
	//set to 0 anybody else that still holds a reservation station
	for (k=0,i=rob_tail; k<inst_in_rob; k++,i=criticality_next_rob_index(i))
	{
		ROB[i].cycles_in_ROB++;

		//entries without a reservation station keep their previous cycles_notready
		//NOTE(review): the value may be stale for such entries - confirm this is intended
		if (ROB[i].res_stat<0)
			continue;

		if (resstat[ROB[i].res_stat].busy==3)
			ROB[i].cycles_notready++;
		else
			ROB[i].cycles_notready=0;
	}

	//Detecting QOld
	//oldest instruction in resstat with busy == 3
	j=0;
	for (k=0,i=rob_tail; k<inst_in_rob; k++,i=criticality_next_rob_index(i))
	{
		if (ROB[i].cycles_notready>j)
			j=ROB[i].cycles_notready;
	}
	//j is now the oldest age nonready inst
	for (k=0,i=rob_tail; k<inst_in_rob; k++,i=criticality_next_rob_index(i))
	{
		if (ROB[i].cycles_notready==j && j>0)
			ROB[i].QOLDset=1;
		else
			ROB[i].QOLDset=0;
	}

	//Detecting QOldDep
	//instruction that QOld instruction(s) depend on
	//assume 0
	for (k=0,i=rob_tail; k<inst_in_rob; k++,i=criticality_next_rob_index(i))
		ROB[i].QOLDDEPset=0;

	for (k=0,i=rob_tail; k<inst_in_rob; k++,i=criticality_next_rob_index(i))
	{
		if (ROB[i].QOLDset==0)
			continue;
		//a QOLD entry that no longer owns a reservation station has no
		//operand record to follow; without this check resstat[] would be
		//indexed with a negative value
		if (ROB[i].res_stat<0)
			continue;

		//each *_available field >=0 names the reservation station producing that operand
		if (resstat[ROB[i].res_stat].rs_available>=0)
			ROB[resstat[resstat[ROB[i].res_stat].rs_available].rob_place].QOLDDEPset=1;
		if (resstat[ROB[i].res_stat].rt_available>=0)
			ROB[resstat[resstat[ROB[i].res_stat].rt_available].rob_place].QOLDDEPset=1;
		if (resstat[ROB[i].res_stat].rt2_available>=0)
			ROB[resstat[resstat[ROB[i].res_stat].rt2_available].rob_place].QOLDDEPset=1;
		if (resstat[ROB[i].res_stat].HI_available>=0)
			ROB[resstat[resstat[ROB[i].res_stat].HI_available].rob_place].QOLDDEPset=1;
		if (resstat[ROB[i].res_stat].LO_available>=0)
			ROB[resstat[resstat[ROB[i].res_stat].LO_available].rob_place].QOLDDEPset=1;
		if (resstat[ROB[i].res_stat].fs_available>=0)
			ROB[resstat[resstat[ROB[i].res_stat].fs_available].rob_place].QOLDDEPset=1;
		if (resstat[ROB[i].res_stat].fs2_available>=0)
			ROB[resstat[resstat[ROB[i].res_stat].fs2_available].rob_place].QOLDDEPset=1;
		if (resstat[ROB[i].res_stat].ft_available>=0)
			ROB[resstat[resstat[ROB[i].res_stat].ft_available].rob_place].QOLDDEPset=1;
		if (resstat[ROB[i].res_stat].ft2_available>=0)
			ROB[resstat[resstat[ROB[i].res_stat].ft2_available].rob_place].QOLDDEPset=1;
		if (resstat[ROB[i].res_stat].FCC_available>=0)
			ROB[resstat[resstat[ROB[i].res_stat].FCC_available].rob_place].QOLDDEPset=1;
	}

	//Detecting AlOld
	//oldest instruction in ROB
	j=0;
	for (k=0,i=rob_tail; k<inst_in_rob; k++,i=criticality_next_rob_index(i))
	{
		if (ROB[i].cycles_in_ROB>j)
			j=ROB[i].cycles_in_ROB;
	}
	//j is now the oldest age inst
	for (k=0,i=rob_tail; k<inst_in_rob; k++,i=criticality_next_rob_index(i))
	{
		if (ROB[i].cycles_in_ROB==j)
			ROB[i].ALOLDset=1;
		else
			ROB[i].ALOLDset=0;
	}

	//Detecting QCons
	//inst in resstat whose result is used by most other insts
	for (k=0,i=rob_tail; k<inst_in_rob; k++,i=criticality_next_rob_index(i))
		ROB[i].insts_using_output=0;

	for (k=0,i=rob_tail; k<inst_in_rob; k++,i=criticality_next_rob_index(i))
	{
		if (ROB[i].res_stat<0)
			continue;

		//credit the producer of every operand this entry is still waiting for
		if (resstat[ROB[i].res_stat].rs_available>=0)
			ROB[resstat[resstat[ROB[i].res_stat].rs_available].rob_place].insts_using_output++;
		if (resstat[ROB[i].res_stat].rt_available>=0)
			ROB[resstat[resstat[ROB[i].res_stat].rt_available].rob_place].insts_using_output++;
		if (resstat[ROB[i].res_stat].rt2_available>=0)
			ROB[resstat[resstat[ROB[i].res_stat].rt2_available].rob_place].insts_using_output++;
		if (resstat[ROB[i].res_stat].HI_available>=0)
			ROB[resstat[resstat[ROB[i].res_stat].HI_available].rob_place].insts_using_output++;
		if (resstat[ROB[i].res_stat].LO_available>=0)
			ROB[resstat[resstat[ROB[i].res_stat].LO_available].rob_place].insts_using_output++;
		if (resstat[ROB[i].res_stat].fs_available>=0)
			ROB[resstat[resstat[ROB[i].res_stat].fs_available].rob_place].insts_using_output++;
		if (resstat[ROB[i].res_stat].fs2_available>=0)
			ROB[resstat[resstat[ROB[i].res_stat].fs2_available].rob_place].insts_using_output++;
		if (resstat[ROB[i].res_stat].ft_available>=0)
			ROB[resstat[resstat[ROB[i].res_stat].ft_available].rob_place].insts_using_output++;
		if (resstat[ROB[i].res_stat].ft2_available>=0)
			ROB[resstat[resstat[ROB[i].res_stat].ft2_available].rob_place].insts_using_output++;
		if (resstat[ROB[i].res_stat].FCC_available>=0)
			ROB[resstat[resstat[ROB[i].res_stat].FCC_available].rob_place].insts_using_output++;
	}
	j=0;
	for (k=0,i=rob_tail; k<inst_in_rob; k++,i=criticality_next_rob_index(i))
	{
		if (ROB[i].insts_using_output>j)
			j=ROB[i].insts_using_output;
	}
	//j is now the largest consumer count
	for (k=0,i=rob_tail; k<inst_in_rob; k++,i=criticality_next_rob_index(i))
	{
		if (ROB[i].insts_using_output==j && j>0)
			ROB[i].QCONSset=1;
		else
			ROB[i].QCONSset=0;
	}

	//we want to know, for a given inst, whether the indicator
	//flag was ever set
	for (k=0,i=rob_tail; k<inst_in_rob; k++,i=criticality_next_rob_index(i))
	{
		if (ROB[i].QOLDset==1)
			ROB[i].QOLDeverset=1;
		if (ROB[i].QOLDDEPset==1)
			ROB[i].QOLDDEPeverset=1;
		if (ROB[i].ALOLDset==1)
			ROB[i].ALOLDeverset=1;
		if (ROB[i].QCONSset==1)
			ROB[i].QCONSeverset=1;
	}
}

//One entry of the criticality predictor table (criticality_table).
//Entries are selected with (PC>>3)%criticality_hash_table_size.
typedef struct
{
	//(PC>>3)/criticality_hash_table_size of the owning instruction; -1 after initialization
	int tag;
	//per-criterion counters used by the baseline predictor;
	//train_criticality_baseline keeps them in 0..2*criticality_ctr_thresh-1
	int QOLD_ctr;
	int QOLDDEP_ctr;
	int ALOLD_ctr;
	int QCONS_ctr;
	//result of the most recent get_criticality_* call for this entry (0, 1 or 2)
	int prediction;
	int hybrid_ctr;
	int perceptron_prediction;
	int counter_prediction;
	//countdown used together with criticality_predict_every
	int dont_predict;

	//for perceptron use
	//initialize_criticality_perceptron1 allocates 4*criticality_history_size+1
	//elements for each of weight, input and correct; index 0 is the bias
	int* weight;
	char* input;	//ideally, these would be in the ROB, but for now...

	//for weight tallying
	int* correct;
	int total_correct;

	//for history use
	int* pastQOLD;
	int* pastQOLDDEP;
	int* pastALOLD;
	int* pastQCONS;
	int local_entries;
} criticality_entry;

//the predictor table, allocated by the initialize_criticality_* routines
criticality_entry* criticality_table;

//One slot of the global criticality history (criticality_history_table).
//update_global_history_table fills the four flags from a ROB entry's
//"everset" values.
typedef struct
{
	int QOLD;
	int QOLDDEP;
	int ALOLD;
	int QCONS;
	//used when aliasing_reduction==2: the perceptron get routines feed 0
	//instead of the flags for a slot whose counter is 0
	int counter;
	unsigned int PC;
} criticality_history_entry;

//the global history, criticality_history_size slots
criticality_history_entry* criticality_history_table;

//a baseline counter at or above this value predicts "critical";
//the counters saturate at 2*criticality_ctr_thresh-1
int criticality_ctr_thresh=2;


//Perceptron weight statistics.  Every array has one slot per weight value:
//slot w+WEIGHT_MAG holds weight w, with weights <=-WEIGHT_MAG folded into
//slot 0 and weights >=WEIGHT_MAG-1 folded into the last slot.
#define WEIGHT_MAG 1024 
unsigned int weight_distribution[2*WEIGHT_MAG];		//number of weights seen at each value
float weight_averages[2*WEIGHT_MAG];			//sum over tallied entries of the fraction of weights at each value
unsigned int weight_averages_count[2*WEIGHT_MAG];	//number of entries added into weight_averages
float weight_accuracies[2*WEIGHT_MAG];			//sum of correct/total_correct ratios for weights at each value
unsigned int weight_accuracies_count[2*WEIGHT_MAG];	//number of ratios added into weight_accuracies
//all of the tally routines are no-ops when this is 0
extern int tally_perceptron_weights;

//maps a weight to its slot in the 2*WEIGHT_MAG tally arrays:
//weights >=WEIGHT_MAG-1 share the last slot, weights <=-WEIGHT_MAG share slot 0
static int criticality_weight_bucket(int w)
{
        if (w>=WEIGHT_MAG-1)
                return 2*WEIGHT_MAG-1;
        if (w<=-WEIGHT_MAG)
                return 0;
        return w+WEIGHT_MAG;
}

//Adds the first wsize weights of criticality_table[entry] to the global
//weight statistics, then clears the entry's correctness counters.
//  entry - index into criticality_table
//  wsize - number of weights (and correct[] counters) to look at
//Nothing is tallied when the entry is unused (tag==-1); the counters are
//cleared either way.  No-op when tally_perceptron_weights==0.
void tally_criticality_weights(int entry, int wsize)
{
        int wd[2*WEIGHT_MAG];
        int i,bucket;

        if (tally_perceptron_weights==0)
                return;

        for (i=0; i<2*WEIGHT_MAG; i++)
                wd[i]=0;

        //if tag is not -1, tally the weights
        if (criticality_table[entry].tag>-1)
        {
                //first tally the total distribution
                for (i=0; i<wsize; i++)
                {
                        bucket=criticality_weight_bucket(criticality_table[entry].weight[i]);
                        weight_distribution[bucket]++;
                        wd[bucket]++;
                }

                //next tally the percentage of weights at each value for this entry
                for (i=0; i<2*WEIGHT_MAG; i++)
                {
                        weight_averages[i] += (float)wd[i]/(float)(wsize);
                        weight_averages_count[i]++;
                }

                //finally tally the accuracies of the weights at each magnitude
                //an entry with total_correct==0 has no ratio to contribute; dividing
                //by it would put inf/NaN into the running sums
                if (criticality_table[entry].total_correct!=0)
                {
                        for (i=0; i<wsize; i++)
                        {
                                bucket=criticality_weight_bucket(criticality_table[entry].weight[i]);
                                //every weight is paired with its own correctness counter
                                weight_accuracies[bucket]+=(float)(criticality_table[entry].correct[i])/(float)(criticality_table[entry].total_correct);
                                weight_accuracies_count[bucket]++;
                        }
                }
        }

        //reset to 0
        for (i=0; i<wsize; i++)
                criticality_table[entry].correct[i]=0;
        criticality_table[entry].total_correct=0;
}

//Clears all five weight statistics arrays.
//No-op when tally_perceptron_weights==0.
void initialize_criticality_weight_tally()
{
        int slot=0;

        if (tally_perceptron_weights==0)
                return;

        while (slot<WEIGHT_MAG*2)
        {
                weight_distribution[slot]=0;
                weight_accuracies[slot]=0;
                weight_accuracies_count[slot]=0;
                weight_averages[slot]=0;
                weight_averages_count[slot]=0;
                slot++;
        }
}

//Tallies every entry of criticality_table that is still in use, then prints
//the three weight statistics tables to stdout, one "value<TAB>number" line
//per weight value from -WEIGHT_MAG to WEIGHT_MAG-1.
//No-op when tally_perceptron_weights==0.
void dump_criticality_weight_tally()
{
        int i;

        if (tally_perceptron_weights==0)
                return;

        //fold the entries that were never evicted into the totals
        for (i=0; i<criticality_hash_table_size; i++)
        {
                if (criticality_table[i].tag>-1)
                {
                        tally_criticality_weights(i,criticality_history_size);
                }
        }

        printf("\n");
        printf("Perceptron weight statistics:\n");
        printf("Weight distribution:  total quantity of weights at each magnitude\n");
        for (i=0; i<WEIGHT_MAG*2; i++)
        {
                printf("%i\t%u\n",i-WEIGHT_MAG,weight_distribution[i]);
        }
        printf("\nWeight averages:  average percentage of weights at each magnitude over every entry\n");
        for (i=0; i<WEIGHT_MAG*2; i++)
        {
                //nothing tallied: print the raw sum instead of dividing by zero
                //(same treatment as the accuracies table below)
                if (weight_averages_count[i]==0)
                        printf("%i\t%f\n",i-WEIGHT_MAG,weight_averages[i]);
                else
                        printf("%i\t%f\n",i-WEIGHT_MAG,weight_averages[i]/(float)weight_averages_count[i]);
        }
        printf("\nWeight accuracies:  average accuracy of input for weight at each magnitude over every entry\n");
        for (i=0; i<WEIGHT_MAG*2; i++)
        {
                if (weight_accuracies_count[i]==0)
                        printf("%i\t%f\n",i-WEIGHT_MAG,weight_accuracies[i]);
                else
                        printf("%i\t%f\n",i-WEIGHT_MAG,weight_accuracies[i]/(float)weight_accuracies_count[i]);
        }
}
//Credits a weight when its sign matched the outcome:
//  input agrees with actual (both ==1 or both !=1) and the weight is positive, or
//  input disagrees with actual and the weight is negative.
//In either case *correct is incremented by one.  `predicted` is not used.
//No-op when tally_perceptron_weights==0.
void set_criticality_weight_correctness(char input, int* weight, int* correct, int predicted, int actual)
{
        int agrees;

        if (tally_perceptron_weights==0)
                return;

        agrees=((input==1)==(actual==1));

        if ((agrees && *weight>0) || (!agrees && *weight<0))
                (*correct)++;
}


//trains an individual weight
//  input     - the input value that was paired with this weight
//  weight    - the weight to update in place
//  predicted - the prediction that was made (only used by training style 0)
//  actual    - the observed outcome
//perceptron_training_style:  0 = error (step is input*(actual-predicted) for 0/1 values),
//                            otherwise step is +1 when input and actual agree, -1 when they don't
//perceptron_weight_growth:  0 = linear, 1 = exponential
//perceptron_training_threshold: determines weight saturation value
void train_perceptron_weight(char input, int* weight, int predicted, int actual)
{
	int e,tv;
	//the update is computed in a wider type so that doubling a weight that
	//already sits at the saturation value cannot overflow before it is clamped
	long long w,limit;

	if (perceptron_training_style==0)
	{
		//compute the error
		e=0;
		//actual==1, predicted==0
		if (predicted==0 && actual==1)
			e=1;
		//actual==0, predicted==1
		else if (predicted==1 && actual==0)
			e=-1;

		//compute the training value
		tv=input*e;
	}
	else
	{
		if ((actual==1)==(input==1))
			tv=1;
		else
			tv=-1;
	}

	w=*weight;
	limit=perceptron_training_threshold;

	if (perceptron_weight_growth==0)
	{
		w+=tv;
	}
	else if (tv>0)
	{
		//move toward +infinity: double positives, halve negatives,
		//step by one across -1, 0
		if (w>0)
			w*=2;
		else if (w==0 || w==-1)
			w++;
		else
			w/=2;
	}
	else if (tv<0)
	{
		//move toward -infinity: halve positives, double negatives,
		//step by one across 1, 0
		if (w>1)
			w/=2;
		else if (w==1 || w==0)
			w--;
		else
			w*=2;
	}

	//saturate
	if (w>limit)
		w=limit;
	if (w<-limit)
		w=-limit;

	*weight=(int)w;
}

//stores the most recent criticality information in the global history table
//  robentry - ROB index whose QOLD/QOLDDEP/ALOLD/QCONS "everset" flags are recorded
//  PC       - address of that instruction (selects the slot when seats are assigned)
//aliasing_reduction: 0 - none, 1 - assigned seats, 2 - assigned seats with cancellation
void update_global_history_table(int robentry, unsigned int PC)
{
	int location,i;

	if (aliasing_reduction==0)
	{
		//shift the new information into the history table
		//slot 0 is the newest instruction, the last slot falls off
		for (i=criticality_history_size-1; i>0; i--)
		{
			criticality_history_table[i].QOLD=criticality_history_table[i-1].QOLD;
			criticality_history_table[i].QOLDDEP=criticality_history_table[i-1].QOLDDEP;
			criticality_history_table[i].ALOLD=criticality_history_table[i-1].ALOLD;
			criticality_history_table[i].QCONS=criticality_history_table[i-1].QCONS;
		}
		criticality_history_table[0].QOLD=ROB[robentry].QOLDeverset;
		criticality_history_table[0].QOLDDEP=ROB[robentry].QOLDDEPeverset;
		criticality_history_table[0].ALOLD=ROB[robentry].ALOLDeverset;
		criticality_history_table[0].QCONS=ROB[robentry].QCONSeverset;
		return;
	}

	//assigned seats: the instruction always writes the slot chosen by its PC
	location = (PC>>3)%criticality_history_size;
	criticality_history_table[location].QOLD=ROB[robentry].QOLDeverset;
	criticality_history_table[location].QOLDDEP=ROB[robentry].QOLDDEPeverset;
	criticality_history_table[location].ALOLD=ROB[robentry].ALOLDeverset;
	criticality_history_table[location].QCONS=ROB[robentry].QCONSeverset;

	if (aliasing_reduction==2)
	{
		//cancellation: the slot just written gets a full counter and every
		//other slot ages by one; a slot whose counter reaches 0 is fed to
		//the perceptrons as input 0
		criticality_history_table[location].counter=criticality_history_size;

		for (i=0; i<criticality_history_size; i++)
		{
			if (i==location)
				continue;
			if (criticality_history_table[i].counter>0)
			{
				criticality_history_table[i].counter--;
			}
		}
	}
}

//This implements the baseline critical instruction predictor
//described in Tune, et al
//Looks up the table entry for PC and stores the result in its `prediction` field.
//Returns 0 when the entry belongs to another instruction (tag mismatch) or no
//counter has reached criticality_ctr_thresh; 1 when at least one counter has;
//2 instead of 1 when criticality_predict_every>0 and the entry's dont_predict is 0.
int get_criticality_baseline(unsigned int PC)
{
	int entry=(PC>>3)%criticality_hash_table_size;
	int critical;

	if (criticality_table[entry].tag!=(PC>>3)/criticality_hash_table_size)
	{
		criticality_table[entry].prediction=0;
		return 0;
	}

	//critical as soon as any one criterion counter is at or above the threshold
	critical = criticality_table[entry].QOLD_ctr>=criticality_ctr_thresh
		|| criticality_table[entry].QOLDDEP_ctr>=criticality_ctr_thresh
		|| criticality_table[entry].ALOLD_ctr>=criticality_ctr_thresh
		|| criticality_table[entry].QCONS_ctr>=criticality_ctr_thresh;

	criticality_table[entry].prediction=critical;

	if (criticality_table[entry].dont_predict==0 && criticality_table[entry].prediction==1 && criticality_predict_every>0)
		criticality_table[entry].prediction=2;

	return criticality_table[entry].prediction;
}

//moves a counter one step up (up!=0) or down and saturates it to 0..top
static int criticality_step_counter(int ctr, int up, int top)
{
	if (up)
		ctr+=1;
	else
		ctr--;
	if (ctr>top)
		ctr=top;
	if (ctr<0)
		ctr=0;
	return ctr;
}

//Trains the baseline predictor entry for PC with the criteria that the
//instruction in ROB[robentry] actually met, and updates the statistics.
//A tag mismatch first claims the entry (counters set to 1, dont_predict to 0).
//When criticality_predict_every>0 the entry's dont_predict countdown is
//advanced and training only happens on the call where it wraps around.
void train_criticality_baseline(unsigned int PC, int robentry)
{
	int entry=(PC>>3)%criticality_hash_table_size;
	int top=2*criticality_ctr_thresh-1;
	int qold,qolddep,alold,qcons;

	//if tags don't match, reset entry
	if (criticality_table[entry].tag!=(PC>>3)/criticality_hash_table_size)
	{
		criticality_table[entry].tag=(PC>>3)/criticality_hash_table_size;
		criticality_table[entry].QOLD_ctr=1;
		criticality_table[entry].QOLDDEP_ctr=1;
		criticality_table[entry].ALOLD_ctr=1;
		criticality_table[entry].QCONS_ctr=1;

		criticality_table[entry].dont_predict=0;
	}

	if (criticality_predict_every>0)
	{
		criticality_table[entry].dont_predict--;
		if (criticality_table[entry].dont_predict<0)
			criticality_table[entry].dont_predict=criticality_predict_every;

		//only the call that wrapped the countdown trains
		if (criticality_table[entry].dont_predict!=criticality_predict_every)
			return;
		if (criticality_table[entry].prediction==2)
			criticality_table[entry].prediction=1;
	}

	qold=(ROB[robentry].QOLDeverset==1);
	qolddep=(ROB[robentry].QOLDDEPeverset==1);
	alold=(ROB[robentry].ALOLDeverset==1);
	qcons=(ROB[robentry].QCONSeverset==1);

	//set counters appropriately
	criticality_table[entry].QOLD_ctr=criticality_step_counter(criticality_table[entry].QOLD_ctr,qold,top);
	criticality_table[entry].QOLDDEP_ctr=criticality_step_counter(criticality_table[entry].QOLDDEP_ctr,qolddep,top);
	criticality_table[entry].ALOLD_ctr=criticality_step_counter(criticality_table[entry].ALOLD_ctr,alold,top);
	criticality_table[entry].QCONS_ctr=criticality_step_counter(criticality_table[entry].QCONS_ctr,qcons,top);

	//score the stored prediction against what was observed
	if (qold || qolddep || alold || qcons)
	{
		if (criticality_table[entry].prediction>=1)
			criticality_correct_true++;
		else
			criticality_wrong_true++;
	}
	else
	{
		if (criticality_table[entry].prediction==0)
			criticality_correct_false++;
		else
			criticality_wrong_false++;
	}

	//per-criterion totals
	if (qold)
		criticality_QOLD++;
	if (qolddep)
		criticality_QOLDDEP++;
	if (alold)
		criticality_ALOLD++;
	if (qcons)
		criticality_QCONS++;
	criticality_total++;
}

//Allocates the zero-filled predictor table (criticality_hash_table_size
//entries) and marks every entry unused by setting its tag to -1.
//Calls fatal() when the allocation fails.
void initialize_criticality_baseline()
{
	int slot;

	criticality_table=(criticality_entry*)calloc(criticality_hash_table_size, sizeof(criticality_entry));

	if (!criticality_table)
		fatal("Not enough memory for criticality");

	slot=0;
	while (slot<criticality_hash_table_size)
	{
		criticality_table[slot].tag=-1;
		slot++;
	}
}

//Perceptron approach 1 - cr3
//Perceptron predictor:
//Input: for each past inst, for each criterion, whether criteria were set for that inst
//Output: whether it should be treated as critical
//Training: whether that inst met any of the criteria

//Computes the prediction for PC, stores it in the entry's `prediction` field
//and returns it: 0 = not critical (also returned on a tag mismatch),
//1 = critical, 2 = critical while criticality_predict_every>0 and the entry's
//dont_predict is 0.
//The inputs used are saved in the entry's input[] array (index 1 onward, four
//per history slot) so that training can reuse them.
int get_criticality_perceptron1(unsigned int PC)
{
	int i,c;
	int entry;
	int sum;

	entry=(PC>>3)%criticality_hash_table_size;

	if (criticality_table[entry].tag!=(PC>>3)/criticality_hash_table_size)
	{
		criticality_table[entry].prediction=0;
		//assume non-critical on cold instruction
		//may want to study this - % of cold inst that are critical
		return 0;
	}

	//compute perceptron output
	sum=0;
	for (i=0; i<criticality_history_size; i++)
	{
		char in[4];

		//each criterion of each history slot is one bipolar input
		in[0]=(criticality_history_table[i].QOLD==1? 1: -1);
		in[1]=(criticality_history_table[i].QOLDDEP==1? 1: -1);
		in[2]=(criticality_history_table[i].ALOLD==1? 1: -1);
		in[3]=(criticality_history_table[i].QCONS==1? 1: -1);

		//a cancelled slot contributes nothing
		if (aliasing_reduction==2 && criticality_history_table[i].counter==0)
			in[0]=in[1]=in[2]=in[3]=0;

		for (c=0; c<4; c++)
		{
			criticality_table[entry].input[4*i+c+1]=in[c];
			sum+=criticality_table[entry].weight[4*i+c+1] * in[c];
		}
	}
	//bias: weight[0] is trained against a constant input of 1, so it is the
	//value added here (input[0] is never written and would always add 0)
	sum+=criticality_table[entry].weight[0];

	//threshold
	if (sum>=0)
		criticality_table[entry].prediction=1;
	else
		criticality_table[entry].prediction=0;

	if (criticality_table[entry].dont_predict==0 && criticality_table[entry].prediction==1 && criticality_predict_every>0)
		criticality_table[entry].prediction=2;

	return criticality_table[entry].prediction;
}

//Trains the perceptron entry for PC with the outcome of the instruction in
//ROB[robentry], records that instruction in the global history and updates
//the statistics.
//The training target is the OR of the four "everset" flags.
//A tag mismatch first tallies and then clears the evicted entry's weights.
//When criticality_predict_every>0 the entry's dont_predict countdown is
//advanced; on calls where it does not wrap around only the history is updated.
void train_criticality_perceptron1(unsigned int PC, int robentry)
{
	int entry;
	int w;
	int actual;
	int numw = 4*criticality_history_size+1;

	entry=(PC>>3)%criticality_hash_table_size;

	//if tags don't match, reset entry
	if (criticality_table[entry].tag!=(PC>>3)/criticality_hash_table_size)
	{
		//tally while the old tag is still in place: the tally skips entries
		//whose tag is -1, which it cannot do once the new tag is written
		//NOTE(review): only criticality_history_size of the numw weights are tallied - confirm intended
		tally_criticality_weights(entry,criticality_history_size);

		criticality_table[entry].tag=(PC>>3)/criticality_hash_table_size;

		//reset the weights
		for (w=0; w<numw; w++)
			criticality_table[entry].weight[w]=0;

		criticality_table[entry].dont_predict=0;
	}

	if (criticality_predict_every>0)
	{
		criticality_table[entry].dont_predict--;
		if (criticality_table[entry].dont_predict<0)
			criticality_table[entry].dont_predict=criticality_predict_every;

		if (criticality_table[entry].dont_predict!=criticality_predict_every)
		{
			//shift the new information into the history table
			update_global_history_table(robentry, PC);
			return;
		}
		if (criticality_table[entry].prediction==2)
			criticality_table[entry].prediction=1;
	}

	//train the perceptron
	//determine the training value
	//use the OR of the criteria
	actual=0;
	if (ROB[robentry].QOLDeverset==1)
		actual=1;
	if (ROB[robentry].QOLDDEPeverset==1)
		actual=1;
	if (ROB[robentry].ALOLDeverset==1)
		actual=1;
	if (ROB[robentry].QCONSeverset==1)
		actual=1;

	//train each weight against the input saved by get_criticality_perceptron1
	//(indices 1..numw-1; index 0 is the bias)
	for (w=1; w<numw; w++)
	{
		train_perceptron_weight(criticality_table[entry].input[w],&criticality_table[entry].weight[w],criticality_table[entry].prediction,actual);
		set_criticality_weight_correctness(criticality_table[entry].input[w],&criticality_table[entry].weight[w],&criticality_table[entry].correct[w],criticality_table[entry].prediction,actual);
	}
	//train the bias weight
	train_perceptron_weight(1,&criticality_table[entry].weight[0],criticality_table[entry].prediction,actual);
	set_criticality_weight_correctness(1,&criticality_table[entry].weight[0],&criticality_table[entry].correct[0],criticality_table[entry].prediction,actual);

	//shift the new information into the history table
	update_global_history_table(robentry, PC);

	//update statistics
	if (actual==1 && criticality_table[entry].prediction==1)
		criticality_correct_true++;
	else if (actual==1 && criticality_table[entry].prediction==0)
		criticality_wrong_true++;
	else if (actual==0 && criticality_table[entry].prediction==1)
		criticality_wrong_false++;
	else if (actual==0 && criticality_table[entry].prediction==0)
		criticality_correct_false++;

	if (ROB[robentry].QOLDeverset==1)
		criticality_QOLD++;
	if (ROB[robentry].QOLDDEPeverset==1)
		criticality_QOLDDEP++;
	if (ROB[robentry].ALOLDeverset==1)
		criticality_ALOLD++;
	if (ROB[robentry].QCONSeverset==1)
		criticality_QCONS++;
	criticality_total++;
}

//Allocates and clears everything perceptron approach 1 needs:
//the predictor table (criticality_hash_table_size entries, tag -1, each with
//weight/correct/input arrays of 4*criticality_history_size+1 elements) and
//the global history table (criticality_history_size slots, all flags 0).
//Calls fatal() when any allocation fails.
void initialize_criticality_perceptron1()
{
	int slot,w;
	//number of weights - 4 for each inst in hist - for each criterion, plus a bias
	int numw=4*criticality_history_size+1;

	criticality_table=(criticality_entry*)calloc(criticality_hash_table_size, sizeof(criticality_entry));

	criticality_history_table=(criticality_history_entry*)calloc(criticality_history_size, sizeof(criticality_history_entry));

	if (!criticality_table || !criticality_history_table)
		fatal("Not enough memory for criticality");

	for (slot=0; slot<criticality_hash_table_size; slot++)
	{
		criticality_table[slot].tag=-1;
		criticality_table[slot].weight=(int*)calloc(numw, sizeof(int));
		criticality_table[slot].correct=(int*)calloc(numw, sizeof(int));
		criticality_table[slot].input=(char*)calloc(numw, sizeof(char));

		if (!criticality_table[slot].weight || !criticality_table[slot].input || !criticality_table[slot].correct)
			fatal("Not enough memory for criticality");

		//initialize weights to 0
		w=0;
		while (w<numw)
		{
			criticality_table[slot].weight[w]=0;
			w++;
		}
	}

	//start with an empty history
	for (slot=0; slot<criticality_history_size; slot++)
	{
		criticality_history_table[slot].QOLD=0;
		criticality_history_table[slot].QOLDDEP=0;
		criticality_history_table[slot].ALOLD=0;
		criticality_history_table[slot].QCONS=0;
	}
}

//Perceptron approach 2 - cr5
//Perceptron predictor: input for each criterion, train criteria separately
//Input: for each past inst, for each criterion, whether criteria were set for that inst
//Output: whether it should be treated as critical
//Training: whether that inst met each criterion

//get_criticality_perceptron2 evaluates the cr5 perceptron for the instruction at PC.
//Returns 0 = not critical, 1 = critical, 2 = critical while the entry's
//dont_predict counter is 0 and criticality_predict_every>0.
//A hash-table entry whose tag does not match PC gives 0 without evaluating
//the perceptron.
//Side effects: the inputs derived from the global history are stored in the
//entry's input array and the result in its prediction field; the training
//routine reads both back.
int get_criticality_perceptron2(unsigned int PC)
{
	int i,k;
	int entry;
	int sum;
	int *weight;
	char *input;

	entry=(PC>>3)%criticality_hash_table_size;

	if (criticality_table[entry].tag!=(PC>>3)/criticality_hash_table_size)
	{
		//assume non-critical on cold instruction
		//may want to study this - % of cold inst that are critical
		criticality_table[entry].prediction=0;
		return 0;
	}

	weight=criticality_table[entry].weight;
	input=criticality_table[entry].input;

	//bias: training updates weight[0] with a constant input of 1, while
	//nothing visible ever writes input[0], so the sum has to start from the
	//weight for the trained bias to have any effect
	sum=weight[0];

	//compute perceptron output
	for (i=0; i<criticality_history_size; i++)
	{
		int x[4];

		x[0]=(criticality_history_table[i].QOLD==1? 1: -1);
		x[1]=(criticality_history_table[i].QOLDDEP==1? 1: -1);
		x[2]=(criticality_history_table[i].ALOLD==1? 1: -1);
		x[3]=(criticality_history_table[i].QCONS==1? 1: -1);

		//aliasing reduction mode 2 mutes history slots whose counter is 0
		if (aliasing_reduction==2 && criticality_history_table[i].counter==0)
		{
			x[0]=0;
			x[1]=0;
			x[2]=0;
			x[3]=0;
		}

		for (k=0; k<4; k++)
		{
			//remember the input for training; the sum uses the int value so
			//it does not depend on whether plain char is signed
			input[4*i+k+1]=(char)x[k];
			sum+=weight[4*i+k+1]*x[k];
		}
	}

	//threshold
	if (sum>=0)
		criticality_table[entry].prediction=1;
	else
		criticality_table[entry].prediction=0;

	//a positive prediction is reported as 2 when dont_predict is 0 and
	//periodic prediction is enabled; training turns the 2 back into 1
	if (criticality_table[entry].dont_predict==0 && criticality_table[entry].prediction==1 && criticality_predict_every>0)
		criticality_table[entry].prediction=2;

	return criticality_table[entry].prediction;
}

//train_criticality_perceptron2 trains the cr5 predictor entry of the
//instruction at PC from the criterion flags of ROB[robentry].
//On a tag mismatch the entry is claimed for PC and its weights are cleared.
//With criticality_predict_every>0 only the call on which dont_predict wraps
//around trains; every other call just updates the global history.
//Each history weight is trained against its own criterion flag, the bias
//against the OR of the four flags.  The global history and the criticality
//statistics are updated afterwards.
void train_criticality_perceptron2(unsigned int PC, int robentry)
{
	int slot;
	int idx,base;
	int actual;
	int pred;
	int weight_count = 4*criticality_history_size+1;
	int *weight;
	char *input;

	slot=(PC>>3)%criticality_hash_table_size;

	//a different instruction owns the entry: take it over and start fresh
	if (criticality_table[slot].tag!=(PC>>3)/criticality_hash_table_size)
	{
		criticality_table[slot].tag=(PC>>3)/criticality_hash_table_size;

		for (idx=0; idx<weight_count; idx++)
			criticality_table[slot].weight[idx]=0;

		criticality_table[slot].dont_predict=0;
	}

	if (criticality_predict_every>0)
	{
		criticality_table[slot].dont_predict-=1;
		if (criticality_table[slot].dont_predict<0)
			criticality_table[slot].dont_predict=criticality_predict_every;

		//only the wrap-around call trains; the others just record history
		if (criticality_table[slot].dont_predict!=criticality_predict_every)
		{
			update_global_history_table(robentry, PC);
			return;
		}
		if (criticality_table[slot].prediction==2)
			criticality_table[slot].prediction=1;
	}

	weight=criticality_table[slot].weight;
	input=criticality_table[slot].input;

	//training target for the bias: OR of the four criteria
	actual=0;
	if (ROB[robentry].QOLDeverset==1 || ROB[robentry].QOLDDEPeverset==1 || ROB[robentry].ALOLDeverset==1 || ROB[robentry].QCONSeverset==1)
		actual=1;

	//each history weight is trained against its own criterion
	for (idx=0; idx<criticality_history_size; idx++)
	{
		base=4*idx+1;
		train_perceptron_weight(input[base],&weight[base],criticality_table[slot].prediction,ROB[robentry].QOLDeverset);
		train_perceptron_weight(input[base+1],&weight[base+1],criticality_table[slot].prediction,ROB[robentry].QOLDDEPeverset);
		train_perceptron_weight(input[base+2],&weight[base+2],criticality_table[slot].prediction,ROB[robentry].ALOLDeverset);
		train_perceptron_weight(input[base+3],&weight[base+3],criticality_table[slot].prediction,ROB[robentry].QCONSeverset);
	}
	//bias weight, constant input 1
	train_perceptron_weight(1,&weight[0],criticality_table[slot].prediction,actual);

	//shift the new information into the history table
	update_global_history_table(robentry, PC);

	//prediction accuracy statistics
	pred=criticality_table[slot].prediction;
	if (pred==1)
	{
		if (actual==1)
			criticality_correct_true++;
		else
			criticality_wrong_false++;
	}
	else if (pred==0)
	{
		if (actual==1)
			criticality_wrong_true++;
		else
			criticality_correct_false++;
	}

	//per-criterion statistics
	if (ROB[robentry].QOLDeverset==1)
		criticality_QOLD+=1;
	if (ROB[robentry].QOLDDEPeverset==1)
		criticality_QOLDDEP+=1;
	if (ROB[robentry].ALOLDeverset==1)
		criticality_ALOLD+=1;
	if (ROB[robentry].QCONSeverset==1)
		criticality_QCONS+=1;
	criticality_total+=1;
}

//initialize_criticality_perceptron2 allocates the predictor tables for approach 2 (cr5).
//Every hash-table entry receives weight and input arrays of
//4*criticality_history_size+1 elements, starts with tag -1 and all weights 0;
//the four criterion flags of every history slot are cleared.
//fatal() is called if an allocation fails.
void initialize_criticality_perceptron2()
{
	int slot,w;
	//four weights per history instruction (one per criterion) plus the bias
	int weight_count=4*criticality_history_size+1;

	criticality_table=(criticality_entry*)calloc(criticality_hash_table_size, sizeof(criticality_entry));
	criticality_history_table=(criticality_history_entry*)calloc(criticality_history_size, sizeof(criticality_history_entry));

	if (!criticality_table || !criticality_history_table)
		fatal("Not enough memory for criticality");

	for (slot=0; slot<criticality_hash_table_size; slot++)
	{
		int *weights=(int*)calloc(weight_count, sizeof(int));
		char *inputs=(char*)calloc(weight_count, sizeof(char));

		criticality_table[slot].tag=-1;
		criticality_table[slot].weight=weights;
		criticality_table[slot].input=inputs;

		if (!weights || !inputs)
			fatal("Not enough memory for criticality");

		//calloc already zero-fills; the explicit reset of the weights is kept
		for (w=0; w<weight_count; w++)
			weights[w]=0;
	}

	//start with an empty global history
	for (slot=0; slot<criticality_history_size; slot++)
	{
		criticality_history_table[slot].QOLD=0;
		criticality_history_table[slot].QOLDDEP=0;
		criticality_history_table[slot].ALOLD=0;
		criticality_history_table[slot].QCONS=0;
	}
}

//Perceptron approach 3 - cr6
//Perceptron predictor: input on OR of criteria
//Input: for each past inst, whether any criterion was set
//Output: whether it should be treated as critical
//Training: OR of criteria

//get_criticality_perceptron3 evaluates the cr6 perceptron for the instruction at PC.
//There is one input per history slot: +1 if any of the four criteria was set
//for that slot, -1 otherwise (0 when aliasing reduction mode 2 mutes the slot).
//Returns 1 (critical) when the weighted sum is >= 0, else 0; an entry whose
//tag does not match PC gives 0 without evaluating the perceptron.
//Side effects: stores the inputs in the entry's input array and the result in
//its prediction field; the training routine reads both back.
//NOTE(review): unlike the other get_criticality_perceptron* routines this one
//never promotes a prediction of 1 to 2 when criticality_predict_every>0,
//although train_criticality_perceptron3 resets 2 to 1 - confirm whether the
//omission is intended.
int get_criticality_perceptron3(unsigned int PC)
{
	int i;
	int entry;
	int sum;
	int *weight;
	char *input;

	entry=(PC>>3)%criticality_hash_table_size;

	if (criticality_table[entry].tag!=(PC>>3)/criticality_hash_table_size)
	{
		//assume non-critical on cold instruction
		//may want to study this - % of cold inst that are critical
		criticality_table[entry].prediction=0;
		return 0;
	}

	weight=criticality_table[entry].weight;
	input=criticality_table[entry].input;

	//bias: training updates weight[0] with a constant input of 1, while
	//nothing visible ever writes input[0], so the sum has to start from the
	//weight for the trained bias to have any effect
	sum=weight[0];

	//compute perceptron output
	for (i=0; i<criticality_history_size; i++)
	{
		int x;

		if (criticality_history_table[i].QOLD==1 || criticality_history_table[i].QOLDDEP==1 || criticality_history_table[i].ALOLD==1 || criticality_history_table[i].QCONS==1)
			x=1;
		else
			x=-1;

		//aliasing reduction mode 2 mutes history slots whose counter is 0
		if (aliasing_reduction==2 && criticality_history_table[i].counter==0)
			x=0;

		//remember the input for training; the sum uses the int value so it
		//does not depend on whether plain char is signed
		input[i+1]=(char)x;
		sum+=weight[i+1]*x;
	}

	//threshold
	if (sum>=0)
		criticality_table[entry].prediction=1;
	else
		criticality_table[entry].prediction=0;

	return criticality_table[entry].prediction;
}

//train_criticality_perceptron3 trains the cr6 predictor entry of the
//instruction at PC from the criterion flags of ROB[robentry].
//On a tag mismatch the entry is claimed for PC and its weights are cleared.
//With criticality_predict_every>0 only the call on which dont_predict wraps
//around trains; every other call just updates the global history.
//All weights, including the bias, are trained against the OR of the four
//criteria.  The global history and the criticality statistics are updated
//afterwards.
void train_criticality_perceptron3(unsigned int PC, int robentry)
{
	int slot;
	int idx;
	int actual;
	int pred;
	int weight_count = criticality_history_size+1;
	int *weight;
	char *input;

	slot=(PC>>3)%criticality_hash_table_size;

	//a different instruction owns the entry: take it over and start fresh
	if (criticality_table[slot].tag!=(PC>>3)/criticality_hash_table_size)
	{
		criticality_table[slot].tag=(PC>>3)/criticality_hash_table_size;

		for (idx=0; idx<weight_count; idx++)
			criticality_table[slot].weight[idx]=0;

		criticality_table[slot].dont_predict=0;
	}

	if (criticality_predict_every>0)
	{
		criticality_table[slot].dont_predict-=1;
		if (criticality_table[slot].dont_predict<0)
			criticality_table[slot].dont_predict=criticality_predict_every;

		//only the wrap-around call trains; the others just record history
		if (criticality_table[slot].dont_predict!=criticality_predict_every)
		{
			update_global_history_table(robentry, PC);
			return;
		}
		if (criticality_table[slot].prediction==2)
			criticality_table[slot].prediction=1;
	}

	weight=criticality_table[slot].weight;
	input=criticality_table[slot].input;

	//training target: OR of the four criteria
	actual=0;
	if (ROB[robentry].QOLDeverset==1 || ROB[robentry].QOLDDEPeverset==1 || ROB[robentry].ALOLDeverset==1 || ROB[robentry].QCONSeverset==1)
		actual=1;

	//history weights occupy indices 1..criticality_history_size
	for (idx=1; idx<=criticality_history_size; idx++)
		train_perceptron_weight(input[idx],&weight[idx],criticality_table[slot].prediction,actual);

	//bias weight, constant input 1
	train_perceptron_weight(1,&weight[0],criticality_table[slot].prediction,actual);

	//shift the new information into the history table
	update_global_history_table(robentry, PC);

	//prediction accuracy statistics
	pred=criticality_table[slot].prediction;
	if (pred==1)
	{
		if (actual==1)
			criticality_correct_true++;
		else
			criticality_wrong_false++;
	}
	else if (pred==0)
	{
		if (actual==1)
			criticality_wrong_true++;
		else
			criticality_correct_false++;
	}

	//per-criterion statistics
	if (ROB[robentry].QOLDeverset==1)
		criticality_QOLD+=1;
	if (ROB[robentry].QOLDDEPeverset==1)
		criticality_QOLDDEP+=1;
	if (ROB[robentry].ALOLDeverset==1)
		criticality_ALOLD+=1;
	if (ROB[robentry].QCONSeverset==1)
		criticality_QCONS+=1;
	criticality_total+=1;
}

//initialize_criticality_perceptron3 allocates the predictor tables for approach 3 (cr6).
//Every hash-table entry receives weight and input arrays of
//criticality_history_size+1 elements, starts with tag -1 and all weights 0;
//the four criterion flags of every history slot are cleared.
//fatal() is called if an allocation fails.
void initialize_criticality_perceptron3()
{
	int slot,w;
	//one weight per history instruction plus the bias
	int weight_count=criticality_history_size+1;

	criticality_table=(criticality_entry*)calloc(criticality_hash_table_size, sizeof(criticality_entry));
	criticality_history_table=(criticality_history_entry*)calloc(criticality_history_size, sizeof(criticality_history_entry));

	if (!criticality_table || !criticality_history_table)
		fatal("Not enough memory for criticality");

	for (slot=0; slot<criticality_hash_table_size; slot++)
	{
		int *weights=(int*)calloc(weight_count, sizeof(int));
		char *inputs=(char*)calloc(weight_count, sizeof(char));

		criticality_table[slot].tag=-1;
		criticality_table[slot].weight=weights;
		criticality_table[slot].input=inputs;

		if (!weights || !inputs)
			fatal("Not enough memory for criticality");

		//calloc already zero-fills; the explicit reset of the weights is kept
		for (w=0; w<weight_count; w++)
			weights[w]=0;
	}

	//start with an empty global history
	for (slot=0; slot<criticality_history_size; slot++)
	{
		criticality_history_table[slot].QOLD=0;
		criticality_history_table[slot].QOLDDEP=0;
		criticality_history_table[slot].ALOLD=0;
		criticality_history_table[slot].QCONS=0;
	}
}

//Perceptron approach 4 - cr7
//Perceptron predictor: 4 perceptrons, use OR
//Input: for each past inst whether its criterion was met (X4)
//Output: whether that criterion is set
//Training: whether that inst met its own criterion

//get_criticality_perceptron4 evaluates the four cr7 perceptrons (one per
//criterion) for the instruction at PC and ORs their outputs.
//Returns 0 = not critical, 1 = critical, 2 = critical while the entry's
//dont_predict counter is 0 and criticality_predict_every>0.
//An entry whose tag does not match PC gives 0 without evaluating anything.
//Side effects: stores the inputs in the entry's input array and the result in
//its prediction field; the training routine reads both back.
//Layout: perceptron k (0..3) keeps the weight of history slot i at index
//k*criticality_history_size+(i+1) and its bias at k*criticality_history_size.
//NOTE(review): for k>0 the bias index equals the index of the last history
//weight of perceptron k-1, so the two share one weight.  The allocation of
//4*criticality_history_size+4 elements suggests a stride of
//criticality_history_size+1 was intended - confirm; changing it requires the
//same change in train_criticality_perceptron4.
int get_criticality_perceptron4(unsigned int PC)
{
	int i,k;
	int entry;
	int hs;
	int sum[4];
	int *weight;
	char *input;

	entry=(PC>>3)%criticality_hash_table_size;

	if (criticality_table[entry].tag!=(PC>>3)/criticality_hash_table_size)
	{
		//assume non-critical on cold instruction
		//may want to study this - % of cold inst that are critical
		criticality_table[entry].prediction=0;
		return 0;
	}

	hs=criticality_history_size;
	weight=criticality_table[entry].weight;
	input=criticality_table[entry].input;

	//bias: training updates weight[k*hs] with a constant input of 1, so the
	//sums start from those weights; reading input[k*hs] instead would pick
	//up an unrelated history input (or the never-written input[0])
	for (k=0; k<4; k++)
		sum[k]=weight[k*hs];

	//compute perceptron outputs
	for (i=0; i<hs; i++)
	{
		int x[4];

		x[0]=(criticality_history_table[i].QOLD==1? 1: -1);
		x[1]=(criticality_history_table[i].QOLDDEP==1? 1: -1);
		x[2]=(criticality_history_table[i].ALOLD==1? 1: -1);
		x[3]=(criticality_history_table[i].QCONS==1? 1: -1);

		//aliasing reduction mode 2 mutes history slots whose counter is 0
		if (aliasing_reduction==2 && criticality_history_table[i].counter==0)
		{
			x[0]=0;
			x[1]=0;
			x[2]=0;
			x[3]=0;
		}

		for (k=0; k<4; k++)
		{
			//remember the input for training; the sum uses the int value so
			//it does not depend on whether plain char is signed
			input[k*hs+(i+1)]=(char)x[k];
			sum[k]+=weight[k*hs+(i+1)]*x[k];
		}
	}

	//threshold: critical if any of the four perceptrons fires
	if (sum[0]>=0 || sum[1]>=0 || sum[2]>=0 || sum[3]>=0)
		criticality_table[entry].prediction=1;
	else
		criticality_table[entry].prediction=0;

	//a positive prediction is reported as 2 when dont_predict is 0 and
	//periodic prediction is enabled; training turns the 2 back into 1
	if (criticality_table[entry].dont_predict==0 && criticality_table[entry].prediction==1 && criticality_predict_every>0)
		criticality_table[entry].prediction=2;

	return criticality_table[entry].prediction;
}

//train_criticality_perceptron4 trains the four cr7 perceptrons of the
//instruction at PC from the criterion flags of ROB[robentry].
//On a tag mismatch the entry is claimed for PC and its weights are cleared.
//With criticality_predict_every>0 only the call on which dont_predict wraps
//around trains; every other call just updates the global history.
//Perceptron k is trained against criterion k only (history weights at
//k*criticality_history_size+i+1, bias at k*criticality_history_size).
//The global history and the criticality statistics are updated afterwards;
//the statistics compare the stored prediction with the OR of the criteria.
void train_criticality_perceptron4(unsigned int PC, int robentry)
{
	int slot;
	int idx,base;
	int hs;
	int actual;
	int pred;
	int weight_count = 4*criticality_history_size+4;
	int *weight;
	char *input;

	slot=(PC>>3)%criticality_hash_table_size;

	//a different instruction owns the entry: take it over and start fresh
	if (criticality_table[slot].tag!=(PC>>3)/criticality_hash_table_size)
	{
		criticality_table[slot].tag=(PC>>3)/criticality_hash_table_size;

		for (idx=0; idx<weight_count; idx++)
			criticality_table[slot].weight[idx]=0;

		criticality_table[slot].dont_predict=0;
	}

	if (criticality_predict_every>0)
	{
		criticality_table[slot].dont_predict-=1;
		if (criticality_table[slot].dont_predict<0)
			criticality_table[slot].dont_predict=criticality_predict_every;

		//only the wrap-around call trains; the others just record history
		if (criticality_table[slot].dont_predict!=criticality_predict_every)
		{
			update_global_history_table(robentry, PC);
			return;
		}
		if (criticality_table[slot].prediction==2)
			criticality_table[slot].prediction=1;
	}

	hs=criticality_history_size;
	weight=criticality_table[slot].weight;
	input=criticality_table[slot].input;

	//OR of the four criteria, used for the statistics below
	actual=0;
	if (ROB[robentry].QOLDeverset==1 || ROB[robentry].QOLDDEPeverset==1 || ROB[robentry].ALOLDeverset==1 || ROB[robentry].QCONSeverset==1)
		actual=1;

	//history weights: one group of hs weights per criterion
	for (idx=0; idx<hs; idx++)
	{
		base=idx+1;
		train_perceptron_weight(input[base],&weight[base],criticality_table[slot].prediction,ROB[robentry].QOLDeverset);
		train_perceptron_weight(input[hs+base],&weight[hs+base],criticality_table[slot].prediction,ROB[robentry].QOLDDEPeverset);
		train_perceptron_weight(input[2*hs+base],&weight[2*hs+base],criticality_table[slot].prediction,ROB[robentry].ALOLDeverset);
		train_perceptron_weight(input[3*hs+base],&weight[3*hs+base],criticality_table[slot].prediction,ROB[robentry].QCONSeverset);
	}
	//bias weights, constant input 1
	train_perceptron_weight(1,&weight[0],criticality_table[slot].prediction,ROB[robentry].QOLDeverset);
	train_perceptron_weight(1,&weight[hs],criticality_table[slot].prediction,ROB[robentry].QOLDDEPeverset);
	train_perceptron_weight(1,&weight[2*hs],criticality_table[slot].prediction,ROB[robentry].ALOLDeverset);
	train_perceptron_weight(1,&weight[3*hs],criticality_table[slot].prediction,ROB[robentry].QCONSeverset);

	//shift the new information into the history table
	update_global_history_table(robentry, PC);

	//prediction accuracy statistics
	pred=criticality_table[slot].prediction;
	if (pred==1)
	{
		if (actual==1)
			criticality_correct_true++;
		else
			criticality_wrong_false++;
	}
	else if (pred==0)
	{
		if (actual==1)
			criticality_wrong_true++;
		else
			criticality_correct_false++;
	}

	//per-criterion statistics
	if (ROB[robentry].QOLDeverset==1)
		criticality_QOLD+=1;
	if (ROB[robentry].QOLDDEPeverset==1)
		criticality_QOLDDEP+=1;
	if (ROB[robentry].ALOLDeverset==1)
		criticality_ALOLD+=1;
	if (ROB[robentry].QCONSeverset==1)
		criticality_QCONS+=1;
	criticality_total+=1;
}

//initialize_criticality_perceptron4 allocates the predictor tables for approach 4 (cr7).
//Every hash-table entry receives weight and input arrays of
//4*criticality_history_size+4 elements, starts with tag -1 and all weights 0;
//the four criterion flags of every history slot are cleared.
//fatal() is called if an allocation fails.
void initialize_criticality_perceptron4()
{
	int slot,w;
	//four perceptrons, each with one weight per history instruction plus a bias
	int weight_count=4*criticality_history_size+4;

	criticality_table=(criticality_entry*)calloc(criticality_hash_table_size, sizeof(criticality_entry));
	criticality_history_table=(criticality_history_entry*)calloc(criticality_history_size, sizeof(criticality_history_entry));

	if (!criticality_table || !criticality_history_table)
		fatal("Not enough memory for criticality");

	for (slot=0; slot<criticality_hash_table_size; slot++)
	{
		int *weights=(int*)calloc(weight_count, sizeof(int));
		char *inputs=(char*)calloc(weight_count, sizeof(char));

		criticality_table[slot].tag=-1;
		criticality_table[slot].weight=weights;
		criticality_table[slot].input=inputs;

		if (!weights || !inputs)
			fatal("Not enough memory for criticality");

		//calloc already zero-fills; the explicit reset of the weights is kept
		for (w=0; w<weight_count; w++)
			weights[w]=0;
	}

	//start with an empty global history
	for (slot=0; slot<criticality_history_size; slot++)
	{
		criticality_history_table[slot].QOLD=0;
		criticality_history_table[slot].QOLDDEP=0;
		criticality_history_table[slot].ALOLD=0;
		criticality_history_table[slot].QCONS=0;
	}
}

//Perceptron approach 5 - cr10
//Perceptron predictor: 4 perceptrons, use OR
//Input: for each past inst whether its criterion was met (X4)
//Output: whether that criterion is set
//Training: on OR of criteria

//get_criticality_perceptron5 evaluates the four cr10 perceptrons (one per
//criterion) for the instruction at PC and ORs their outputs.
//Returns 0 = not critical, 1 = critical, 2 = critical while the entry's
//dont_predict counter is 0 and criticality_predict_every>0.
//An entry whose tag does not match PC gives 0 without evaluating anything.
//Side effects: stores the inputs in the entry's input array and the result in
//its prediction field; the training routine reads both back.
//Layout: perceptron k (0..3) keeps the weight of history slot i at index
//k*criticality_history_size+(i+1) and its bias at k*criticality_history_size.
//NOTE(review): for k>0 the bias index equals the index of the last history
//weight of perceptron k-1, so the two share one weight.  The allocation of
//4*criticality_history_size+4 elements suggests a stride of
//criticality_history_size+1 was intended - confirm; changing it requires the
//same change in train_criticality_perceptron5.
int get_criticality_perceptron5(unsigned int PC)
{
	int i,k;
	int entry;
	int hs;
	int sum[4];
	int *weight;
	char *input;

	entry=(PC>>3)%criticality_hash_table_size;

	if (criticality_table[entry].tag!=(PC>>3)/criticality_hash_table_size)
	{
		//assume non-critical on cold instruction
		//may want to study this - % of cold inst that are critical
		criticality_table[entry].prediction=0;
		return 0;
	}

	hs=criticality_history_size;
	weight=criticality_table[entry].weight;
	input=criticality_table[entry].input;

	//bias: training updates weight[k*hs] with a constant input of 1, so the
	//sums start from those weights; reading input[k*hs] instead would pick
	//up an unrelated history input (or the never-written input[0])
	for (k=0; k<4; k++)
		sum[k]=weight[k*hs];

	//compute perceptron outputs
	for (i=0; i<hs; i++)
	{
		int x[4];

		x[0]=(criticality_history_table[i].QOLD==1? 1: -1);
		x[1]=(criticality_history_table[i].QOLDDEP==1? 1: -1);
		x[2]=(criticality_history_table[i].ALOLD==1? 1: -1);
		x[3]=(criticality_history_table[i].QCONS==1? 1: -1);

		//aliasing reduction mode 2 mutes history slots whose counter is 0
		if (aliasing_reduction==2 && criticality_history_table[i].counter==0)
		{
			x[0]=0;
			x[1]=0;
			x[2]=0;
			x[3]=0;
		}

		for (k=0; k<4; k++)
		{
			//remember the input for training; the sum uses the int value so
			//it does not depend on whether plain char is signed
			input[k*hs+(i+1)]=(char)x[k];
			sum[k]+=weight[k*hs+(i+1)]*x[k];
		}
	}

	//threshold: critical if any of the four perceptrons fires
	if (sum[0]>=0 || sum[1]>=0 || sum[2]>=0 || sum[3]>=0)
		criticality_table[entry].prediction=1;
	else
		criticality_table[entry].prediction=0;

	//a positive prediction is reported as 2 when dont_predict is 0 and
	//periodic prediction is enabled; training turns the 2 back into 1
	if (criticality_table[entry].dont_predict==0 && criticality_table[entry].prediction==1 && criticality_predict_every>0)
		criticality_table[entry].prediction=2;

	return criticality_table[entry].prediction;
}

//train_criticality_perceptron5 trains the four cr10 perceptrons of the
//instruction at PC from the criterion flags of ROB[robentry].
//On a tag mismatch the entry is claimed for PC and its weights are cleared.
//With criticality_predict_every>0 only the call on which dont_predict wraps
//around trains; every other call just updates the global history.
//All four perceptrons (history weights at k*criticality_history_size+i+1,
//bias at k*criticality_history_size) are trained against the OR of the four
//criteria.  The global history and the criticality statistics are updated
//afterwards.
void train_criticality_perceptron5(unsigned int PC, int robentry)
{
	int slot;
	int idx,base;
	int hs;
	int actual;
	int pred;
	int weight_count = 4*criticality_history_size+4;
	int *weight;
	char *input;

	slot=(PC>>3)%criticality_hash_table_size;

	//a different instruction owns the entry: take it over and start fresh
	if (criticality_table[slot].tag!=(PC>>3)/criticality_hash_table_size)
	{
		criticality_table[slot].tag=(PC>>3)/criticality_hash_table_size;

		for (idx=0; idx<weight_count; idx++)
			criticality_table[slot].weight[idx]=0;

		criticality_table[slot].dont_predict=0;
	}

	if (criticality_predict_every>0)
	{
		criticality_table[slot].dont_predict-=1;
		if (criticality_table[slot].dont_predict<0)
			criticality_table[slot].dont_predict=criticality_predict_every;

		//only the wrap-around call trains; the others just record history
		if (criticality_table[slot].dont_predict!=criticality_predict_every)
		{
			update_global_history_table(robentry, PC);
			return;
		}
		if (criticality_table[slot].prediction==2)
			criticality_table[slot].prediction=1;
	}

	hs=criticality_history_size;
	weight=criticality_table[slot].weight;
	input=criticality_table[slot].input;

	//training target: OR of the four criteria
	actual=0;
	if (ROB[robentry].QOLDeverset==1 || ROB[robentry].QOLDDEPeverset==1 || ROB[robentry].ALOLDeverset==1 || ROB[robentry].QCONSeverset==1)
		actual=1;

	//history weights: one group of hs weights per criterion
	for (idx=0; idx<hs; idx++)
	{
		base=idx+1;
		train_perceptron_weight(input[base],&weight[base],criticality_table[slot].prediction,actual);
		train_perceptron_weight(input[hs+base],&weight[hs+base],criticality_table[slot].prediction,actual);
		train_perceptron_weight(input[2*hs+base],&weight[2*hs+base],criticality_table[slot].prediction,actual);
		train_perceptron_weight(input[3*hs+base],&weight[3*hs+base],criticality_table[slot].prediction,actual);
	}
	//bias weights, constant input 1
	train_perceptron_weight(1,&weight[0],criticality_table[slot].prediction,actual);
	train_perceptron_weight(1,&weight[hs],criticality_table[slot].prediction,actual);
	train_perceptron_weight(1,&weight[2*hs],criticality_table[slot].prediction,actual);
	train_perceptron_weight(1,&weight[3*hs],criticality_table[slot].prediction,actual);

	//shift the new information into the history table
	update_global_history_table(robentry, PC);

	//prediction accuracy statistics
	pred=criticality_table[slot].prediction;
	if (pred==1)
	{
		if (actual==1)
			criticality_correct_true++;
		else
			criticality_wrong_false++;
	}
	else if (pred==0)
	{
		if (actual==1)
			criticality_wrong_true++;
		else
			criticality_correct_false++;
	}

	//per-criterion statistics
	if (ROB[robentry].QOLDeverset==1)
		criticality_QOLD+=1;
	if (ROB[robentry].QOLDDEPeverset==1)
		criticality_QOLDDEP+=1;
	if (ROB[robentry].ALOLDeverset==1)
		criticality_ALOLD+=1;
	if (ROB[robentry].QCONSeverset==1)
		criticality_QCONS+=1;
	criticality_total+=1;
}

//initialize_criticality_perceptron5 allocates the predictor tables for approach 5 (cr10).
//Every hash-table entry receives weight and input arrays of
//4*criticality_history_size+4 elements, starts with tag -1 and all weights 0;
//the four criterion flags of every history slot are cleared.
//fatal() is called if an allocation fails.
void initialize_criticality_perceptron5()
{
	int slot,w;
	//four perceptrons, each with one weight per history instruction plus a bias
	int weight_count=4*criticality_history_size+4;

	criticality_table=(criticality_entry*)calloc(criticality_hash_table_size, sizeof(criticality_entry));
	criticality_history_table=(criticality_history_entry*)calloc(criticality_history_size, sizeof(criticality_history_entry));

	if (!criticality_table || !criticality_history_table)
		fatal("Not enough memory for criticality");

	for (slot=0; slot<criticality_hash_table_size; slot++)
	{
		int *weights=(int*)calloc(weight_count, sizeof(int));
		char *inputs=(char*)calloc(weight_count, sizeof(char));

		criticality_table[slot].tag=-1;
		criticality_table[slot].weight=weights;
		criticality_table[slot].input=inputs;

		if (!weights || !inputs)
			fatal("Not enough memory for criticality");

		//calloc already zero-fills; the explicit reset of the weights is kept
		for (w=0; w<weight_count; w++)
			weights[w]=0;
	}

	//start with an empty global history
	for (slot=0; slot<criticality_history_size; slot++)
	{
		criticality_history_table[slot].QOLD=0;
		criticality_history_table[slot].QOLDDEP=0;
		criticality_history_table[slot].ALOLD=0;
		criticality_history_table[slot].QCONS=0;
	}
}

//Perceptron approach hybrid - cr8
//Perceptron predictor: same as 1, but hybrid (each entry chooses between the perceptron and per-criterion counters)

//get_criticality_perceptron_hybrid produces the cr8 prediction for the
//instruction at PC from two sources: a perceptron over the global history and
//the entry's four per-criterion counters.  The entry's hybrid_ctr selects the
//source: below criticality_ctr_thresh the counter prediction is used,
//otherwise the perceptron prediction.
//Returns 0 = not critical, 1 = critical, 2 = critical while the entry's
//dont_predict counter is 0 and criticality_predict_every>0.
//An entry whose tag does not match PC gives 0 and clears all three stored
//predictions.
//Side effects: stores the perceptron inputs in the entry's input array and
//sets its perceptron_prediction, counter_prediction and prediction fields,
//all of which the training routine reads back.
int get_criticality_perceptron_hybrid(unsigned int PC)
{
	int i,k;
	int entry;
	int sum;
	int *weight;
	char *input;

	entry=(PC>>3)%criticality_hash_table_size;

	if (criticality_table[entry].tag!=(PC>>3)/criticality_hash_table_size)
	{
		//assume non-critical on cold instruction
		//may want to study this - % of cold inst that are critical
		criticality_table[entry].counter_prediction=0;
		criticality_table[entry].perceptron_prediction=0;
		criticality_table[entry].prediction=0;
		return 0;
	}

	weight=criticality_table[entry].weight;
	input=criticality_table[entry].input;

	//bias: training updates weight[0] with a constant input of 1, while
	//nothing visible ever writes input[0], so the sum has to start from the
	//weight for the trained bias to have any effect
	sum=weight[0];

	//compute perceptron output
	for (i=0; i<criticality_history_size; i++)
	{
		int x[4];

		x[0]=(criticality_history_table[i].QOLD==1? 1: -1);
		x[1]=(criticality_history_table[i].QOLDDEP==1? 1: -1);
		x[2]=(criticality_history_table[i].ALOLD==1? 1: -1);
		x[3]=(criticality_history_table[i].QCONS==1? 1: -1);

		//aliasing reduction mode 2 mutes history slots whose counter is 0
		if (aliasing_reduction==2 && criticality_history_table[i].counter==0)
		{
			x[0]=0;
			x[1]=0;
			x[2]=0;
			x[3]=0;
		}

		for (k=0; k<4; k++)
		{
			//remember the input for training; the sum uses the int value so
			//it does not depend on whether plain char is signed
			input[4*i+k+1]=(char)x[k];
			sum+=weight[4*i+k+1]*x[k];
		}
	}

	//threshold
	if (sum>=0)
		criticality_table[entry].perceptron_prediction=1;
	else
		criticality_table[entry].perceptron_prediction=0;

	//counter prediction: critical if any criterion counter reached the threshold
	criticality_table[entry].counter_prediction=0;
	if (criticality_table[entry].QOLD_ctr>=criticality_ctr_thresh
		|| criticality_table[entry].QOLDDEP_ctr>=criticality_ctr_thresh
		|| criticality_table[entry].ALOLD_ctr>=criticality_ctr_thresh
		|| criticality_table[entry].QCONS_ctr>=criticality_ctr_thresh)
		criticality_table[entry].counter_prediction=1;

	//the chooser counter picks which of the two predictions is reported
	if (criticality_table[entry].hybrid_ctr<criticality_ctr_thresh)
		criticality_table[entry].prediction=criticality_table[entry].counter_prediction;
	else
		criticality_table[entry].prediction=criticality_table[entry].perceptron_prediction;

	//a positive prediction is reported as 2 when dont_predict is 0 and
	//periodic prediction is enabled; training turns the 2 back into 1
	if (criticality_table[entry].dont_predict==0 && criticality_table[entry].prediction==1 && criticality_predict_every>0)
		criticality_table[entry].prediction=2;

	return criticality_table[entry].prediction;
}

//train_criticality_perceptron_hybrid trains the cr8 predictor entry of the
//instruction at PC from the criterion flags of ROB[robentry].
//On a tag mismatch the entry is claimed for PC: weights and criterion
//counters are cleared and hybrid_ctr is set to 1.
//With criticality_predict_every>0 only the call on which dont_predict wraps
//around trains; every other call just updates the global history.
//Training covers three parts: the perceptron weights (against the OR of the
//criteria, using the stored perceptron_prediction), the four criterion
//counters (kept within 0..2*criticality_ctr_thresh-1), and the chooser
//counter hybrid_ctr, which moves towards whichever predictor alone was right.
//The global history and the criticality statistics are updated as well.
void train_criticality_perceptron_hybrid(unsigned int PC, int robentry)
{
	int slot;
	int idx,base;
	int actual;
	int pred;
	int counter_ok,perceptron_ok;
	int weight_count = 4*criticality_history_size+1;
	int *weight;
	char *input;

	slot=(PC>>3)%criticality_hash_table_size;

	//a different instruction owns the entry: take it over and start fresh
	if (criticality_table[slot].tag!=(PC>>3)/criticality_hash_table_size)
	{
		criticality_table[slot].tag=(PC>>3)/criticality_hash_table_size;

		for (idx=0; idx<weight_count; idx++)
			criticality_table[slot].weight[idx]=0;

		criticality_table[slot].QOLD_ctr=0;
		criticality_table[slot].QOLDDEP_ctr=0;
		criticality_table[slot].ALOLD_ctr=0;
		criticality_table[slot].QCONS_ctr=0;
		criticality_table[slot].hybrid_ctr=1;	//mild counter preference

		criticality_table[slot].dont_predict=0;
	}

	if (criticality_predict_every>0)
	{
		criticality_table[slot].dont_predict-=1;
		if (criticality_table[slot].dont_predict<0)
			criticality_table[slot].dont_predict=criticality_predict_every;

		//only the wrap-around call trains; the others just record history
		if (criticality_table[slot].dont_predict!=criticality_predict_every)
		{
			update_global_history_table(robentry, PC);
			return;
		}
		if (criticality_table[slot].prediction==2)
			criticality_table[slot].prediction=1;
	}

	weight=criticality_table[slot].weight;
	input=criticality_table[slot].input;

	//training target: OR of the four criteria
	actual=0;
	if (ROB[robentry].QOLDeverset==1 || ROB[robentry].QOLDDEPeverset==1 || ROB[robentry].ALOLDeverset==1 || ROB[robentry].QCONSeverset==1)
		actual=1;

	//perceptron weights are trained on the perceptron's own prediction
	for (idx=0; idx<criticality_history_size; idx++)
	{
		base=4*idx+1;
		train_perceptron_weight(input[base],&weight[base],criticality_table[slot].perceptron_prediction,actual);
		train_perceptron_weight(input[base+1],&weight[base+1],criticality_table[slot].perceptron_prediction,actual);
		train_perceptron_weight(input[base+2],&weight[base+2],criticality_table[slot].perceptron_prediction,actual);
		train_perceptron_weight(input[base+3],&weight[base+3],criticality_table[slot].perceptron_prediction,actual);
	}
	//bias weight, constant input 1
	train_perceptron_weight(1,&weight[0],criticality_table[slot].perceptron_prediction,actual);

	//shift the new information into the history table
	update_global_history_table(robentry, PC);

	//criterion counters: step up when the criterion was met, down otherwise,
	//then clamp to the range 0..2*criticality_ctr_thresh-1
	if (ROB[robentry].QOLDeverset==1)
		criticality_table[slot].QOLD_ctr++;
	else
		criticality_table[slot].QOLD_ctr-=1;
	if (criticality_table[slot].QOLD_ctr>2*criticality_ctr_thresh-1)
		criticality_table[slot].QOLD_ctr=2*criticality_ctr_thresh-1;
	if (criticality_table[slot].QOLD_ctr<0)
		criticality_table[slot].QOLD_ctr=0;

	if (ROB[robentry].QOLDDEPeverset==1)
		criticality_table[slot].QOLDDEP_ctr++;
	else
		criticality_table[slot].QOLDDEP_ctr-=1;
	if (criticality_table[slot].QOLDDEP_ctr>2*criticality_ctr_thresh-1)
		criticality_table[slot].QOLDDEP_ctr=2*criticality_ctr_thresh-1;
	if (criticality_table[slot].QOLDDEP_ctr<0)
		criticality_table[slot].QOLDDEP_ctr=0;

	if (ROB[robentry].ALOLDeverset==1)
		criticality_table[slot].ALOLD_ctr++;
	else
		criticality_table[slot].ALOLD_ctr-=1;
	if (criticality_table[slot].ALOLD_ctr>2*criticality_ctr_thresh-1)
		criticality_table[slot].ALOLD_ctr=2*criticality_ctr_thresh-1;
	if (criticality_table[slot].ALOLD_ctr<0)
		criticality_table[slot].ALOLD_ctr=0;

	if (ROB[robentry].QCONSeverset==1)
		criticality_table[slot].QCONS_ctr++;
	else
		criticality_table[slot].QCONS_ctr-=1;
	if (criticality_table[slot].QCONS_ctr>2*criticality_ctr_thresh-1)
		criticality_table[slot].QCONS_ctr=2*criticality_ctr_thresh-1;
	if (criticality_table[slot].QCONS_ctr<0)
		criticality_table[slot].QCONS_ctr=0;

	//chooser: move towards the predictor that alone was right
	counter_ok=(criticality_table[slot].counter_prediction==actual);
	perceptron_ok=(criticality_table[slot].perceptron_prediction==actual);
	if (perceptron_ok && !counter_ok)
	{
		criticality_table[slot].hybrid_ctr+=1;
		if (criticality_table[slot].hybrid_ctr>criticality_ctr_thresh*2-1)
			criticality_table[slot].hybrid_ctr=criticality_ctr_thresh*2-1;
	}
	else if (counter_ok && !perceptron_ok)
	{
		criticality_table[slot].hybrid_ctr-=1;
		if (criticality_table[slot].hybrid_ctr<0)
			criticality_table[slot].hybrid_ctr=0;
	}

	//prediction accuracy statistics
	pred=criticality_table[slot].prediction;
	if (pred==1)
	{
		if (actual==1)
			criticality_correct_true++;
		else
			criticality_wrong_false++;
	}
	else if (pred==0)
	{
		if (actual==1)
			criticality_wrong_true++;
		else
			criticality_correct_false++;
	}

	//per-criterion statistics
	if (ROB[robentry].QOLDeverset==1)
		criticality_QOLD+=1;
	if (ROB[robentry].QOLDDEPeverset==1)
		criticality_QOLDDEP+=1;
	if (ROB[robentry].ALOLDeverset==1)
		criticality_ALOLD+=1;
	if (ROB[robentry].QCONSeverset==1)
		criticality_QCONS+=1;
	criticality_total+=1;
}

//initialize_criticality_perceptron_hybrid allocates the criticality table and
//the global history table for the hybrid predictor
//every table entry gets a weight vector and an input vector of
//4*criticality_history_size+1 elements
//calls fatal() if any allocation fails
void initialize_criticality_perceptron_hybrid()
{
	int entry,slot,w;
	//one weight per criterion (4) for each instruction in the history, plus a bias
	int weight_count=1+criticality_history_size*4;

	criticality_table=(criticality_entry*)calloc(criticality_hash_table_size, sizeof(criticality_entry));
	criticality_history_table=(criticality_history_entry*)calloc(criticality_history_size, sizeof(criticality_history_entry));

	if (criticality_history_table==0 || criticality_table==0)
		fatal("Not enough memory for criticality");

	//set up each table entry: tag of -1, zeroed weights
	entry=0;
	while (entry<criticality_hash_table_size)
	{
		criticality_table[entry].tag=-1;
		criticality_table[entry].weight=(int*)calloc(weight_count, sizeof(int));
		criticality_table[entry].input=(char*)calloc(weight_count, sizeof(char));

		if (criticality_table[entry].input==0 || criticality_table[entry].weight==0)
			fatal("Not enough memory for criticality");

		for (w=weight_count-1; w>=0; w--)
			criticality_table[entry].weight[w]=0;

		entry++;
	}

	//clear the four criterion flags of every history slot
	for (slot=criticality_history_size-1; slot>=0; slot--)
	{
		criticality_history_table[slot].QCONS=0;
		criticality_history_table[slot].ALOLD=0;
		criticality_history_table[slot].QOLDDEP=0;
		criticality_history_table[slot].QOLD=0;
	}
}

//Perceptron approach piecewise - cr9
//Perceptron predictor: same as 1, but with piecewise linear antialiasing

//number of weight groups held by each table entry
//for every history slot, (slot PC>>3) modulo this value selects which group of
//4*criticality_history_size weights is used for that slot
int criticality_piecewise_number=16;

//get_criticality_perceptron_piecewise returns the piecewise perceptron's
//criticality prediction for the instruction at PC
//returns 0 = not critical (also returned when the entry's tag does not match)
//        1 = critical
//        2 = perceptron output was critical while the entry's dont_predict is 0
//            and criticality_predict_every>0
//side effects: the prediction and the computed inputs are stored in the table
//entry, where train_criticality_perceptron_piecewise reads them back
int get_criticality_perceptron_piecewise(unsigned int PC)
{
	int i,k,m,base;
	int entry;
	int sum;

	entry=(PC>>3)%criticality_hash_table_size;

	if (criticality_table[entry].tag!=(PC>>3)/criticality_hash_table_size)
	{
		criticality_table[entry].prediction=0;
		//assume non-critical on cold instruction
		//may want to study this - % of cold inst that are critical
		return 0;
	}

	//compute perceptron output
	sum=0;
	for (i=0; i<criticality_history_size; i++)
	{
		//the PC of the instruction in history slot i selects the weight group
		m=4*criticality_history_size*((criticality_history_table[i].PC>>3)%criticality_piecewise_number);
		//index 0 is the bias, so the slot's four inputs start at m+4*i+1
		base=m+4*i+1;

		//inputs are +1 if the criterion was set for that instruction, else -1
		criticality_table[entry].input[base+0]=(criticality_history_table[i].QOLD==1? 1: -1);
		criticality_table[entry].input[base+1]=(criticality_history_table[i].QOLDDEP==1? 1: -1);
		criticality_table[entry].input[base+2]=(criticality_history_table[i].ALOLD==1? 1: -1);
		criticality_table[entry].input[base+3]=(criticality_history_table[i].QCONS==1? 1: -1);

		//with aliasing reduction 2, a history slot whose counter is 0 contributes nothing
		if (aliasing_reduction==2 && criticality_history_table[i].counter==0)
		{
			for (k=0; k<4; k++)
				criticality_table[entry].input[base+k]=0;
		}

		for (k=0; k<4; k++)
			sum+=criticality_table[entry].weight[base+k] * criticality_table[entry].input[base+k];
	}
	//bias: the bias weight is weight[0] and is trained with a constant input of 1
	//(the original added input[0], which is never written, so the trained bias was ignored)
	sum+=criticality_table[entry].weight[0];

	//threshold
	if (sum>=0)
		criticality_table[entry].prediction=1;
	else
		criticality_table[entry].prediction=0;

	if (criticality_table[entry].dont_predict==0 && criticality_table[entry].prediction==1 && criticality_predict_every>0)
		criticality_table[entry].prediction=2;

	return criticality_table[entry].prediction;
}

//train_criticality_perceptron_piecewise trains the piecewise perceptron entry
//for the instruction at PC with the outcome recorded in ROB[robentry]
//the training target is the OR of the four criteria (QOLD, QOLDDEP, ALOLD, QCONS)
//when criticality_predict_every>0, training only happens on the call where the
//entry's dont_predict counter wraps; the other calls only update the global history
//also updates the PC history and the prediction statistics
void train_criticality_perceptron_piecewise(unsigned int PC, int robentry)
{
	int entry;
	int i,k,m,base;
	int actual;
	//must match the allocation in initialize_criticality_perceptron_piecewise:
	//4 weights per history slot for each of the piecewise groups, plus a bias
	//(the original omitted the group count and left stale weights after a tag reset)
	int numw = 4*criticality_history_size*criticality_piecewise_number+1;

	entry=(PC>>3)%criticality_hash_table_size;
	
	//if tags don't match, reset entry
	if (criticality_table[entry].tag!=(PC>>3)/criticality_hash_table_size)
	{
		criticality_table[entry].tag=(PC>>3)/criticality_hash_table_size;

		//reset the weights
		for (i=0; i<numw; i++)
			criticality_table[entry].weight[i]=0;

		criticality_table[entry].dont_predict=0;
	}

	if (criticality_predict_every>0)
	{
		//count down; when the counter passes 0 it is reloaded and this call trains
		criticality_table[entry].dont_predict--;
		if (criticality_table[entry].dont_predict<0)
			criticality_table[entry].dont_predict=criticality_predict_every;

		if (criticality_table[entry].dont_predict!=criticality_predict_every)
		{
			//shift the new information into the history table
			update_global_history_table(robentry, PC);
			return;
		}
		//a stored prediction of 2 is treated as "critical" for training and statistics
		if (criticality_table[entry].prediction==2)
			criticality_table[entry].prediction=1;
	}

	//train the perceptron
	//determine the training value
	//use the OR of the criteria
	actual=0;
	if (ROB[robentry].QOLDeverset==1)
		actual=1;
	if (ROB[robentry].QOLDDEPeverset==1)
		actual=1;
	if (ROB[robentry].ALOLDeverset==1)
		actual=1;
	if (ROB[robentry].QCONSeverset==1)
		actual=1;

	//train each weight
	for (i=0; i<criticality_history_size; i++)
	{
		//the PC of the instruction in history slot i selects the weight group
		m=4*criticality_history_size*((criticality_history_table[i].PC>>3)%criticality_piecewise_number);
		//index 0 is the bias, so the slot's four weights start at m+4*i+1
		base=m+4*i+1;

		for (k=0; k<4; k++)
			train_perceptron_weight(criticality_table[entry].input[base+k],&criticality_table[entry].weight[base+k],criticality_table[entry].prediction,actual);
	}
	//train the bias weight
	train_perceptron_weight(1,&criticality_table[entry].weight[0],criticality_table[entry].prediction,actual);

	//shift the new information into the history table
	update_global_history_table(robentry, PC);

	//shift the PC into the history table
	for (i=criticality_history_size-1; i>0; i--)
	{
		criticality_history_table[i].PC=criticality_history_table[i-1].PC;
	}
	criticality_history_table[0].PC=PC;

	//update statistics
	if (actual==1 && criticality_table[entry].prediction==1)
		criticality_correct_true++;
	else if (actual==1 && criticality_table[entry].prediction==0)
		criticality_wrong_true++;
	else if (actual==0 && criticality_table[entry].prediction==1)
		criticality_wrong_false++;
	else if (actual==0 && criticality_table[entry].prediction==0)
		criticality_correct_false++;

	//tally how often each criterion was set
	if (ROB[robentry].QOLDeverset==1)
		criticality_QOLD++;
	if (ROB[robentry].QOLDDEPeverset==1)
		criticality_QOLDDEP++;
	if (ROB[robentry].ALOLDeverset==1)
		criticality_ALOLD++;
	if (ROB[robentry].QCONSeverset==1)
		criticality_QCONS++;
	criticality_total++;
}

//initialize_criticality_perceptron_piecewise allocates the criticality table
//and the global history table for the piecewise predictor
//every table entry gets a weight vector and an input vector of
//4*criticality_history_size*criticality_piecewise_number+1 elements
//calls fatal() if any allocation fails
void initialize_criticality_perceptron_piecewise()
{
	int entry,slot,w;
	//4 weights per history slot (one per criterion) for each piecewise group, plus a bias
	int weight_count=1+criticality_piecewise_number*criticality_history_size*4;

	criticality_table=(criticality_entry*)calloc(criticality_hash_table_size, sizeof(criticality_entry));
	criticality_history_table=(criticality_history_entry*)calloc(criticality_history_size, sizeof(criticality_history_entry));

	if (criticality_history_table==0 || criticality_table==0)
		fatal("Not enough memory for criticality");

	//set up each table entry: tag of -1, zeroed weights
	entry=0;
	while (entry<criticality_hash_table_size)
	{
		criticality_table[entry].tag=-1;
		criticality_table[entry].weight=(int*)calloc(weight_count, sizeof(int));
		criticality_table[entry].input=(char*)calloc(weight_count, sizeof(char));

		if (criticality_table[entry].input==0 || criticality_table[entry].weight==0)
			fatal("Not enough memory for criticality");

		for (w=weight_count-1; w>=0; w--)
			criticality_table[entry].weight[w]=0;

		entry++;
	}

	//clear the four criterion flags of every history slot
	for (slot=criticality_history_size-1; slot>=0; slot--)
	{
		criticality_history_table[slot].QCONS=0;
		criticality_history_table[slot].ALOLD=0;
		criticality_history_table[slot].QOLDDEP=0;
		criticality_history_table[slot].QOLD=0;
	}
}

//Study
//study_criticality gathers statistics instead of predicting
//for the instruction at PC it tallies which criteria were set in ROB[robentry],
//compares each criterion with the value saved the last time the same table
//entry was used (n = not set, p = set; e.g. np = previously not set, now set),
//then shifts the new values into the entry's local history
void study_criticality(unsigned int PC, int robentry)
{
	int entry;
	int j;

	entry=(PC>>3)%criticality_hash_table_size;
	
	//if tags don't match, reset entry
	if (criticality_table[entry].tag!=(PC>>3)/criticality_hash_table_size)
	{
		criticality_table[entry].tag=(PC>>3)/criticality_hash_table_size;

		criticality_table[entry].local_entries=0;
	}

	//tally criterion
	if (ROB[robentry].QOLDeverset==1)
		criticality_QOLD++;
	if (ROB[robentry].QOLDDEPeverset==1)
		criticality_QOLDDEP++;
	if (ROB[robentry].ALOLDeverset==1)
		criticality_ALOLD++;
	if (ROB[robentry].QCONSeverset==1)
		criticality_QCONS++;
	criticality_total++;

	//is same as last local entry?
	if (criticality_table[entry].local_entries==0)
		first_iteration++;
	else
	{
		if (criticality_table[entry].pastQOLD[0]==0 && ROB[robentry].QOLDeverset==0)
			QOLD_local_nn++;
		else if (criticality_table[entry].pastQOLD[0]==0 && ROB[robentry].QOLDeverset==1)
			QOLD_local_np++;
		else if (criticality_table[entry].pastQOLD[0]==1 && ROB[robentry].QOLDeverset==0)
			QOLD_local_pn++;
		else if (criticality_table[entry].pastQOLD[0]==1 && ROB[robentry].QOLDeverset==1)
			QOLD_local_pp++;

		if (criticality_table[entry].pastQOLDDEP[0]==0 && ROB[robentry].QOLDDEPeverset==0)
			QOLDDEP_local_nn++;
		else if (criticality_table[entry].pastQOLDDEP[0]==0 && ROB[robentry].QOLDDEPeverset==1)
			QOLDDEP_local_np++;
		else if (criticality_table[entry].pastQOLDDEP[0]==1 && ROB[robentry].QOLDDEPeverset==0)
			QOLDDEP_local_pn++;
		else if (criticality_table[entry].pastQOLDDEP[0]==1 && ROB[robentry].QOLDDEPeverset==1)
			QOLDDEP_local_pp++;

		if (criticality_table[entry].pastALOLD[0]==0 && ROB[robentry].ALOLDeverset==0)
			ALOLD_local_nn++;
		else if (criticality_table[entry].pastALOLD[0]==0 && ROB[robentry].ALOLDeverset==1)
			ALOLD_local_np++;
		else if (criticality_table[entry].pastALOLD[0]==1 && ROB[robentry].ALOLDeverset==0)
			ALOLD_local_pn++;
		else if (criticality_table[entry].pastALOLD[0]==1 && ROB[robentry].ALOLDeverset==1)
			ALOLD_local_pp++;

		if (criticality_table[entry].pastQCONS[0]==0 && ROB[robentry].QCONSeverset==0)
			QCONS_local_nn++;
		else if (criticality_table[entry].pastQCONS[0]==0 && ROB[robentry].QCONSeverset==1)
			QCONS_local_np++;
		else if (criticality_table[entry].pastQCONS[0]==1 && ROB[robentry].QCONSeverset==0)
			QCONS_local_pn++;
		else if (criticality_table[entry].pastQCONS[0]==1 && ROB[robentry].QCONSeverset==1)
			QCONS_local_pp++;
	}

	//save info
	criticality_table[entry].local_entries++;
	//clamp to the size of the past* arrays (criticality_local_history_size elements each)
	//the original compared with == and so never clamped: local_entries kept growing
	//and the shift loop below wrote past the end of the arrays
	if (criticality_table[entry].local_entries>criticality_local_history_size)
		criticality_table[entry].local_entries=criticality_local_history_size;
	//shift the local history down by one, oldest value falls off the end
	for (j=criticality_table[entry].local_entries-1; j>=1; j--)
	{
		criticality_table[entry].pastQOLD[j]=criticality_table[entry].pastQOLD[j-1];
		criticality_table[entry].pastQOLDDEP[j]=criticality_table[entry].pastQOLDDEP[j-1];
		criticality_table[entry].pastALOLD[j]=criticality_table[entry].pastALOLD[j-1];
		criticality_table[entry].pastQCONS[j]=criticality_table[entry].pastQCONS[j-1];
	}
	criticality_table[entry].pastQOLD[0]=ROB[robentry].QOLDeverset;
	criticality_table[entry].pastQOLDDEP[0]=ROB[robentry].QOLDDEPeverset;
	criticality_table[entry].pastALOLD[0]=ROB[robentry].ALOLDeverset;
	criticality_table[entry].pastQCONS[0]=ROB[robentry].QCONSeverset;
}

//initialize_criticality_study allocates the criticality table and the global
//history table for the study mode
//every table entry gets four local history arrays (one per criterion) of
//criticality_local_history_size ints, all zero, and starts with no local entries
//calls fatal() if any allocation fails
void initialize_criticality_study()
{
	int entry,slot,k;

	criticality_table=(criticality_entry*)calloc(criticality_hash_table_size, sizeof(criticality_entry));
	criticality_history_table=(criticality_history_entry*)calloc(criticality_history_size, sizeof(criticality_history_entry));

	if (criticality_history_table==0 || criticality_table==0)
		fatal("Not enough memory for criticality");

	entry=0;
	while (entry<criticality_hash_table_size)
	{
		criticality_table[entry].tag=-1;

		//one local history array per criterion
		criticality_table[entry].pastQOLD=(int*)calloc(criticality_local_history_size,sizeof(int));
		criticality_table[entry].pastQOLDDEP=(int*)calloc(criticality_local_history_size,sizeof(int));
		criticality_table[entry].pastALOLD=(int*)calloc(criticality_local_history_size,sizeof(int));
		criticality_table[entry].pastQCONS=(int*)calloc(criticality_local_history_size,sizeof(int));

		if (criticality_table[entry].pastQCONS==0 || criticality_table[entry].pastALOLD==0 || criticality_table[entry].pastQOLDDEP==0 || criticality_table[entry].pastQOLD==0)
			fatal("Not enough memory for criticality");

		for (k=criticality_local_history_size-1; k>=0; k--)
		{
			criticality_table[entry].pastQCONS[k]=0;
			criticality_table[entry].pastALOLD[k]=0;
			criticality_table[entry].pastQOLDDEP[k]=0;
			criticality_table[entry].pastQOLD[k]=0;
		}
		criticality_table[entry].local_entries=0;

		entry++;
	}

	//clear the four criterion flags of every global history slot
	for (slot=criticality_history_size-1; slot>=0; slot--)
	{
		criticality_history_table[slot].QCONS=0;
		criticality_history_table[slot].ALOLD=0;
		criticality_history_table[slot].QOLDDEP=0;
		criticality_history_table[slot].QOLD=0;
	}
}

//get_criticality returns the criticality prediction for the instruction at PC
//from the predictor selected by criticality_type
//types 0 and 4 always return 0, type 1 always returns 1,
//the other known types return whatever their predictor returns
//an unknown type returns 0
//also counts every call (criticality_predictions) and every call whose
//result is exactly 1 (criticality_predictions_made)
int get_criticality(unsigned int PC)
{
	//0 is the answer for types 0 and 4, and the safe default for unknown types
	//(the original left cr uninitialized when no branch of the chain matched)
	int cr=0;

	switch (criticality_type)
	{
		case 1:
			cr=1;
			break;
		case 2:
			cr=get_criticality_baseline(PC);
			break;
		case 3:
			cr=get_criticality_perceptron1(PC);
			break;
		case 5:
			cr=get_criticality_perceptron2(PC);
			break;
		case 6:
			cr=get_criticality_perceptron3(PC);
			break;
		case 7:
			cr=get_criticality_perceptron4(PC);
			break;
		case 8:
			cr=get_criticality_perceptron_hybrid(PC);
			break;
		case 9:
			cr=get_criticality_perceptron_piecewise(PC);
			break;
		case 10:
			cr=get_criticality_perceptron5(PC);
			break;
		default:
			//types 0 and 4, and anything unknown: not critical
			break;
	}

	criticality_predictions++;
	if (cr==1)
		criticality_predictions_made++;

	return cr;
}

//train_criticality forwards the outcome of the instruction at PC (held in
//ROB[robentry]) to the trainer of the predictor selected by criticality_type
//type 4 runs the study routine; types without a trainer (0, 1, unknown) do nothing
void train_criticality(unsigned int PC, int robentry)
{
	switch (criticality_type)
	{
		case 2:
			train_criticality_baseline(PC, robentry);
			break;
		case 3:
			train_criticality_perceptron1(PC, robentry);
			break;
		case 4:
			study_criticality(PC, robentry);
			break;
		case 5:
			train_criticality_perceptron2(PC, robentry);
			break;
		case 6:
			train_criticality_perceptron3(PC, robentry);
			break;
		case 7:
			train_criticality_perceptron4(PC, robentry);
			break;
		case 8:
			train_criticality_perceptron_hybrid(PC, robentry);
			break;
		case 9:
			train_criticality_perceptron_piecewise(PC, robentry);
			break;
		case 10:
			train_criticality_perceptron5(PC, robentry);
			break;
		default:
			//nothing to train
			break;
	}
}

//print_criticality_criterion_totals prints how often each criterion was set,
//followed by the total number of instructions tallied
static void print_criticality_criterion_totals()
{
	printf("QOLD total: %u\n",criticality_QOLD);
	printf("QOLDDEP total: %u\n",criticality_QOLDDEP);
	printf("ALOLD total: %u\n",criticality_ALOLD);
	printf("QCONS total: %u\n",criticality_QCONS);
	printf("Total: %u\n",criticality_total);
}

//dump_criticality prints the criticality statistics to stdout
//what is printed depends on criticality_type, subtractcritical and
//tally_perceptron_weights
void dump_criticality()
{
	int is_study=(criticality_type==4);

	printf("Total instructions for which a critical-path prediction was made: %u\n",criticality_predictions);
	printf("Total instructions guessed critical-path: %u\n",criticality_predictions_made);

	//trained predictors: prediction accuracy, then the criterion totals
	if (!is_study && criticality_type>=2)
	{
		printf("Criticality-like behavior prediction results:\n");
		printf("\tTrue Correct: %u\n",criticality_correct_true);
		printf("\tTrue Wrong: %u\n",criticality_wrong_true);
		printf("\tFalse Correct: %u\n",criticality_correct_false);
		printf("\tFalse Wrong: %u\n",criticality_wrong_false);

		print_criticality_criterion_totals();
	}

	if (subtractcritical==2)
	{
		printf("Total instructions marked critical during execution: %u\n",marked_critical);
		printf("Total instructions not marked critical during execution: %u\n",not_marked_critical);
		printf("Total instructions that should have been marked critical during execution: %u\n",should_have_been_marked_critical);
	}

	//study mode: criterion totals, then the local history transition counts
	if (is_study)
	{
		print_criticality_criterion_totals();

		printf("First in local history: %u\n",first_iteration);
		printf("QOLD: nn: %u, np: %u, pn: %u, pp: %u\n",QOLD_local_nn,QOLD_local_np,QOLD_local_pn,QOLD_local_pp);
		printf("QOLDDEP: nn: %u, np: %u, pn: %u, pp: %u\n",QOLDDEP_local_nn,QOLDDEP_local_np,QOLDDEP_local_pn,QOLDDEP_local_pp);
		printf("ALOLD: nn: %u, np: %u, pn: %u, pp: %u\n",ALOLD_local_nn,ALOLD_local_np,ALOLD_local_pn,ALOLD_local_pp);
		printf("QCONS: nn: %u, np: %u, pn: %u, pp: %u\n",QCONS_local_nn,QCONS_local_np,QCONS_local_pn,QCONS_local_pp);
	}

	if (tally_perceptron_weights>0)
		dump_criticality_weight_tally();
}

//initialize_criticality runs the initializer of the predictor selected by
//criticality_type; types without an initializer (0, 1, unknown) do nothing
void initialize_criticality()
{
	switch (criticality_type)
	{
		case 2:
			initialize_criticality_baseline();
			break;
		case 3:
			initialize_criticality_perceptron1();
			break;
		case 4:
			initialize_criticality_study();
			break;
		case 5:
			initialize_criticality_perceptron2();
			break;
		case 6:
			initialize_criticality_perceptron3();
			break;
		case 7:
			initialize_criticality_perceptron4();
			break;
		case 8:
			initialize_criticality_perceptron_hybrid();
			break;
		case 9:
			initialize_criticality_perceptron_piecewise();
			break;
		case 10:
			initialize_criticality_perceptron5();
			break;
		default:
			//nothing to set up
			break;
	}
}
//myinst.c
//Michael Black, 2006
//
//myinst.c actually executes the PISA instructions

#include "mysim.h"
#include <math.h>

//the following variables are defined in mysim.c
extern unsigned int PC;
extern int R[NUM_REGS];
extern int HI;
extern int LO;
extern int FCC;
extern Ftype F;
extern int instruction_counter;

extern unsigned int predicted_PC;
extern int branch_prediction_made;

extern int de_r_rs;
extern int de_r_rt;
extern int de_r_rt2;
extern int de_r_rd;
extern fpr de_fs;
extern fpr de_fs2;
extern fpr de_ft;
extern fpr de_fd;  
extern int de_HI;
extern int de_LO;
extern int de_FCC;

extern int em_alu_out;
extern int em_alu_out2;
extern float em_alu_f_out;
extern double em_alu_d_out;

extern int em_r_rt;
extern int em_r_rt2;
extern fpr em_ft;
extern fpr em_ft2;
extern unsigned int em_PC;

extern int mw_memory_out;
extern int mw_memory_out2;

extern unsigned int fd_PC;
extern int fd_inst_upper;
extern int fd_inst_lower;
extern unsigned int de_PC;
extern int de_inst_upper;
extern int de_inst_lower;
extern int em_inst_lower;

extern int em_write_rs;  
extern int em_write_rt;  
extern int em_write_rt2;
extern int em_write_rd;
extern int em_write_fs_l;
extern int em_write_fs_l2;
extern int em_write_fs_f;
extern int em_write_fs_d;
extern int em_write_ft_l;
extern int em_write_ft_f;
extern int em_write_ft_d;
extern int em_write_fd_l; 
extern int em_write_fd_f;
extern int em_write_fd_d; 
extern int em_write_HI;  
extern int em_write_LO;  
extern int em_write_FCC;  
extern int em_write_ra;

extern int em_load_ft;
extern int em_load_ft2;
extern int em_load_rt;
extern int em_load_rt2;

//holds the name of an instruction for debugging purposes
//NOTE: only 5 bytes (4 characters plus the terminator) - the commented-out
//strcpy calls in doexecute use longer names (e.g. "addiu", "l.s.r2"), so the
//buffer must be enlarged before any of them is re-enabled
char name[5];

//flags - defined in mysim.c
extern int fetch_overwrite;
extern int fetch_stall;

extern int fetch_load_stall_rt;
extern int fetch_load_stall_rt2;
extern int fetch_load_stall_ft;
extern int fetch_load_stall_ft2;

extern int pipeline_flush_before;
extern int pipeline_flush_after;
extern int pipeline_dualflush;

//extractl is from simplescalar, used in multiplication
//returns the num-bit field of word whose most significant bit is bit pos,
//right-aligned in the result (bit 0 is the least significant bit)
//num<=0 returns 0; num>=width of unsigned int keeps every bit
//the caller must keep 0 <= pos+1-num < width of unsigned int
unsigned int extractl(int word, int pos, int num)
{
	const int width=(int)(sizeof(unsigned int)*8);
	unsigned int mask;

	if (num<=0)
		return 0;

	//build the mask in unsigned arithmetic: shifting the negative value ~0 left,
	//or shifting by the full width, is undefined behavior
	if (num>=width)
		mask=~0u;
	else
		mask=~(~0u << num);

	return(((unsigned int) word >> (pos + 1 - num)) & mask);
}

//dofetch runs at the end of the fetch stage and looks at the instruction
//that was just fetched (fd_inst_lower)
//for jr/jalr it pulls the next PC from the BTB
//for conditional branches it asks the branch predictor and, if a prediction
//is made, sets predicted_PC to the fall-through PC or to the BTB entry
//branch_prediction_made is left as:
//0 = don't make branch prediction at all
//1 = predict not taken
//2 = predict taken
//3 = no prediction made, but value pulled from BTB anyway (jr and jalr)
void dofetch()
{
	//opcode of the instruction just fetched
	int op=fd_inst_lower&0xff;

	branch_prediction_made=0;

	if (op==3 || op==4)
	{
		//jr (3), jalr (4)
		//get the next PC from the BTB and speculate on it,
		//even though no branch prediction is being made
		predicted_PC=getBTB(PC-8);
		branch_prediction_made=3;
	}
	else if (op>=5 && op<=0xc)
	{
		//beq (5), bne (6), blez (7), bgtz (8), bltz (9), bgez (0xa), bc1f (0xb), bc1t (0xc)
		//get a branch prediction: 1=don't take, 2=take, 0=don't predict
		branch_prediction_made=get_branch_prediction(PC-8);

		//if 1, predict next instruction (PC already has been set to PC+8)
		if (branch_prediction_made==1)
			predicted_PC=PC;
		//if 2, predict the destination in the BTB (presumably PC+imm<<2)
		else if (branch_prediction_made==2)
			predicted_PC=getBTB(PC-8);
	}
}

//dodecode_cond_branch resolves a conditional branch at decode
//taken = the evaluated branch condition, imm = the branch's signed word offset
//on entry PC is the fall-through address; it is redirected if the branch is taken
//stalls the fetch if no prediction was made or the prediction was wrong,
//then trains the branch predictor and stores the taken destination in the BTB
static void dodecode_cond_branch(int taken, short int imm)
{
	//taken destination - computed before PC is redirected
	//(the original called updateBTB with PC+(imm<<2) after PC had already been
	//moved, so a taken branch stored PC+2*(imm<<2) in the BTB)
	//imm*4 is used rather than imm<<2: left-shifting a negative value is undefined
	unsigned int target=PC+(imm*4);

	if (taken)
		PC=target;

	//if no prediction made, must stall
	if (branch_prediction_made==0)
		fetch_stall=1;
	//if a misprediction was made, or the BTB was wrong, stall
	else if (PC!=predicted_PC)
		fetch_stall=1;

	//update the branch predictor with the branch outcome
	train_branch_predictor(fd_PC,taken);
	//the BTB always holds the taken destination: PC+8 is easy to determine at
	//fetch, PC+imm<<2 is not.  even if the branch wasn't taken, we may need this later.
	updateBTB(fd_PC,target);
}

//dodecode() is called at the end of the decode stage
//it corrects mispredicted branches and stalls if decode instruction is a load 
//& its dest regs = fetch inst's source regs
//if the instruction at decode is branch, clear out the fetch
//if we know the new dest. addr, redo the fetch
//otherwise, stall the fetch and redo it next cycle when we do know the dest
//if the instruction at decode is load, and the next instruction uses the result of that load,
//stall the fetch (actual reg comparison and stall here is done in mysim.c)
void dodecode()
{
	//we can use the fd_inst as our decode inst, because the fetch stage hasn't been run yet
	int opcode=fd_inst_lower&0xff;
	int targ=(fd_inst_upper)&0x3ffffff;
	short int imm=(fd_inst_upper)&0xffff;

	//assume don't need to redo fetch
	fetch_overwrite=0;
	//assume don't need to stall fetch
	fetch_stall=0;

	//if the decode inst is a load, these flags will tell the load's target reg
	//these are then used in mysim to determine whether to stall the fetched instruction
	fetch_load_stall_rt=0;
	fetch_load_stall_rt2=0;
	fetch_load_stall_ft=0;
	fetch_load_stall_ft2=0;

	switch(opcode)
	{
		case 1:
			//j
		case 2:
			//jal
			PC=targ<<2;
			//redo fetch - we have a new PC
			fetch_overwrite=1;
			break; 
		case 3:
			//jr
		case 4:
			//jalr
			//the PC to be fetched is R[rs]
			PC=de_r_rs;
			//check the BTB result - if none made, must stall
			if (branch_prediction_made==0)
				fetch_stall=1;
			//if the BTB was wrong, stall
			else if (PC!=predicted_PC)
				fetch_stall=1;
			//set the BTB entry for this jr/jalr to R[rs]
			updateBTB(fd_PC,de_r_rs);
			break; 
		case 5:
			//beq;
			//the next PC is PC+imm<<2 if R[rs]==R[rt], else PC+8
			dodecode_cond_branch(de_r_rs==de_r_rt,imm);
			break; 
		case 6:
			//bne;
			dodecode_cond_branch(de_r_rs!=de_r_rt,imm);
			break; 
		case 7:
			//blez;
			dodecode_cond_branch(de_r_rs<=0,imm);
			break; 
		case 8:
			//bgtz;
			dodecode_cond_branch(de_r_rs>0,imm);
			break; 
		case 9:
			//bltz;
			dodecode_cond_branch(de_r_rs<0,imm);
			break; 
		case 0xa:
			//bgez;
			dodecode_cond_branch(de_r_rs>=0,imm);
			break; 
		case 0xb:
			//bc1f;
			dodecode_cond_branch(!de_FCC,imm);
			break; 
		case 0xc:
			//bc1t;
			dodecode_cond_branch(de_FCC,imm);
			break; 

		//if the instruction is a load, note whether its dest reg is rt, rt+1, ft, or ft+1
		//we can use this in mysim.c to decide whether to stall the next instr or not
		case 0x20:
		case 0x22:
		case 0x24:
		case 0x26:
		case 0x28:
		case 0x2c:
		case 0x2d:
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc3:
		case 0xc4:
			//load into rt
			fetch_load_stall_rt=1;
			break;

		case 0x29:
		case 0xce:
			//load into rt and rt+1
			fetch_load_stall_rt=1;
			fetch_load_stall_rt2=1;
			break; 
		case 0x2a:
		case 0xc5:
			//load into ft
			fetch_load_stall_ft=1;
			break;
		case 0x2b:
		case 0xcf:
			//load into ft and ft+1
			fetch_load_stall_ft=1;
			fetch_load_stall_ft2=1;
			break;
	}
}

//doexecute actually executes arithmetic instructions
//the input is taken from the de_r & de_f pipeline registers 
//the output is put into the em_alu_out pipeline register 
//for loads & stores, doexecute determines the target address & puts it in em_alu_out
void doexecute()
{
	//since we haven't run decode yet, the execute instruction is still in de
	int opcode=de_inst_lower&0xff;
	int targ=(de_inst_upper)&0x3ffffff;
	short int imm=(de_inst_upper)&0xffff;
	unsigned int uimm=(de_inst_upper)&0xffff;
	int shamt=(de_inst_upper)&0xff;
	int bcode=(de_inst_upper)&0xfffff;
	int i,sign1,sign2,op1,op2;

	//keep track of which registers we will need to write to in writeback
	//we need this for data forwarding, as well as writeback
	int write_rs=0;
	int write_rt=0;
	int write_rt2=0;
	int write_rd=0;
	int write_fs_l=0;
	int write_fs_l2=0;
	int write_fs_f=0;
	int write_fs_d=0;
	int write_ft_l=0;
	int write_ft_f=0;
	int write_ft_d=0;
	int write_fd_l=0;
	int write_fd_f=0;
	int write_fd_d=0;
	int write_ra=0;
	int write_HI=0;
	int write_LO=0;
	int write_FCC=0;

	//keep track of load targets.  this is also needed in data forwarding
	em_load_rt=0;
	em_load_rt2=0;
	em_load_ft=0;
	em_load_ft2=0;

	switch(opcode)
	{
		case 0:
//			strcpy(name,"nop");
			//do nothing
			break; 
		case 1:
//			strcpy(name,"j");
			break; 
		case 2:
//			strcpy(name,"jal");
			em_alu_out=de_PC+8;
			write_ra=1;
			break; 
		case 3:
//			strcpy(name,"jr");
			break; 
		case 4:
//			strcpy(name,"jalr");
			em_alu_out=de_PC+8;
			write_ra=1;
			break; 
		case 5:
//			strcpy(name,"beq");
			break; 
		case 6:
//			strcpy(name,"bne");
			break; 
		case 7:
//			strcpy(name,"blez");
			break; 
		case 8:
//			strcpy(name,"bgtz");
			break; 
		case 9:
//			strcpy(name,"bltz");
			break; 
		case 0xa:
//			strcpy(name,"bgez");
			break; 
		case 0xb:
//			strcpy(name,"bc1f");
			break; 
		case 0xc:
//			strcpy(name,"bc1t");
			break; 
		case 0x20:
//			strcpy(name,"lb");
			em_alu_out=de_r_rs+(short)imm;
			em_load_rt=1;
			break; 
		case 0x22:
//			strcpy(name,"lbu");
			em_alu_out=de_r_rs+(short)imm;
			em_load_rt=1;
			break; 
		case 0x24:
//			strcpy(name,"lh");
			em_alu_out=de_r_rs+(short)imm;
			em_load_rt=1;
			break; 
		case 0x26:
//			strcpy(name,"lhu");
			em_alu_out=de_r_rs+(short)imm;
			em_load_rt=1;
			break; 
		case 0x28:
//			strcpy(name,"lw");
			em_alu_out=de_r_rs+(short)imm;
			em_load_rt=1;
			break; 
		case 0x29:
//			strcpy(name,"dlw");
			em_alu_out=de_r_rs+(short)imm;
			em_load_rt=1;
			em_load_rt2=1;
			break; 
		case 0x2a:
//			strcpy(name,"l.s");
			em_alu_out=de_r_rs+(short)imm;
			em_load_ft=1;
			break; 
		case 0x2b:
//			strcpy(name,"l.d");
			em_alu_out=de_r_rs+(short)imm;
			em_load_ft=1;
			em_load_ft2=1;
			break; 
		case 0x2c:
//			strcpy(name,"lwl");
			em_alu_out=de_r_rs+(short)imm;
			em_load_rt=1;
			break;
		case 0x2d:
//			strcpy(name,"lwr");
			em_alu_out=de_r_rs+(short)imm;
			em_load_rt=1;
			break; 
		case 0x30:
//			strcpy(name,"sb");
			em_alu_out=de_r_rs+imm;
			break; 
		case 0x32:
//			strcpy(name,"sh");
			em_alu_out=de_r_rs+imm;
			break; 
		case 0x34:
//			strcpy(name,"sw");
			em_alu_out=de_r_rs+imm;
			break; 
		case 0x35:
//			strcpy(name,"dsw");
			em_alu_out=de_r_rs+imm;
			break; 
		case 0x36:
//			strcpy(name,"s.s");
			em_alu_out=de_r_rs+imm;
			break; 
		case 0x37:
//			strcpy(name,"s.d");
			em_alu_out=de_r_rs+imm;
			break; 
		case 0x38:
//			strcpy(name,"dsz");
			em_alu_out=de_r_rs+imm;
			break; 
		case 0x39:
//			strcpy(name,"swl");
			em_alu_out=de_r_rs+(short)imm;
			break; 
		case 0x3a:
//			strcpy(name,"swr");
			em_alu_out=de_r_rs+(short)imm;
			break; 
		case 0xc0:
//			strcpy(name,"lb");
			em_alu_out=de_r_rs+de_r_rt;
			em_load_rt=1;
			break; 
		case 0xc1:
//			strcpy(name,"lbu");
			em_alu_out=de_r_rs+de_r_rt;
			em_load_rt=1;
			break; 
		case 0xc2:
//			strcpy(name,"lh");
			em_alu_out=de_r_rs+de_r_rt;
			em_load_rt=1;
			break; 
		case 0xc3:
//			strcpy(name,"lhu");
			em_alu_out=de_r_rs+de_r_rt;
			em_load_rt=1;
			break; 
		case 0xc4:
//			strcpy(name,"lw");
			em_alu_out=de_r_rs+de_r_rt;
			em_load_rt=1;
			break; 
		case 0xce:
//			strcpy(name,"dlw");
			em_alu_out=de_r_rs+de_r_rt;
			em_load_rt=1;
			em_load_rt2=1;
			break; 
		case 0xc5:
//			strcpy(name,"l.s");
			em_alu_out=de_r_rs+de_r_rt;
			em_load_ft=1;
			break; 
		case 0xcf:
//			strcpy(name,"l.d");
			em_alu_out=de_r_rs+de_r_rt;
			em_load_ft=1;
			em_load_ft2=1;
			break;
		case 0xc6:
//			strcpy(name,"sb");
			em_alu_out=de_r_rs+de_r_rt;
			break; 
		case 0xc7:
//			strcpy(name,"sh");
			em_alu_out=de_r_rs+de_r_rt;
			break; 
		case 0xc8:
//			strcpy(name,"sw");
			em_alu_out=de_r_rs+de_r_rt;
			break; 
		case 0xd0:
//			strcpy(name,"dsw");
			em_alu_out=de_r_rs+de_r_rt;
			break; 
		case 0xd1:
//			strcpy(name,"dsz");
			em_alu_out=de_r_rs+de_r_rt;
			break; 
		case 0xc9:
//			strcpy(name,"s.s");
			em_alu_out=de_r_rs+de_r_rt;
			break; 
		case 0xd2:
//			strcpy(name,"s.d");
			em_alu_out=de_r_rs+de_r_rt;
			break; 
		case 0xca:
//			strcpy(name,"l.s.r2");
			fatal("l.s.r2 not supported");
			break; 
		case 0xcb:
//			strcpy(name,"s.s.r2");
			fatal("s.s.r2 not supported");
			break; 
		case 0xcc:
//			strcpy(name,"lw.r2");
			fatal("lw.r2 not supported");
			break; 
		case 0xcd:
//			strcpy(name,"sw.r2");
			fatal("sw.r2 not supported");
			break; 
		case 0x40:
//			strcpy(name,"add");
			em_alu_out=de_r_rs+de_r_rt;
			write_rd=1;
			break; 
		case 0x41:
//			strcpy(name,"addi");
			em_alu_out=de_r_rs+imm;
			write_rt=1;
			break; 
		case 0x42:
//			strcpy(name,"addu");
			em_alu_out=de_r_rs+de_r_rt;
			write_rd=1;
			break; 
		case 0x43:
//			strcpy(name,"addiu");
			em_alu_out=de_r_rs+(short)imm;
			write_rt=1;
			break; 
		case 0x44:
//			strcpy(name,"sub");
			em_alu_out=de_r_rs-de_r_rt;
			write_rd=1;
			break; 
		case 0x45:
//			strcpy(name,"subu");
			em_alu_out=de_r_rs-de_r_rt;
			write_rd=1;
			break; 
		case 0x46:
		{
//			strcpy(name,"mult");
			sign1=0;
			sign2=0;
			em_alu_out=0;
			em_alu_out2=0;
			op1=de_r_rs;
			op2=de_r_rt;
			if (op1 & 020000000000)
			{
				sign1=1;
				op1=(~op1)+1;
			}
			if (op2 & 020000000000)
			{
				sign2=1;
				op2=(~op2)+1;
			}
			if (op1 & 020000000000)
				em_alu_out2=op2;
			for (i=0; i<31; i++)
			{
				em_alu_out=em_alu_out<<1;
				em_alu_out=em_alu_out+extractl(em_alu_out2,31,1);
				em_alu_out2=em_alu_out2<<1;
				if ((extractl(op1,30-i,1))==1)
				{
					if (((unsigned)037777777777-(unsigned)em_alu_out2)<(unsigned)op2)
					{
						em_alu_out=em_alu_out+1;
					}
					em_alu_out2=em_alu_out2+op2;
				}
			}
			if (sign1^sign2)
			{
				em_alu_out2=~em_alu_out2;
				em_alu_out=~em_alu_out;
				if ((unsigned)em_alu_out2==037777777777)
					em_alu_out=em_alu_out+1;
				em_alu_out2=em_alu_out2+1;
			}
			write_HI=1;
			write_LO=1;
		}
		break; 
		case 0x47:
		{
//			strcpy(name,"multu");
			em_alu_out=0;
			em_alu_out2=0;
			if (de_r_rs&020000000000)
				em_alu_out2=de_r_rt;
			for (i=0; i<31; i++)
			{
				em_alu_out=em_alu_out<<1;
				em_alu_out=em_alu_out+extractl(em_alu_out2,31,1);
				em_alu_out2=em_alu_out2<<1;
				if (extractl(de_r_rs,30-i,1)==1)
				{
					if (((unsigned)037777777777-(unsigned)em_alu_out2)<(unsigned)de_r_rt)
						em_alu_out=em_alu_out+1;
					em_alu_out2=em_alu_out2+de_r_rt;
				}
			}
			write_HI=1;
			write_LO=1;
		}
		break; 
		case 0x48:
//			strcpy(name,"div");
			if (de_r_rt==0)
				fatal("Divide by 0");
			em_alu_out2=IDIV(de_r_rs,de_r_rt);
			em_alu_out=IMOD(de_r_rs,de_r_rt);
			write_HI=1;
			write_LO=1;
			break; 
		case 0x49:
//			strcpy(name,"divu");
			if (de_r_rt==0)
				fatal("Divide by 0");
			em_alu_out2=IDIV((unsigned)de_r_rs,(unsigned)de_r_rt);
			em_alu_out=IMOD((unsigned)de_r_rs,(unsigned)de_r_rt);
			write_HI=1;
			write_LO=1;
			break;
		case 0x4a:
//			strcpy(name,"mfhi");
			em_alu_out=de_HI;
			write_rd=1;
			break;
		case 0x4b:
//			strcpy(name,"mthi");
			em_alu_out=de_r_rs;
			write_HI=1;
			break;
		case 0x4c:
//			strcpy(name,"mflo");
			em_alu_out=de_LO;
			write_rd=1;
			break; 
		case 0x4d:
//			strcpy(name,"mtlo");
			em_alu_out2=de_r_rs;
			write_LO=1;
			break; 
		case 0x4e:
//			strcpy(name,"and");
			em_alu_out=de_r_rs&de_r_rt;
			write_rd=1;
			break; 
		case 0x4f:
//			strcpy(name,"andi");
			em_alu_out=de_r_rs&uimm;
			write_rt=1;
			break; 
		case 0x50:
//			strcpy(name,"or");
			em_alu_out=de_r_rs|de_r_rt;
			write_rd=1;
			break; 
		case 0x51:
//			strcpy(name,"ori");
			em_alu_out=de_r_rs|uimm;
			write_rt=1;
			break; 
		case 0x52:
//			strcpy(name,"xor");
			em_alu_out=de_r_rs^de_r_rt;
			write_rd=1;
			break; 
		case 0x53:
//			strcpy(name,"xori");
			em_alu_out=de_r_rs^uimm;
			write_rt=1;
			break; 
		case 0x54:
//			strcpy(name,"nor");
			em_alu_out=~(de_r_rs|de_r_rt);
			write_rd=1;
			break; 
		case 0x55:
//			strcpy(name,"sll");
			em_alu_out=de_r_rt<<shamt;
			write_rd=1;
			break; 
		case 0x56:
//			strcpy(name,"sllv");
			em_alu_out=de_r_rt<<(de_r_rs&037);
			write_rd=1;
			break; 
		case 0x57:
//			strcpy(name,"srl");
			em_alu_out=(unsigned)de_r_rt>>shamt;
			write_rd=1;
			break; 
		case 0x58:
//			strcpy(name,"srlv");
			em_alu_out=(unsigned)de_r_rt>>(de_r_rs&037);
			write_rd=1;
			break; 
		case 0x59:
//			strcpy(name,"sra");
			if (de_r_rt&020000000000)
			{
				em_alu_out=de_r_rt;
				for (i=0; i<shamt; i++)
					em_alu_out=(em_alu_out>>1)|020000000000;
			}
			else
				em_alu_out=de_r_rt>>shamt;
			write_rd=1;
			break; 
		case 0x5a:
//			strcpy(name,"srav");
			shamt=de_r_rs&037;
			if (de_r_rt&020000000000)
			{
				em_alu_out=de_r_rt;
				for (i=0; i<shamt; i++)
					em_alu_out=(em_alu_out>>1)|020000000000;
			}
			else
				em_alu_out=de_r_rt>>shamt;
			write_rd=1;
			break; 
		case 0x5b:
//			strcpy(name,"slt");
			em_alu_out=(de_r_rs<de_r_rt)? 1:0;
			write_rd=1;
			break; 
		case 0x5c:
//			strcpy(name,"slti");
			em_alu_out=(de_r_rs<imm)? 1:0;
			write_rt=1;
			break; 
		case 0x5d:
//			strcpy(name,"sltu");
			em_alu_out=((unsigned)de_r_rs<(unsigned)de_r_rt)? 1:0;
			write_rd=1;
			break; 
		case 0x5e:
//			strcpy(name,"sltiu");
			em_alu_out=((unsigned)de_r_rs<(unsigned)imm)? 1:0;
			write_rt=1;
			break; 
		case 0x70:
//			strcpy(name,"add.s");
			em_alu_f_out=de_fs.f+de_ft.f;
			write_fd_f=1;
			break; 
		case 0x71:
//			strcpy(name,"add.d");
			em_alu_d_out=de_fs.d+de_ft.d;
			write_fd_d=1;
			break; 
		case 0x72:
//			strcpy(name,"sub.s");
			em_alu_f_out=de_fs.f-de_ft.f;
			write_fd_f=1;
			break; 
		case 0x73:
//			strcpy(name,"sub.d");
			em_alu_d_out=de_fs.d-de_ft.d;
			write_fd_d=1;
			break; 
		case 0x74:
//			strcpy(name,"mul.s");
			em_alu_f_out=de_fs.f*de_ft.f;
			write_fd_f=1;
			break; 
		case 0x75:
//			strcpy(name,"mul.d");
			em_alu_d_out=de_fs.d*de_ft.d;
			write_fd_d=1;
			break; 
		case 0x76:
//			strcpy(name,"div.s");
			if (de_ft.f==0)
				fatal("Divide by 0");
			em_alu_f_out=de_fs.f/de_ft.f;
			write_fd_f=1;
			break; 
		case 0x77:
//			strcpy(name,"div.d");
			if (de_ft.d==0)
				fatal("Divide by 0");
			em_alu_d_out=de_fs.d/de_ft.d;
			write_fd_d=1;
			break; 
		case 0x78:
//			strcpy(name,"abs.s");
			em_alu_f_out=fabs((double)de_fs.f);
			write_fd_f=1;
			break; 
		case 0x79:
//			strcpy(name,"abs.d");
			em_alu_d_out=fabs(de_fs.f);
			write_fd_d=1;
			break; 
		case 0x7a:
//			strcpy(name,"mov.s");
			em_alu_f_out=de_fs.f;
			write_fd_f=1;
			break; 
		case 0x7b:
//			strcpy(name,"mov.d");
			em_alu_d_out=de_fs.d;
			write_fd_d=1;
			break; 
		case 0x7c:
//			strcpy(name,"neg.s");
			em_alu_f_out=-de_fs.f;
			write_fd_f=1;
			break; 
		case 0x7d:
//			strcpy(name,"neg.d");
			em_alu_d_out=-de_fs.d;
			write_fd_d=1;
			break; 
		case 0x80:
//			strcpy(name,"cvt.s.d");
			em_alu_f_out=(float)de_fs.d;
			write_fd_f=1;
			break; 
		case 0x81:
//			strcpy(name,"cvt.s.w");
			em_alu_f_out=(float)de_fs.l;
			write_fd_f=1;
			break; 
		case 0x82:
//			strcpy(name,"cvt.d.s");
			em_alu_d_out=(double)de_fs.f;
			write_fd_d=1;
			break; 
		case 0x83:
//			strcpy(name,"cvt.d.w");
			em_alu_d_out=(double)de_fs.l;
			write_fd_d=1;
			break; 
		case 0x84:
//			strcpy(name,"cvt.w.s");
			em_alu_out=(int)de_fs.f;
			write_fd_l=1;
			break; 
		case 0x85:
//			strcpy(name,"cvt.w.d");
			em_alu_out=(int)de_fs.d;
			write_fd_l=1;
			break; 
		case 0x90:
//			strcpy(name,"c.eq.s");
			em_alu_out=de_fs.f==de_ft.f;
			write_FCC=1;
			break; 
		case 0x91:
//			strcpy(name,"c.eq.d");
			em_alu_out=de_fs.d==de_ft.d;
			write_FCC=1;
			break; 
		case 0x92:
//			strcpy(name,"c.lt.s");
			em_alu_out=de_fs.f<de_ft.f;
			write_FCC=1;
			break; 
		case 0x93:
//			strcpy(name,"c.lt.d");
			em_alu_out=de_fs.d<de_ft.d;
			write_FCC=1;
			break; 
		case 0x94:
//			strcpy(name,"c.le.s");
			em_alu_out=de_fs.f<=de_ft.f;
			write_FCC=1;
			break; 
		case 0x95:
//			strcpy(name,"c.le.d");
			em_alu_out=de_fs.d<=de_ft.d;
			write_FCC=1;
			break; 
		case 0x96:
//			strcpy(name,"sqrt.s");
			em_alu_f_out=sqrt((double)de_fs.f);
			write_fd_f=1;
			break; 
		case 0x97:
//			strcpy(name,"sqrt.d");
			em_alu_d_out=sqrt(de_fs.d);
			write_fd_d=1;
			break; 
		case 0xa0:
//			strcpy(name,"syscall");
			//syscall handled in fetch
			break; 
		case 0xa1:
//			strcpy(name,"break");
			//do nothing for the moment
			break; 
		case 0xa2:
//			strcpy(name,"lui");
			em_alu_out=uimm<<16;
			write_rt=1;
			break; 
		case 0xa3:
//			strcpy(name,"mfc1");
			em_alu_out=de_fs.l;
			write_rt=1;
			break; 
		case 0xa7:
//			strcpy(name,"dmfc1");
			em_alu_out=de_fs.l;
			em_alu_out2=de_fs2.l;
			write_rt=1;
			write_rt2=1;
			break; 
		case 0xa4:
//			strcpy(name,"cfc1");
//			fatal("cfc1 not supported");
			break; 
		case 0xa5:
//			strcpy(name,"mtc1");
			em_alu_out=de_r_rt;
			write_fs_l=1;
			break; 
		case 0xa8:
//			strcpy(name,"dmtc1");
			em_alu_out=de_r_rt;
			em_alu_out2=de_r_rt2;
			write_fs_l=1;
			write_fs_l2=1;
			break; 
		case 0xa6:
//			strcpy(name,"ctc1");
//			fatal("ctc1 not supported");
			break; 
		default:
//			strcpy(name,"undef");
			fatal("Undefined instruction");
			break;
	}

	//set all the em pipeline flags so that we will know which registers will be written back
	em_write_rs=write_rs;
	em_write_rt=write_rt;
	em_write_rt2=write_rt2;
	em_write_rd=write_rd;
	em_write_fs_l=write_fs_l;
	em_write_fs_l2=write_fs_l2;
	em_write_fs_f=write_fs_f;
	em_write_fs_d=write_fs_d;
	em_write_ft_l=write_ft_l;
	em_write_ft_f=write_ft_f;
	em_write_ft_d=write_ft_d;
	em_write_fd_l=write_fd_l;
	em_write_fd_f=write_fd_f;
	em_write_fd_d=write_fd_d;
	em_write_ra=write_ra;
	em_write_HI=write_HI;
	em_write_LO=write_LO;
	em_write_FCC=write_FCC;
}

//domemory is called in the memory stage.
//It looks at the opcode of the instruction held in the em latch and
//  - for loads, reads the data cache at address em_alu_out and leaves the
//    result in mw_memory_out (and mw_memory_out2 for the two-word loads)
//  - for stores, writes em_r_rt / em_r_rt2 / em_ft / em_ft2 (or zeros for dsz)
//    to the data cache one byte at a time, least significant byte first
//  - for instructions that read or write double fp regs, requests pipeline
//    flushes: data forwarding is very difficult for them, so we just flush
//    the entire pipeline before and/or after them
//Every load and store exists under two opcodes.  Both are handled by the same
//code here because the address has already been computed into em_alu_out
//(rs+imm for opcodes 0x20-0x3a, rs+rt for opcodes 0xc0-0xd2).
//All other opcodes need no work in this stage.
void domemory()
{
	//we haven't run execute yet, so the instruction is still in em
	int opcode=em_inst_lower&0xff;
	//scratch: partially assembled halfword, or the address for lwl/lwr/swl/swr
	int i;

	//flush the pipeline, then execute this instruction
	pipeline_flush_before=0;
	//flush the pipeline after executing this instruction
	pipeline_flush_after=0;

	switch(opcode)
	{
		//---- loads ----

		case 0x20:	//lb (rs+imm)
		case 0xc0:	//lb (rs+rt)
			mw_memory_out=dcache_read(em_alu_out);
			break;

		case 0x22:	//lbu (rs+imm)
		case 0xc1:	//lbu (rs+rt)
			mw_memory_out=(unsigned char)dcache_read(em_alu_out);
			break;

		case 0x24:	//lh (rs+imm)
		case 0xc2:	//lh (rs+rt)
			//low byte comes from the lower address, high byte from address+1.
			//the 0xc2 form used to add the address em_alu_out+1 itself instead
			//of the byte stored there; it now shares the 0x24 implementation.
			//NOTE(review): if dcache_read returns a sign-extended byte, the low
			//byte can disturb the high byte here - confirm against dcache_read
			i=dcache_read(em_alu_out);
			i=i+(dcache_read(em_alu_out+1)<<8);
			mw_memory_out=(short)i;
			break;

		case 0x26:	//lhu (rs+imm)
		case 0xc3:	//lhu (rs+rt)
			//NOTE(review): the high byte is cast to unsigned short rather than
			//unsigned char - confirm this is right for dcache_read's return type
			i=(unsigned char)dcache_read(em_alu_out);
			mw_memory_out=i+((unsigned short)dcache_read(em_alu_out+1)<<8);
			break;

		case 0x28:	//lw (rs+imm)
		case 0xc4:	//lw (rs+rt)
		case 0x2a:	//l.s (rs+imm)
		case 0xc5:	//l.s (rs+rt)
			mw_memory_out=dcache_read_word(em_alu_out);
			break;

		case 0x29:	//dlw (rs+imm)
		case 0xce:	//dlw (rs+rt)
		case 0x2b:	//l.d (rs+imm)
		case 0xcf:	//l.d (rs+rt)
			//two consecutive words: first into mw_memory_out, second into mw_memory_out2
			mw_memory_out=dcache_read_word(em_alu_out);
			mw_memory_out2=dcache_read_word(em_alu_out+4);
			break;

		case 0x2c:
//			strcpy(name,"lwl");
			//fills the result from the top byte down, walking addresses
			//downward until a multiple of 4 has been read.
			//bytes not loaded keep whatever mw_memory_out held before.
			//NOTE(review): presumably the untouched bytes should come from the
			//destination register - verify against the writeback stage
			i=em_alu_out;
			mw_memory_out=(dcache_read(i)<<24)+(mw_memory_out&0x00FFFFFF);
			if (i%4!=0)
			{
				mw_memory_out=(dcache_read(i-1)<<16)+(mw_memory_out&0xFF00FFFF);
				if ((i-1)%4!=0)
				{
					mw_memory_out=(dcache_read(i-2)<<8)+(mw_memory_out&0xFFFF00FF);
					if ((i-2)%4!=0)
						mw_memory_out=(dcache_read(i-3))+(mw_memory_out&0xFFFFFF00);
				}
			}
			break;

		case 0x2d:
//			strcpy(name,"lwr");
			//fills the result from the bottom byte up, walking addresses
			//upward until the next multiple of 4 would be reached.
			//bytes not loaded keep whatever mw_memory_out held before.
			i=em_alu_out;
			mw_memory_out=(dcache_read(i))+(mw_memory_out&0xFFFFFF00);
			if ((i+1)%4!=0)
			{
				mw_memory_out=(dcache_read(i+1)<<8)+(mw_memory_out&0xFFFF00FF);
				if ((i+2)%4!=0)
				{
					mw_memory_out=(dcache_read(i+2)<<16)+(mw_memory_out&0xFF00FFFF);
					if ((i+3)%4!=0)
						mw_memory_out=(dcache_read(i+3)<<24)+(mw_memory_out&0x00FFFFFF);
				}
			}
			break;

		//---- stores (written one byte at a time, low byte at the lowest address) ----

		case 0x30:	//sb (rs+imm)
		case 0xc6:	//sb (rs+rt)
			dcache_write(em_alu_out, em_r_rt&0xFF);
			break;

		case 0x32:	//sh (rs+imm)
		case 0xc7:	//sh (rs+rt)
			dcache_write(em_alu_out, em_r_rt&0xFF);
			dcache_write(em_alu_out+1, (em_r_rt>>8)&0xFF);
			break;

		case 0x34:	//sw (rs+imm)
		case 0xc8:	//sw (rs+rt)
			dcache_write(em_alu_out, em_r_rt&0xFF);
			dcache_write(em_alu_out+1, (em_r_rt>>8)&0xFF);
			dcache_write(em_alu_out+2, (em_r_rt>>16)&0xFF);
			dcache_write(em_alu_out+3, (em_r_rt>>24)&0xFF);
			break;

		case 0x35:	//dsw (rs+imm)
		case 0xd0:	//dsw (rs+rt)
			//em_r_rt goes to the first word, em_r_rt2 to the second
			dcache_write(em_alu_out, em_r_rt&0xFF);
			dcache_write(em_alu_out+1, (em_r_rt>>8)&0xFF);
			dcache_write(em_alu_out+2, (em_r_rt>>16)&0xFF);
			dcache_write(em_alu_out+3, (em_r_rt>>24)&0xFF);
			dcache_write(em_alu_out+4, em_r_rt2&0xFF);
			dcache_write(em_alu_out+5, (em_r_rt2>>8)&0xFF);
			dcache_write(em_alu_out+6, (em_r_rt2>>16)&0xFF);
			dcache_write(em_alu_out+7, (em_r_rt2>>24)&0xFF);
			break;

		case 0x36:	//s.s (rs+imm)
		case 0xc9:	//s.s (rs+rt)
			dcache_write(em_alu_out, em_ft.l&0xFF);
			dcache_write(em_alu_out+1, (em_ft.l>>8)&0xFF);
			dcache_write(em_alu_out+2, (em_ft.l>>16)&0xFF);
			dcache_write(em_alu_out+3, (em_ft.l>>24)&0xFF);
			break;

		case 0x37:	//s.d (rs+imm)
		case 0xd2:	//s.d (rs+rt)
			//em_ft goes to the first word, em_ft2 to the second
			dcache_write(em_alu_out, em_ft.l&0xFF);
			dcache_write(em_alu_out+1, (em_ft.l>>8)&0xFF);
			dcache_write(em_alu_out+2, (em_ft.l>>16)&0xFF);
			dcache_write(em_alu_out+3, (em_ft.l>>24)&0xFF);
			dcache_write(em_alu_out+4, em_ft2.l&0xFF);
			dcache_write(em_alu_out+5, (em_ft2.l>>8)&0xFF);
			dcache_write(em_alu_out+6, (em_ft2.l>>16)&0xFF);
			dcache_write(em_alu_out+7, (em_ft2.l>>24)&0xFF);
			break;

		case 0x38:	//dsz (rs+imm)
		case 0xd1:	//dsz (rs+rt)
			//store eight zero bytes
			dcache_write(em_alu_out, 0);
			dcache_write(em_alu_out+1, 0);
			dcache_write(em_alu_out+2, 0);
			dcache_write(em_alu_out+3, 0);
			dcache_write(em_alu_out+4, 0);
			dcache_write(em_alu_out+5, 0);
			dcache_write(em_alu_out+6, 0);
			dcache_write(em_alu_out+7, 0);
			break;

		case 0x39:
//			strcpy(name,"swl");
			//writes the register from its top byte down, walking addresses
			//downward until a multiple of 4 has been written
			i=em_alu_out;
			dcache_write(i, (em_r_rt>>24)&0xFF);
			if (i%4!=0)
			{
				dcache_write(i-1, (em_r_rt>>16)&0xFF);
				if ((i-1)%4!=0)
				{
					dcache_write(i-2, (em_r_rt>>8)&0xFF);
					if ((i-2)%4!=0)
						dcache_write(i-3, (em_r_rt)&0xFF);
				}
			}
			break;

		case 0x3a:
//			strcpy(name,"swr");
			//writes the register from its bottom byte up, walking addresses
			//upward until the next multiple of 4 would be reached
			i=em_alu_out;
			dcache_write(i, (em_r_rt)&0xFF);
			if ((i+1)%4!=0)
			{
				dcache_write(i+1, (em_r_rt>>8)&0xFF);
				if ((i+2)%4!=0)
				{
					dcache_write(i+2, (em_r_rt>>16)&0xFF);
					if ((i+3)%4!=0)
						dcache_write(i+3, (em_r_rt>>24)&0xFF);
				}
			}
			break;

		//---- pipeline flushes around double precision fp instructions ----

		//d--d - consume a fp double, and produce a fp double
		case 0x71:
		case 0x73:
		case 0x75:
		case 0x77:
		case 0x79:
		case 0x7b:
		case 0x7d:
		case 0x97:
			pipeline_flush_before=1;
			//prevent pipeline_flush_before from happening twice
			pipeline_dualflush=1-pipeline_dualflush;
			pipeline_flush_after=1;
			break;

		//d--> - consume a fp double
		case 0x80:
		case 0x85:
		case 0x91:
		case 0x93:
		case 0x95:
			pipeline_flush_before=1;
			pipeline_dualflush=1-pipeline_dualflush;
			break;

		//>--d - produce a fp double
		case 0x82:
		case 0x83:
			pipeline_flush_after=1;
			break;

		//everything else has nothing to do in the memory stage
		default:
			break;
	}
}

//get_instruction_name copies the mnemonic of an instruction into name[]
//the opcode is taken from the low 8 bits of inst_lower; inst_upper is not looked at
//opcodes that are not in the table produce "undef"
//it's used in debugging
void get_instruction_name(int inst_upper, int inst_lower)
{
	//opcode -> mnemonic pairs
	//the 0xc0-0xd2 opcodes repeat the load/store mnemonics of 0x20-0x38
	static const struct
	{
		int opcode;
		const char *mnemonic;
	} known[]=
	{
		//jumps and branches
		{0x00,"nop"},     {0x01,"j"},       {0x02,"jal"},     {0x03,"jr"},
		{0x04,"jalr"},    {0x05,"beq"},     {0x06,"bne"},     {0x07,"blez"},
		{0x08,"bgtz"},    {0x09,"bltz"},    {0x0a,"bgez"},    {0x0b,"bc1f"},
		{0x0c,"bc1t"},
		//loads
		{0x20,"lb"},      {0x22,"lbu"},     {0x24,"lh"},      {0x26,"lhu"},
		{0x28,"lw"},      {0x29,"dlw"},     {0x2a,"l.s"},     {0x2b,"l.d"},
		{0x2c,"lwl"},     {0x2d,"lwr"},
		//stores
		{0x30,"sb"},      {0x32,"sh"},      {0x34,"sw"},      {0x35,"dsw"},
		{0x36,"s.s"},     {0x37,"s.d"},     {0x38,"dsz"},     {0x39,"swl"},
		{0x3a,"swr"},
		//second set of load/store opcodes
		{0xc0,"lb"},      {0xc1,"lbu"},     {0xc2,"lh"},      {0xc3,"lhu"},
		{0xc4,"lw"},      {0xce,"dlw"},     {0xc5,"l.s"},     {0xcf,"l.d"},
		{0xc6,"sb"},      {0xc7,"sh"},      {0xc8,"sw"},      {0xd0,"dsw"},
		{0xd1,"dsz"},     {0xc9,"s.s"},     {0xd2,"s.d"},
		{0xca,"l.s.r2"},  {0xcb,"s.s.r2"},  {0xcc,"lw.r2"},   {0xcd,"sw.r2"},
		//integer arithmetic, logic, shifts and compares
		{0x40,"add"},     {0x41,"addi"},    {0x42,"addu"},    {0x43,"addiu"},
		{0x44,"sub"},     {0x45,"subu"},    {0x46,"mult"},    {0x47,"multu"},
		{0x48,"div"},     {0x49,"divu"},    {0x4a,"mfhi"},    {0x4b,"mthi"},
		{0x4c,"mflo"},    {0x4d,"mtlo"},    {0x4e,"and"},     {0x4f,"andi"},
		{0x50,"or"},      {0x51,"ori"},     {0x52,"xor"},     {0x53,"xori"},
		{0x54,"nor"},     {0x55,"sll"},     {0x56,"sllv"},    {0x57,"srl"},
		{0x58,"srlv"},    {0x59,"sra"},     {0x5a,"srav"},    {0x5b,"slt"},
		{0x5c,"slti"},    {0x5d,"sltu"},    {0x5e,"sltiu"},
		//floating point arithmetic
		{0x70,"add.s"},   {0x71,"add.d"},   {0x72,"sub.s"},   {0x73,"sub.d"},
		{0x74,"mul.s"},   {0x75,"mul.d"},   {0x76,"div.s"},   {0x77,"div.d"},
		{0x78,"abs.s"},   {0x79,"abs.d"},   {0x7a,"mov.s"},   {0x7b,"mov.d"},
		{0x7c,"neg.s"},   {0x7d,"neg.d"},
		//floating point conversions
		{0x80,"cvt.s.d"}, {0x81,"cvt.s.w"}, {0x82,"cvt.d.s"}, {0x83,"cvt.d.w"},
		{0x84,"cvt.w.s"}, {0x85,"cvt.w.d"},
		//floating point compares and square roots
		{0x90,"c.eq.s"},  {0x91,"c.eq.d"},  {0x92,"c.lt.s"},  {0x93,"c.lt.d"},
		{0x94,"c.le.s"},  {0x95,"c.le.d"},  {0x96,"sqrt.s"},  {0x97,"sqrt.d"},
		//miscellaneous
		{0xa0,"syscall"}, {0xa1,"break"},   {0xa2,"lui"},     {0xa3,"mfc1"},
		{0xa7,"dmfc1"},   {0xa4,"cfc1"},    {0xa5,"mtc1"},    {0xa8,"dmtc1"},
		{0xa6,"ctc1"}
	};
	const unsigned int count=sizeof(known)/sizeof(known[0]);
	const int wanted=inst_lower&0xff;
	//stays "undef" unless the opcode turns up in the table
	const char *found="undef";
	unsigned int k;

	for (k=0; k<count; k++)
	{
		if (known[k].opcode==wanted)
		{
			found=known[k].mnemonic;
			break;
		}
	}

	strcpy(name,found);
}

//di_load_byte reads one byte of data memory and returns it zero-extended (0..255)
//dcache_read's return type is declared elsewhere; masking here makes the
//multi-byte loads below independent of whether that type is signed
static unsigned int di_load_byte(int addr)
{
	return ((unsigned int)dcache_read(addr))&0xFFu;
}

//doinstruction executes the entire instruction.  for functional simulation
//
//inst_upper holds the operand fields:
//  rs=bits 31-24, rt=bits 23-16, rd=bits 15-8, shamt=bits 7-0,
//  imm/uimm=bits 15-0 (signed/unsigned), targ=bits 25-0
//inst_lower holds the opcode in its low 8 bits
//
//side effects: copies the mnemonic into name, updates R / F / HI / LO / FCC
//and data memory as the instruction requires, and finally sets PC to the
//next PC (PC+8 unless a jump or a taken branch redirected it)
//calls fatal() on integer or float divide by zero, on the unsupported
//*.r2 opcodes and on undefined opcodes
void doinstruction(int inst_upper, int inst_lower)
{
	unsigned int nPC;
	unsigned int btarg;
	int opcode=inst_lower&0xff;
	//mask after shifting: inst_upper is signed, so >> may replicate the sign
	//bit and would otherwise produce a negative register number
	int rs=(inst_upper>>24)&0xff;
	int rt=(inst_upper>>16)&0xff;
	int rd=(inst_upper>>8)&0xff;
	int targ=(inst_upper)&0x3ffffff;
	short int imm=(inst_upper)&0xffff;
	unsigned int uimm=(inst_upper)&0xffff;
	int shamt=(inst_upper)&0xff;
	int i,sign1,sign2,op1,op2;
	//next PC is just PC+8 (can be altered by branches later)
	nPC=PC+8;
	//destination of a taken conditional branch: the sign-extended imm scaled
	//by 4, relative to the following instruction
	//(imm*4 rather than imm<<2: left-shifting a negative value is undefined)
	btarg=PC+8+imm*4;
	//make sure R[0] is hardwired to 0
	R[0]=0;
	switch(opcode)
	{
		//---- jumps and branches ----
		case 0:
			strcpy(name,"nop");
			//do nothing
			break;
		case 1:
			strcpy(name,"j");
			nPC=targ<<2;
			break;
		case 2:
			strcpy(name,"jal");
			//link register gets the address of the following instruction
			R[31]=PC+8;
			nPC=targ<<2;
			break;
		case 3:
			strcpy(name,"jr");
			nPC=R[rs];
			break;
		case 4:
			strcpy(name,"jalr");
			R[rd]=PC+8;
			nPC=R[rs];
			break;
		case 5:
			strcpy(name,"beq");
			if (R[rs]==R[rt])
				nPC=btarg;
			break;
		case 6:
			strcpy(name,"bne");
			if (R[rs]!=R[rt])
				nPC=btarg;
			break;
		case 7:
			strcpy(name,"blez");
			if (R[rs]<=0)
				nPC=btarg;
			break;
		case 8:
			strcpy(name,"bgtz");
			if (R[rs]>0)
				nPC=btarg;
			break;
		case 9:
			strcpy(name,"bltz");
			if (R[rs]<0)
				nPC=btarg;
			break;
		case 0xa:
			strcpy(name,"bgez");
			if (R[rs]>=0)
				nPC=btarg;
			break;
		case 0xb:
			strcpy(name,"bc1f");
			if (!FCC)
				nPC=btarg;
			break;
		case 0xc:
			strcpy(name,"bc1t");
			if (FCC)
				nPC=btarg;
			break;

		//---- loads, address = R[rs]+imm ----
		//multi-byte values are assembled least significant byte first
		case 0x20:
			strcpy(name,"lb");
			//sign-extend the byte
			R[rt]=(signed char)di_load_byte(R[rs]+imm);
			break;
		case 0x22:
			strcpy(name,"lbu");
			R[rt]=di_load_byte(R[rs]+imm);
			break;
		case 0x24:
			strcpy(name,"lh");
			i=di_load_byte(R[rs]+imm);
			i=i|(di_load_byte(R[rs]+imm+1)<<8);
			//sign-extend the halfword
			R[rt]=(short)i;
			break;
		case 0x26:
			strcpy(name,"lhu");
			i=di_load_byte(R[rs]+imm);
			R[rt]=i|(di_load_byte(R[rs]+imm+1)<<8);
			break;
		case 0x28:
			strcpy(name,"lw");
			R[rt]=dcache_read_word(R[rs]+imm);
			break;
		case 0x29:
			strcpy(name,"dlw");
			//compute the address once: rt or rt+1 may be the base register,
			//which the first write would otherwise clobber
			i=R[rs]+imm;
			R[rt]=dcache_read_word(i);
			R[rt+1]=dcache_read_word(i+4);
			break;
		case 0x2a:
			strcpy(name,"l.s");
			F.l[rt]=dcache_read_word(R[rs]+imm);
			break;
		case 0x2b:
			strcpy(name,"l.d");
			F.l[rt]=dcache_read_word(R[rs]+imm);
			F.l[rt+1]=dcache_read_word(R[rs]+imm+4);
			break;
		case 0x2c:
			strcpy(name,"lwl");
			//fill R[rt] from its top byte downwards, walking memory downwards
			//from the address until a 4-byte boundary has been consumed
			i=R[rs]+imm;
			R[rt]=(di_load_byte(i)<<24)|(R[rt]&0x00FFFFFF);
			if (i%4!=0)
			{
				R[rt]=(di_load_byte(i-1)<<16)|(R[rt]&0xFF00FFFF);
				if ((i-1)%4!=0)
				{
					R[rt]=(di_load_byte(i-2)<<8)|(R[rt]&0xFFFF00FF);
					if ((i-2)%4!=0)
						R[rt]=(di_load_byte(i-3))|(R[rt]&0xFFFFFF00);
				}
			}
			break;
		case 0x2d:
			strcpy(name,"lwr");
			//fill R[rt] from its bottom byte upwards, walking memory upwards
			//from the address until the next 4-byte boundary
			i=R[rs]+imm;
			R[rt]=(di_load_byte(i))|(R[rt]&0xFFFFFF00);
			if ((i+1)%4!=0)
			{
				R[rt]=(di_load_byte(i+1)<<8)|(R[rt]&0xFFFF00FF);
				if ((i+2)%4!=0)
				{
					R[rt]=(di_load_byte(i+2)<<16)|(R[rt]&0xFF00FFFF);
					if ((i+3)%4!=0)
						R[rt]=(di_load_byte(i+3)<<24)|(R[rt]&0x00FFFFFF);
				}
			}
			break;

		//---- stores, address = R[rs]+imm, written one byte at a time ----
		case 0x30:
			strcpy(name,"sb");
			dcache_write(R[rs]+imm, R[rt]&0xFF);
			break;
		case 0x32:
			strcpy(name,"sh");
			dcache_write(R[rs]+imm, R[rt]&0xFF);
			dcache_write(R[rs]+imm+1, (R[rt]>>8)&0xFF);
			break;
		case 0x34:
			strcpy(name,"sw");
			dcache_write(R[rs]+imm, R[rt]&0xFF);
			dcache_write(R[rs]+imm+1, (R[rt]>>8)&0xFF);
			dcache_write(R[rs]+imm+2, (R[rt]>>16)&0xFF);
			dcache_write(R[rs]+imm+3, (R[rt]>>24)&0xFF);
			break;
		case 0x35:
			strcpy(name,"dsw");
			dcache_write(R[rs]+imm, R[rt]&0xFF);
			dcache_write(R[rs]+imm+1, (R[rt]>>8)&0xFF);
			dcache_write(R[rs]+imm+2, (R[rt]>>16)&0xFF);
			dcache_write(R[rs]+imm+3, (R[rt]>>24)&0xFF);
			dcache_write(R[rs]+imm+4, R[rt+1]&0xFF);
			dcache_write(R[rs]+imm+5, (R[rt+1]>>8)&0xFF);
			dcache_write(R[rs]+imm+6, (R[rt+1]>>16)&0xFF);
			dcache_write(R[rs]+imm+7, (R[rt+1]>>24)&0xFF);
			break;
		case 0x36:
			strcpy(name,"s.s");
			dcache_write(R[rs]+imm, F.l[rt]&0xFF);
			dcache_write(R[rs]+imm+1, (F.l[rt]>>8)&0xFF);
			dcache_write(R[rs]+imm+2, (F.l[rt]>>16)&0xFF);
			dcache_write(R[rs]+imm+3, (F.l[rt]>>24)&0xFF);
			break;
		case 0x37:
			strcpy(name,"s.d");
			dcache_write(R[rs]+imm, F.l[rt]&0xFF);
			dcache_write(R[rs]+imm+1, (F.l[rt]>>8)&0xFF);
			dcache_write(R[rs]+imm+2, (F.l[rt]>>16)&0xFF);
			dcache_write(R[rs]+imm+3, (F.l[rt]>>24)&0xFF);
			dcache_write(R[rs]+imm+4, F.l[rt+1]&0xFF);
			dcache_write(R[rs]+imm+5, (F.l[rt+1]>>8)&0xFF);
			dcache_write(R[rs]+imm+6, (F.l[rt+1]>>16)&0xFF);
			dcache_write(R[rs]+imm+7, (F.l[rt+1]>>24)&0xFF);
			break;
		case 0x38:
			strcpy(name,"dsz");
			//store eight zero bytes
			dcache_write(R[rs]+imm, 0);
			dcache_write(R[rs]+imm+1, 0);
			dcache_write(R[rs]+imm+2, 0);
			dcache_write(R[rs]+imm+3, 0);
			dcache_write(R[rs]+imm+4, 0);
			dcache_write(R[rs]+imm+5, 0);
			dcache_write(R[rs]+imm+6, 0);
			dcache_write(R[rs]+imm+7, 0);
			break;
		case 0x39:
			strcpy(name,"swl");
			//mirror of lwl: top byte of R[rt] first, walking memory downwards
			i=R[rs]+imm;
			dcache_write(i, (R[rt]>>24)&0xFF);
			if (i%4!=0)
			{
				dcache_write(i-1, (R[rt]>>16)&0xFF);
				if ((i-1)%4!=0)
				{
					dcache_write(i-2, (R[rt]>>8)&0xFF);
					if ((i-2)%4!=0)
						dcache_write(i-3, (R[rt])&0xFF);
				}
			}
			break;
		case 0x3a:
			strcpy(name,"swr");
			//mirror of lwr: bottom byte of R[rt] first, walking memory upwards
			i=R[rs]+imm;
			dcache_write(i, (R[rt])&0xFF);
			if ((i+1)%4!=0)
			{
				dcache_write(i+1, (R[rt]>>8)&0xFF);
				if ((i+2)%4!=0)
				{
					dcache_write(i+2, (R[rt]>>16)&0xFF);
					if ((i+3)%4!=0)
						dcache_write(i+3, (R[rt]>>24)&0xFF);
				}
			}
			break;

		//---- loads and stores, address = R[rs]+R[rd] ----
		case 0xc0:
			strcpy(name,"lb");
			R[rt]=(signed char)di_load_byte(R[rs]+R[rd]);
			break;
		case 0xc1:
			strcpy(name,"lbu");
			R[rt]=di_load_byte(R[rs]+R[rd]);
			break;
		case 0xc2:
			strcpy(name,"lh");
			i=di_load_byte(R[rs]+R[rd]);
			R[rt]=(short)(i|(di_load_byte(R[rs]+R[rd]+1)<<8));
			break;
		case 0xc3:
			strcpy(name,"lhu");
			i=di_load_byte(R[rs]+R[rd]);
			R[rt]=i|(di_load_byte(R[rs]+R[rd]+1)<<8);
			break;
		case 0xc4:
			strcpy(name,"lw");
			R[rt]=dcache_read_word(R[rs]+R[rd]);
			break;
		case 0xce:
			strcpy(name,"dlw");
			//compute the address once: rt or rt+1 may be rs or rd
			i=R[rs]+R[rd];
			R[rt]=dcache_read_word(i);
			R[rt+1]=dcache_read_word(i+4);
			break;
		case 0xc5:
			strcpy(name,"l.s");
			F.l[rt]=dcache_read_word(R[rs]+R[rd]);
			break;
		case 0xcf:
			strcpy(name,"l.d");
			F.l[rt]=dcache_read_word(R[rs]+R[rd]);
			F.l[rt+1]=dcache_read_word(R[rs]+R[rd]+4);
			break;
		case 0xc6:
			strcpy(name,"sb");
			dcache_write(R[rs]+R[rd], R[rt]&0xFF);
			break;
		case 0xc7:
			strcpy(name,"sh");
			dcache_write(R[rs]+R[rd], R[rt]&0xFF);
			dcache_write(R[rs]+R[rd]+1, (R[rt]>>8)&0xFF);
			break;
		case 0xc8:
			strcpy(name,"sw");
			dcache_write(R[rs]+R[rd], R[rt]&0xFF);
			dcache_write(R[rs]+R[rd]+1, (R[rt]>>8)&0xFF);
			dcache_write(R[rs]+R[rd]+2, (R[rt]>>16)&0xFF);
			dcache_write(R[rs]+R[rd]+3, (R[rt]>>24)&0xFF);
			break;
		case 0xd0:
			strcpy(name,"dsw");
			dcache_write(R[rs]+R[rd], R[rt]&0xFF);
			dcache_write(R[rs]+R[rd]+1, (R[rt]>>8)&0xFF);
			dcache_write(R[rs]+R[rd]+2, (R[rt]>>16)&0xFF);
			dcache_write(R[rs]+R[rd]+3, (R[rt]>>24)&0xFF);
			dcache_write(R[rs]+R[rd]+4, R[rt+1]&0xFF);
			dcache_write(R[rs]+R[rd]+5, (R[rt+1]>>8)&0xFF);
			dcache_write(R[rs]+R[rd]+6, (R[rt+1]>>16)&0xFF);
			dcache_write(R[rs]+R[rd]+7, (R[rt+1]>>24)&0xFF);
			break;
		case 0xd1:
			strcpy(name,"dsz");
			dcache_write(R[rs]+R[rd], 0);
			dcache_write(R[rs]+R[rd]+1, 0);
			dcache_write(R[rs]+R[rd]+2, 0);
			dcache_write(R[rs]+R[rd]+3, 0);
			dcache_write(R[rs]+R[rd]+4, 0);
			dcache_write(R[rs]+R[rd]+5, 0);
			dcache_write(R[rs]+R[rd]+6, 0);
			dcache_write(R[rs]+R[rd]+7, 0);
			break;
		case 0xc9:
			strcpy(name,"s.s");
			dcache_write(R[rs]+R[rd], F.l[rt]&0xFF);
			dcache_write(R[rs]+R[rd]+1, (F.l[rt]>>8)&0xFF);
			dcache_write(R[rs]+R[rd]+2, (F.l[rt]>>16)&0xFF);
			dcache_write(R[rs]+R[rd]+3, (F.l[rt]>>24)&0xFF);
			break;
		case 0xd2:
			strcpy(name,"s.d");
			dcache_write(R[rs]+R[rd], F.l[rt]&0xFF);
			dcache_write(R[rs]+R[rd]+1, (F.l[rt]>>8)&0xFF);
			dcache_write(R[rs]+R[rd]+2, (F.l[rt]>>16)&0xFF);
			dcache_write(R[rs]+R[rd]+3, (F.l[rt]>>24)&0xFF);
			dcache_write(R[rs]+R[rd]+4, F.l[rt+1]&0xFF);
			dcache_write(R[rs]+R[rd]+5, (F.l[rt+1]>>8)&0xFF);
			dcache_write(R[rs]+R[rd]+6, (F.l[rt+1]>>16)&0xFF);
			dcache_write(R[rs]+R[rd]+7, (F.l[rt+1]>>24)&0xFF);
			break;
		case 0xca:
			strcpy(name,"l.s.r2");
			fatal("l.s.r2 not supported");
			break;
		case 0xcb:
			strcpy(name,"s.s.r2");
			fatal("s.s.r2 not supported");
			break;
		case 0xcc:
			strcpy(name,"lw.r2");
			fatal("lw.r2 not supported");
			break;
		case 0xcd:
			strcpy(name,"sw.r2");
			fatal("sw.r2 not supported");
			break;

		//---- integer arithmetic and logic ----
		case 0x40:
			strcpy(name,"add");
			R[rd]=R[rs]+R[rt];
			break;
		case 0x41:
			strcpy(name,"addi");
			R[rt]=R[rs]+imm;
			break;
		case 0x42:
			strcpy(name,"addu");
			R[rd]=R[rs]+R[rt];
			break;
		case 0x43:
			strcpy(name,"addiu");
			R[rt]=R[rs]+imm;
			break;
		case 0x44:
			strcpy(name,"sub");
			R[rd]=R[rs]-R[rt];
			break;
		case 0x45:
			strcpy(name,"subu");
			R[rd]=R[rs]-R[rt];
			break;
		case 0x46:
		{
			strcpy(name,"mult");
			//signed 32x32 multiply into HI:LO by shift-and-add on the
			//magnitudes, negating the 64-bit result at the end if the
			//operand signs differ
			//extractl(x,pos,1) presumably yields bit pos of x - defined elsewhere
			sign1=0;
			sign2=0;
			HI=0;
			LO=0;
			op1=R[rs];
			op2=R[rt];
			//020000000000 is the sign bit (bit 31)
			if (op1 & 020000000000)
			{
				sign1=1;
				op1=(~op1)+1;
			}
			if (op2 & 020000000000)
			{
				sign2=1;
				op2=(~op2)+1;
			}
			//bit 31 of the multiplier is handled before the loop
			if (op1 & 020000000000)
				LO=op2;
			//remaining multiplier bits 30..0, most significant first
			for (i=0; i<31; i++)
			{
				//shift HI:LO left by one
				HI=HI<<1;
				HI=HI+extractl(LO,31,1);
				LO=LO<<1;
				if ((extractl(op1,30-i,1))==1)
				{
					//carry into HI if LO+op2 overflows 32 bits
					if (((unsigned)037777777777-(unsigned)LO)<(unsigned)op2)
					{
						HI=HI+1;
					}
					LO=LO+op2;
				}
			}
			if (sign1^sign2)
			{
				//two's complement negate of the 64-bit HI:LO pair
				LO=~LO;
				HI=~HI;
				if ((unsigned)LO==037777777777)
					HI=HI+1;
				LO=LO+1;
			}
		}
		break;
		case 0x47:
		{
			strcpy(name,"multu");
			//unsigned 32x32 multiply into HI:LO, same shift-and-add scheme
			HI=0;
			LO=0;
			if (R[rs]&020000000000)
				LO=R[rt];
			for (i=0; i<31; i++)
			{
				HI=HI<<1;
				HI=HI+extractl(LO,31,1);
				LO=LO<<1;
				if (extractl(R[rs],30-i,1)==1)
				{
					if (((unsigned)037777777777-(unsigned)LO)<(unsigned)R[rt])
						HI=HI+1;
					LO=LO+R[rt];
				}
			}
		}
		break;
		case 0x48:
			strcpy(name,"div");
			if (R[rt]==0)
				fatal("Divide by 0");
			//quotient to LO, remainder to HI
			LO=IDIV(R[rs],R[rt]);
			HI=IMOD(R[rs],R[rt]);
			break;
		case 0x49:
			strcpy(name,"divu");
			if (R[rt]==0)
				fatal("Divide by 0");
			LO=IDIV((unsigned)R[rs],(unsigned)R[rt]);
			HI=IMOD((unsigned)R[rs],(unsigned)R[rt]);
			break;
		case 0x4a:
			strcpy(name,"mfhi");
			R[rd]=HI;
			break;
		case 0x4b:
			strcpy(name,"mthi");
			HI=R[rs];
			break;
		case 0x4c:
			strcpy(name,"mflo");
			R[rd]=LO;
			break;
		case 0x4d:
			strcpy(name,"mtlo");
			LO=R[rs];
			break;
		case 0x4e:
			strcpy(name,"and");
			R[rd]=R[rs]&R[rt];
			break;
		case 0x4f:
			strcpy(name,"andi");
			R[rt]=R[rs]&uimm;
			break;
		case 0x50:
			strcpy(name,"or");
			R[rd]=R[rs]|R[rt];
			break;
		case 0x51:
			strcpy(name,"ori");
			R[rt]=R[rs]|uimm;
			break;
		case 0x52:
			strcpy(name,"xor");
			R[rd]=R[rs]^R[rt];
			break;
		case 0x53:
			strcpy(name,"xori");
			R[rt]=R[rs]^uimm;
			break;
		case 0x54:
			strcpy(name,"nor");
			R[rd]=~(R[rs]|R[rt]);
			break;

		//---- shifts ----
		case 0x55:
			strcpy(name,"sll");
			R[rd]=R[rt]<<shamt;
			break;
		case 0x56:
			strcpy(name,"sllv");
			//only the low five bits of R[rs] select the shift amount
			R[rd]=R[rt]<<(R[rs]&037);
			break;
		case 0x57:
			strcpy(name,"srl");
			R[rd]=(unsigned)R[rt]>>shamt;
			break;
		case 0x58:
			strcpy(name,"srlv");
			R[rd]=(unsigned)R[rt]>>(R[rs]&037);
			break;
		case 0x59:
			strcpy(name,"sra");
			if (R[rt]&020000000000)
			{
				//negative value: shift one bit at a time and force the sign
				//bit back in on every step
				R[rd]=R[rt];
				for (i=0; i<shamt; i++)
					R[rd]=(R[rd]>>1)|020000000000;
			}
			else
				R[rd]=R[rt]>>shamt;
			break;
		case 0x5a:
			strcpy(name,"srav");
			shamt=R[rs]&037;
			if (R[rt]&020000000000)
			{
				R[rd]=R[rt];
				for (i=0; i<shamt; i++)
					R[rd]=(R[rd]>>1)|020000000000;
			}
			else
				R[rd]=R[rt]>>shamt;
			break;

		//---- set-on-less-than ----
		case 0x5b:
			strcpy(name,"slt");
			R[rd]=(R[rs]<R[rt])? 1:0;
			break;
		case 0x5c:
			strcpy(name,"slti");
			R[rt]=(R[rs]<imm)? 1:0;
			break;
		case 0x5d:
			strcpy(name,"sltu");
			R[rd]=((unsigned)R[rs]<(unsigned)R[rt])? 1:0;
			break;
		case 0x5e:
			strcpy(name,"sltiu");
			//imm is sign-extended first and then compared as unsigned
			R[rt]=((unsigned)R[rs]<(unsigned)imm)? 1:0;
			break;

		//---- floating point arithmetic ----
		//single precision uses F.f[n]; double precision uses F.d[n>>1]
		case 0x70:
			strcpy(name,"add.s");
			F.f[rd]=F.f[rs]+F.f[rt];
			break;
		case 0x71:
			strcpy(name,"add.d");
			F.d[rd>>1]=F.d[rs>>1]+F.d[rt>>1];
			break;
		case 0x72:
			strcpy(name,"sub.s");
			F.f[rd]=F.f[rs]-F.f[rt];
			break;
		case 0x73:
			strcpy(name,"sub.d");
			F.d[rd>>1]=F.d[rs>>1]-F.d[rt>>1];
			break;
		case 0x74:
			strcpy(name,"mul.s");
			F.f[rd]=F.f[rs]*F.f[rt];
			break;
		case 0x75:
			strcpy(name,"mul.d");
			F.d[rd>>1]=F.d[rs>>1]*F.d[rt>>1];
			break;
		case 0x76:
			strcpy(name,"div.s");
			if (F.f[rt]==0)
				fatal("Divide by 0");
			F.f[rd]=F.f[rs]/F.f[rt];
			break;
		case 0x77:
			strcpy(name,"div.d");
			if (F.d[rt>>1]==0)
				fatal("Divide by 0");
			F.d[rd>>1]=F.d[rs>>1]/F.d[rt>>1];
			break;
		case 0x78:
			strcpy(name,"abs.s");
			F.f[rd]=fabs((double)F.f[rs]);
			break;
		case 0x79:
			strcpy(name,"abs.d");
			F.d[rd>>1]=fabs(F.d[rs>>1]);
			break;
		case 0x7a:
			strcpy(name,"mov.s");
			F.f[rd]=F.f[rs];
			break;
		case 0x7b:
			strcpy(name,"mov.d");
			F.d[rd>>1]=F.d[rs>>1];
			break;
		case 0x7c:
			strcpy(name,"neg.s");
			F.f[rd]=-F.f[rs];
			break;
		case 0x7d:
			strcpy(name,"neg.d");
			F.d[rd>>1]=-F.d[rs>>1];
			break;

		//---- floating point conversions (s=single, d=double, w=integer word) ----
		case 0x80:
			strcpy(name,"cvt.s.d");
			F.f[rd]=(float)F.d[rs>>1];
			break;
		case 0x81:
			strcpy(name,"cvt.s.w");
			F.f[rd]=(float)F.l[rs];
			break;
		case 0x82:
			strcpy(name,"cvt.d.s");
			F.d[rd>>1]=(double)F.f[rs];
			break;
		case 0x83:
			strcpy(name,"cvt.d.w");
			F.d[rd>>1]=(double)F.l[rs];
			break;
		case 0x84:
			strcpy(name,"cvt.w.s");
			F.l[rd]=(int)F.f[rs];
			break;
		case 0x85:
			strcpy(name,"cvt.w.d");
			F.l[rd]=(int)F.d[rs>>1];
			break;

		//---- floating point compares, result goes to FCC ----
		case 0x90:
			strcpy(name,"c.eq.s");
			FCC=F.f[rs]==F.f[rt];
			break;
		case 0x91:
			strcpy(name,"c.eq.d");
			FCC=F.d[rs>>1]==F.d[rt>>1];
			break;
		case 0x92:
			strcpy(name,"c.lt.s");
			FCC=F.f[rs]<F.f[rt];
			break;
		case 0x93:
			strcpy(name,"c.lt.d");
			FCC=F.d[rs>>1]<F.d[rt>>1];
			break;
		case 0x94:
			strcpy(name,"c.le.s");
			FCC=F.f[rs]<=F.f[rt];
			break;
		case 0x95:
			strcpy(name,"c.le.d");
			FCC=F.d[rs>>1]<=F.d[rt>>1];
			break;
		case 0x96:
			strcpy(name,"sqrt.s");
			F.f[rd]=sqrt((double)F.f[rs]);
			break;
		case 0x97:
			strcpy(name,"sqrt.d");
			F.d[rd>>1]=sqrt(F.d[rs>>1]);
			break;

		//---- miscellaneous ----
		case 0xa0:
			strcpy(name,"syscall");
			handle_syscalls();
			break;
		case 0xa1:
			strcpy(name,"break");
			//do nothing for the moment
			break;
		case 0xa2:
			strcpy(name,"lui");
			R[rt]=uimm<<16;
			break;
		case 0xa3:
			strcpy(name,"mfc1");
			R[rt]=F.l[rs];
			break;
		case 0xa7:
			strcpy(name,"dmfc1");
			R[rt]=F.l[rs];
			R[rt+1]=F.l[rs+1];
			break;
		case 0xa4:
			strcpy(name,"cfc1");
			//not implemented: executes as a no-op
			break;
		case 0xa5:
			strcpy(name,"mtc1");
			F.l[rs]=R[rt];
			break;
		case 0xa8:
			strcpy(name,"dmtc1");
			F.l[rs]=R[rt];
			F.l[rs+1]=R[rt+1];
			break;
		case 0xa6:
			strcpy(name,"ctc1");
			//not implemented: executes as a no-op
			break;
		default:
			strcpy(name,"undef");
			fatal("Undefined instruction");
			break;
	}
	PC=nPC;
}
//myinstoutorder.c
//Michael Black, 2006
//
//myinstoutorder.c actually executes the PISA instructions for the out-of-order processor

#include "mysim.h"
#include "mysimoutorder.h"
#include <math.h>

//the following variables are defined in mysim.c
extern unsigned int PC;
extern int R[NUM_REGS];
extern int HI;
extern int LO;
extern int FCC;
extern Ftype F;
extern int instruction_counter;

//the following are defined in mysimoutorder.c
extern Inst ooo_fi_inst;
extern Reservation_Station *resstat;
extern Reorder_Buffer_Entry *ROB;

//ooo_dofetch decodes the opcode in the low 8 bits of inst_lower and
//publishes the result in the global ooo_fi_inst (inst_upper is not examined):
//  name      - the mnemonic ("undef" for an opcode that is not in the table)
//  sinks_*   - 1 for each register the instruction consumes, else 0
//  sources_* - 1 for each register the instruction produces, else 0
//              (e.g. add sinks rs and rt and sources rd)
//  type      - 0=add/sub, 1=int. mult, 2=float, 3=float mult, 4=load,
//              5=store, 6=j, 7=jr, 8=beq, 9=syscall
//  rtype     - functional unit needed: 0=add/sub, 1=int. mult, 2=float,
//              3=float mult, 4=load/store, 5=other (needs no fu)
//no other field of ooo_fi_inst is modified
void ooo_dofetch(int inst_upper, int inst_lower)
{
	//one bit for every sinks_*/sources_* field of Inst
	enum
	{
		SNK_RS =1<<0,
		SNK_RT =1<<1,
		SNK_RT2=1<<2,
		SNK_HI =1<<3,
		SNK_LO =1<<4,
		SNK_FS =1<<5,
		SNK_FS2=1<<6,
		SNK_FT =1<<7,
		SNK_FT2=1<<8,
		SNK_FCC=1<<9,
		SRC_RT =1<<10,
		SRC_RT2=1<<11,
		SRC_RD =1<<12,
		SRC_HI =1<<13,
		SRC_LO =1<<14,
		SRC_RA =1<<15,
		SRC_FS =1<<16,
		SRC_FS2=1<<17,
		SRC_FT =1<<18,
		SRC_FT2=1<<19,
		SRC_FD =1<<20,
		SRC_FD2=1<<21,
		SRC_FCC=1<<22,

		//combinations shared by many opcodes
		RS_RT   =SNK_RS|SNK_RT,			//reads rs and rt, writes nothing
		RS_TO_RT=SNK_RS|SRC_RT,			//reads rs, writes rt
		RT_TO_RD=SNK_RT|SRC_RD,			//reads rt, writes rd
		RRR     =SNK_RS|SNK_RT|SRC_RD,		//reads rs and rt, writes rd
		MULDIV  =SNK_RS|SNK_RT|SRC_HI|SRC_LO,	//reads rs and rt, writes HI and LO
		FP1S    =SNK_FS|SRC_FD,			//single: reads fs, writes fd
		FP1D    =FP1S|SNK_FS2|SRC_FD2,		//double: same plus second halves
		FP2S    =SNK_FS|SNK_FT|SRC_FD,		//single: reads fs and ft, writes fd
		FP2D    =FP2S|SNK_FS2|SNK_FT2|SRC_FD2,	//double: same plus second halves
		FCMPS   =SNK_FS|SNK_FT|SRC_FCC,		//single compare: writes FCC
		FCMPD   =FCMPS|SNK_FS2|SNK_FT2		//double compare: writes FCC
	};

	//decode table indexed by opcode; entries that are not listed stay
	//zero-filled (null mnemonic) and are treated as undefined opcodes
	static const struct
	{
		const char *mnemonic;
		int type;
		int rtype;
		int flags;
	} decode[256] =
	{
		//jumps and branches
		[0x00]={"nop",     0, 5, 0},
		[0x01]={"j",       6, 5, 0},
		[0x02]={"jal",     6, 5, SRC_RA},
		[0x03]={"jr",      7, 5, SNK_RS},
		[0x04]={"jalr",    7, 5, SNK_RS|SRC_RA},
		[0x05]={"beq",     8, 0, RS_RT},
		[0x06]={"bne",     8, 0, RS_RT},
		[0x07]={"blez",    8, 0, RS_RT},
		[0x08]={"bgtz",    8, 0, RS_RT},
		[0x09]={"bltz",    8, 0, RS_RT},
		[0x0a]={"bgez",    8, 0, RS_RT},
		[0x0b]={"bc1f",    8, 0, SNK_FCC},
		[0x0c]={"bc1t",    8, 0, SNK_FCC},

		//loads
		[0x20]={"lb",      4, 4, RS_TO_RT},
		[0x22]={"lbu",     4, 4, RS_TO_RT},
		[0x24]={"lh",      4, 4, RS_TO_RT},
		[0x26]={"lhu",     4, 4, RS_TO_RT},
		[0x28]={"lw",      4, 4, RS_TO_RT},
		[0x29]={"dlw",     4, 4, RS_TO_RT|SRC_RT2},
		[0x2a]={"l.s",     4, 4, SNK_RS|SRC_FT},
		[0x2b]={"l.d",     4, 4, SNK_RS|SRC_FT|SRC_FT2},
		[0x2c]={"lwl",     4, 4, RS_TO_RT},
		[0x2d]={"lwr",     4, 4, RS_TO_RT},

		//stores
		[0x30]={"sb",      5, 4, RS_RT},
		[0x32]={"sh",      5, 4, RS_RT},
		[0x34]={"sw",      5, 4, RS_RT},
		[0x35]={"dsw",     5, 4, RS_RT|SNK_RT2},
		[0x36]={"s.s",     5, 4, SNK_RS|SNK_FT},
		[0x37]={"s.d",     5, 4, SNK_RS|SNK_FT|SNK_FT2},
		[0x38]={"dsz",     5, 4, SNK_RS},
		[0x39]={"swl",     5, 4, RS_RT},
		[0x3a]={"swr",     5, 4, RS_RT},

		//integer arithmetic and logic
		[0x40]={"add",     0, 0, RRR},
		[0x41]={"addi",    0, 0, RS_TO_RT},
		[0x42]={"addu",    0, 0, RRR},
		[0x43]={"addiu",   0, 0, RS_TO_RT},
		[0x44]={"sub",     0, 0, RRR},
		[0x45]={"subu",    0, 0, RRR},
		[0x46]={"mult",    1, 1, MULDIV},
		[0x47]={"multu",   1, 1, MULDIV},
		[0x48]={"div",     1, 1, MULDIV},
		[0x49]={"divu",    1, 1, MULDIV},
		[0x4a]={"mfhi",    0, 5, SNK_HI|SRC_RD},
		[0x4b]={"mthi",    0, 5, SNK_RS|SRC_HI},
		[0x4c]={"mflo",    0, 5, SNK_LO|SRC_RD},
		[0x4d]={"mtlo",    0, 5, SNK_RS|SRC_LO},
		[0x4e]={"and",     0, 0, RRR},
		[0x4f]={"andi",    0, 0, RS_TO_RT},
		[0x50]={"or",      0, 0, RRR},
		[0x51]={"ori",     0, 0, RS_TO_RT},
		[0x52]={"xor",     0, 0, RRR},
		[0x53]={"xori",    0, 0, RS_TO_RT},
		[0x54]={"nor",     0, 0, RRR},
		[0x55]={"sll",     0, 0, RT_TO_RD},
		[0x56]={"sllv",    0, 0, RRR},
		[0x57]={"srl",     0, 0, RT_TO_RD},
		[0x58]={"srlv",    0, 0, RRR},
		[0x59]={"sra",     0, 0, RT_TO_RD},
		[0x5a]={"srav",    0, 0, RRR},
		[0x5b]={"slt",     0, 0, RRR},
		[0x5c]={"slti",    0, 0, RS_TO_RT},
		[0x5d]={"sltu",    0, 0, RRR},
		[0x5e]={"sltiu",   0, 0, RS_TO_RT},

		//floating point arithmetic
		[0x70]={"add.s",   2, 2, FP2S},
		[0x71]={"add.d",   2, 2, FP2D},
		[0x72]={"sub.s",   2, 2, FP2S},
		[0x73]={"sub.d",   2, 2, FP2D},
		[0x74]={"mul.s",   3, 3, FP2S},
		[0x75]={"mul.d",   3, 3, FP2D},
		[0x76]={"div.s",   3, 3, FP2S},
		[0x77]={"div.d",   3, 3, FP2D},
		[0x78]={"abs.s",   2, 2, FP1S},
		[0x79]={"abs.d",   2, 2, FP1D},
		[0x7a]={"mov.s",   2, 5, FP1S},
		[0x7b]={"mov.d",   2, 5, FP1D},
		[0x7c]={"neg.s",   2, 2, FP1S},
		[0x7d]={"neg.d",   2, 2, FP1D},

		//floating point conversions
		[0x80]={"cvt.s.d", 2, 2, FP1S|SNK_FS2},
		[0x81]={"cvt.s.w", 2, 2, FP1S},
		[0x82]={"cvt.d.s", 2, 2, FP1S|SRC_FD2},
		[0x83]={"cvt.d.w", 2, 2, FP1S|SRC_FD2},
		[0x84]={"cvt.w.s", 2, 2, FP1S},
		[0x85]={"cvt.w.d", 2, 2, FP1S|SNK_FS2},

		//floating point compares and square roots
		[0x90]={"c.eq.s",  2, 2, FCMPS},
		[0x91]={"c.eq.d",  2, 2, FCMPD},
		[0x92]={"c.lt.s",  2, 2, FCMPS},
		[0x93]={"c.lt.d",  2, 2, FCMPD},
		[0x94]={"c.le.s",  2, 2, FCMPS},
		[0x95]={"c.le.d",  2, 2, FCMPD},
		[0x96]={"sqrt.s",  2, 2, FP1S},
		[0x97]={"sqrt.d",  2, 2, FP1D},

		//miscellaneous
		[0xa0]={"syscall", 9, 5, 0},
		[0xa1]={"break",   0, 5, 0},
		[0xa2]={"lui",     0, 5, SRC_RT},
		[0xa3]={"mfc1",    2, 5, SNK_FS|SRC_RT},
		[0xa4]={"cfc1",    0, 5, 0},
		[0xa5]={"mtc1",    2, 5, SNK_RT|SRC_FS},
		[0xa6]={"ctc1",    0, 5, 0},
		[0xa7]={"dmfc1",   2, 5, SNK_FS|SRC_RT|SNK_FS2|SRC_RT2},
		[0xa8]={"dmtc1",   2, 5, SNK_RT|SRC_FS|SNK_RT2|SRC_FS2},

		//register+register loads/stores and the .r2 forms: only the name
		//is reported, with the safe defaults type=0 / rtype=5 and no
		//dependencies
		[0xc0]={"lb",      0, 5, 0},
		[0xc1]={"lbu",     0, 5, 0},
		[0xc2]={"lh",      0, 5, 0},
		[0xc3]={"lhu",     0, 5, 0},
		[0xc4]={"lw",      0, 5, 0},
		[0xc5]={"l.s",     0, 5, 0},
		[0xc6]={"sb",      0, 5, 0},
		[0xc7]={"sh",      0, 5, 0},
		[0xc8]={"sw",      0, 5, 0},
		[0xc9]={"s.s",     0, 5, 0},
		[0xca]={"l.s.r2",  0, 5, 0},
		[0xcb]={"s.s.r2",  0, 5, 0},
		[0xcc]={"lw.r2",   0, 5, 0},
		[0xcd]={"sw.r2",   0, 5, 0},
		[0xce]={"dlw",     0, 5, 0},
		[0xcf]={"l.d",     0, 5, 0},
		[0xd0]={"dsw",     0, 5, 0},
		[0xd1]={"dsz",     0, 5, 0},
		[0xd2]={"s.d",     0, 5, 0}
	};

	const int op=inst_lower&0xff;
	//defaults describe an undefined opcode: no dependencies, no fu needed
	const char *mnemonic="undef";
	int kind=0;
	int unit=5;
	int deps=0;

	if (decode[op].mnemonic!=0)
	{
		mnemonic=decode[op].mnemonic;
		kind=decode[op].type;
		unit=decode[op].rtype;
		deps=decode[op].flags;
	}

	strcpy(ooo_fi_inst.name,mnemonic);

	//expand the bit set into the individual 0/1 fields
	ooo_fi_inst.sinks_rs=(deps&SNK_RS)? 1:0;
	ooo_fi_inst.sinks_rt=(deps&SNK_RT)? 1:0;
	ooo_fi_inst.sinks_rt2=(deps&SNK_RT2)? 1:0;
	ooo_fi_inst.sinks_HI=(deps&SNK_HI)? 1:0;
	ooo_fi_inst.sinks_LO=(deps&SNK_LO)? 1:0;
	ooo_fi_inst.sinks_fs=(deps&SNK_FS)? 1:0;
	ooo_fi_inst.sinks_fs2=(deps&SNK_FS2)? 1:0;
	ooo_fi_inst.sinks_ft=(deps&SNK_FT)? 1:0;
	ooo_fi_inst.sinks_ft2=(deps&SNK_FT2)? 1:0;
	ooo_fi_inst.sinks_FCC=(deps&SNK_FCC)? 1:0;
	ooo_fi_inst.sources_rt=(deps&SRC_RT)? 1:0;
	ooo_fi_inst.sources_rt2=(deps&SRC_RT2)? 1:0;
	ooo_fi_inst.sources_rd=(deps&SRC_RD)? 1:0;
	ooo_fi_inst.sources_HI=(deps&SRC_HI)? 1:0;
	ooo_fi_inst.sources_LO=(deps&SRC_LO)? 1:0;
	ooo_fi_inst.sources_ra=(deps&SRC_RA)? 1:0;
	ooo_fi_inst.sources_fs=(deps&SRC_FS)? 1:0;
	ooo_fi_inst.sources_fs2=(deps&SRC_FS2)? 1:0;
	ooo_fi_inst.sources_ft=(deps&SRC_FT)? 1:0;
	ooo_fi_inst.sources_ft2=(deps&SRC_FT2)? 1:0;
	ooo_fi_inst.sources_fd=(deps&SRC_FD)? 1:0;
	ooo_fi_inst.sources_fd2=(deps&SRC_FD2)? 1:0;
	ooo_fi_inst.sources_FCC=(deps&SRC_FCC)? 1:0;
	ooo_fi_inst.type=kind;
	ooo_fi_inst.rtype=unit;
}

//oo_doexecute executes the instruction in reservation station ri
//the source operands are available at that res. stat 
//and the results will be written back to that res. stat
void ooo_doexecute(int ri)
{
	Reservation_Station r;

	int opcode=resstat[ri].instruction.inst_lower&0xff;
	int targ=(resstat[ri].instruction.inst_upper)&0x3ffffff;
        short int imm=(resstat[ri].instruction.inst_upper)&0xffff;
        unsigned int uimm=(resstat[ri].instruction.inst_upper)&0xffff;
        int shamt=(resstat[ri].instruction.inst_upper)&0xff;
        int bcode=(resstat[ri].instruction.inst_upper)&0xfffff;
        int i,sign1,sign2,op1,op2;
	fpr ftemp_s,ftemp_t,ftemp_d;

	//get the source operands
	r.r_rs=resstat[ri].r_rs;
	r.r_rt=resstat[ri].r_rt;
	r.r_rt2=resstat[ri].r_rt2;
	r.r_HI=resstat[ri].r_HI;
	r.r_LO=resstat[ri].r_LO;

	r.r_FCC=resstat[ri].r_FCC;
	r.f_fs.l=resstat[ri].f_fs.l;
	r.f_fs2.l=resstat[ri].f_fs2.l;
	r.f_ft.l=resstat[ri].f_ft.l;
	r.f_ft2.l=resstat[ri].f_ft2.l;

	//floating point is stored in integer form
	//to do double floating point operations, the conversion must be done in advance 
	ftemp_s.ll[0]=r.f_fs.l;
	ftemp_s.ll[1]=r.f_fs2.l;
	ftemp_t.ll[0]=r.f_ft.l;
	ftemp_t.ll[1]=r.f_ft2.l;


	switch(opcode)
	{
		case 0:
//			strcpy(name,"nop");
			break; 
		case 1:
//			strcpy(name,"j");
			break; 
		case 2:
//			strcpy(name,"jal");
			r.r_raout=resstat[ri].instruction.addr+8;
			break; 
		case 3:
//			strcpy(name,"jr");
			r.r_rdout=r.r_rs;
			break; 
		case 4:
//			strcpy(name,"jalr");
			r.r_rdout=r.r_rs;
			r.r_raout=resstat[ri].instruction.addr+8;
			break; 
		case 5:
//			strcpy(name,"beq");
			if (r.r_rs==r.r_rt)
				r.r_rdout=resstat[ri].instruction.addr+8+(imm<<2);
			else
				r.r_rdout=resstat[ri].instruction.addr+8;
			r.r_rtout=resstat[ri].instruction.addr+8+(imm<<2);
			break; 
		case 6:
//			strcpy(name,"bne");
			if (r.r_rs!=r.r_rt)
				r.r_rdout=resstat[ri].instruction.addr+8+(imm<<2);
			else
				r.r_rdout=resstat[ri].instruction.addr+8;
			r.r_rtout=resstat[ri].instruction.addr+8+(imm<<2);
			break; 
		case 7:
//			strcpy(name,"blez");
			if (r.r_rs<=0)
				r.r_rdout=resstat[ri].instruction.addr+8+(imm<<2);
			else
				r.r_rdout=resstat[ri].instruction.addr+8;
			r.r_rtout=resstat[ri].instruction.addr+8+(imm<<2);
			break; 
		case 8:
//			strcpy(name,"bgtz");
			if (r.r_rs>0)
				r.r_rdout=resstat[ri].instruction.addr+8+(imm<<2);
			else
				r.r_rdout=resstat[ri].instruction.addr+8;
			r.r_rtout=resstat[ri].instruction.addr+8+(imm<<2);
			break; 
		case 9:
//			strcpy(name,"bltz");
			if (r.r_rs<0)
				r.r_rdout=resstat[ri].instruction.addr+8+(imm<<2);
			else
				r.r_rdout=resstat[ri].instruction.addr+8;
			r.r_rtout=resstat[ri].instruction.addr+8+(imm<<2);
			break; 
		case 0xa:
//			strcpy(name,"bgez");
			if (r.r_rs>=0)
				r.r_rdout=resstat[ri].instruction.addr+8+(imm<<2);
			else
				r.r_rdout=resstat[ri].instruction.addr+8;
			r.r_rtout=resstat[ri].instruction.addr+8+(imm<<2);
			break; 
		case 0xb:
//			strcpy(name,"bc1f");
			if (!r.r_FCC)
				r.r_rdout=resstat[ri].instruction.addr+8+(imm<<2);
			else
				r.r_rdout=resstat[ri].instruction.addr+8;
			r.r_rtout=resstat[ri].instruction.addr+8+(imm<<2);
			break; 
		case 0xc:
//			strcpy(name,"bc1t");
			if (r.r_FCC)
				r.r_rdout=resstat[ri].instruction.addr+8+(imm<<2);
			else
				r.r_rdout=resstat[ri].instruction.addr+8;
			r.r_rtout=resstat[ri].instruction.addr+8+(imm<<2);
			break; 
		case 0x20:
//			strcpy(name,"lb");
			r.r_rtout=dcache_read(r.r_rs+(short)imm);
			power_model(12,0,0);
			break; 
		case 0x22:
//			strcpy(name,"lbu");
			r.r_rtout=(unsigned char)dcache_read(r.r_rs+(short)imm);
			power_model(12,0,0);
			break; 
		case 0x24:
//			strcpy(name,"lh");
			i=dcache_read(r.r_rs+(short)imm);
			i=i+(dcache_read(r.r_rs+(short)imm+1)<<8);
			r.r_rtout=(short)i;
			power_model(12,0,0);
			break; 
		case 0x26:
//			strcpy(name,"lhu");
			i=(unsigned char)dcache_read(r.r_rs+(short)imm);
			r.r_rtout=i+((unsigned short)dcache_read(r.r_rs+(short)imm+1)<<8);
			power_model(12,0,0);
			break; 
		case 0x28:
//			strcpy(name,"lw");
			r.r_rtout=dcache_read_word(r.r_rs+(short)imm);
			power_model(12,0,0);
			break; 
		case 0x29:
//			strcpy(name,"dlw");
			r.r_rtout=dcache_read_word(r.r_rs+(short)imm);
			r.r_rt2out=dcache_read_word(r.r_rs+(short)imm+4);
			power_model(12,0,0);
			break; 
		case 0x2a:
//			strcpy(name,"l.s");
			r.f_ftout.l=dcache_read_word(r.r_rs+(short)imm);
			power_model(12,0,0);
			break; 
		case 0x2b:
//			strcpy(name,"l.d");
			r.f_ftout.l=dcache_read_word(r.r_rs+(short)imm);
			r.f_ft2out.l=dcache_read_word(r.r_rs+(short)imm+4);
			power_model(12,0,0);
			break; 
		case 0x2c:
//			strcpy(name,"lwl");
                        i=r.r_rs+(short)imm;
                        r.r_rtout=(dcache_read(i)<<24)+(r.r_rtout&0x00FFFFFF);
                        if (i%4!=0)
                        {
                                r.r_rtout=(dcache_read(i-1)<<16)+(r.r_rtout&0xFF00FFFF);
                                if ((i-1)%4!=0)
                                {
                                        r.r_rtout=(dcache_read(i-2)<<8)+(r.r_rtout&0xFFFF00FF);
                                        if ((i-2)%4!=0)
                                                r.r_rtout=(dcache_read(i-3))+(r.r_rtout&0xFFFFFF00);
                                }
                        }
			power_model(12,0,0);
			break; 
		case 0x2d:
//			strcpy(name,"lwr");
                        i=r.r_rs+(short)imm;
                        r.r_rtout=(dcache_read(i))+(r.r_rtout&0xFFFFFF00);
                        if ((i+1)%4!=0)
                        {
                                r.r_rtout=(dcache_read(i+1)<<8)+(r.r_rtout&0xFFFF00FF);
                                if ((i+2)%4!=0)
                                {
                                        r.r_rtout=(dcache_read(i+2)<<16)+(r.r_rtout&0xFF00FFFF);
                                        if ((i+3)%4!=0)
                                                r.r_rtout=(dcache_read(i+3)<<24)+(r.r_rtout&0x00FFFFFF);
                                }  
                        }
			power_model(12,0,0);
			break; 
		case 0x30:
//			strcpy(name,"sb");
			break; 
		case 0x32:
//			strcpy(name,"sh");
			break; 
		case 0x34:
//			strcpy(name,"sw");
			break; 
		case 0x35:
//			strcpy(name,"dsw");
			break; 
		case 0x36:
//			strcpy(name,"s.s");
			break; 
		case 0x37:
//			strcpy(name,"s.d");
			break; 
		case 0x38:
//			strcpy(name,"dsz");
			break; 
		case 0x39:
//			strcpy(name,"swl");
			break; 
		case 0x3a:
//			strcpy(name,"swr");
			break; 
		case 0xc0:
//			strcpy(name,"lb");
			break; 
		case 0xc1:
//			strcpy(name,"lbu");
			break; 
		case 0xc2:
//			strcpy(name,"lh");
			break; 
		case 0xc3:
//			strcpy(name,"lhu");
			break; 
		case 0xc4:
//			strcpy(name,"lw");
			break; 
		case 0xce:
//			strcpy(name,"dlw");
			break; 
		case 0xc5:
//			strcpy(name,"l.s");
			break; 
		case 0xcf:
//			strcpy(name,"l.d");
			break;
		case 0xc6:
//			strcpy(name,"sb");
			break; 
		case 0xc7:
//			strcpy(name,"sh");
			break; 
		case 0xc8:
//			strcpy(name,"sw");
			break; 
		case 0xd0:
//			strcpy(name,"dsw");
			break; 
		case 0xd1:
//			strcpy(name,"dsz");
			break; 
		case 0xc9:
//			strcpy(name,"s.s");
			break; 
		case 0xd2:
//			strcpy(name,"s.d");
			break; 
		case 0xca:
//			strcpy(name,"l.s.r2");
			break; 
		case 0xcb:
//			strcpy(name,"s.s.r2");
			break; 
		case 0xcc:
//			strcpy(name,"lw.r2");
			break; 
		case 0xcd:
//			strcpy(name,"sw.r2");
			break; 
		case 0x40:
//			strcpy(name,"add");
			r.r_rdout=r.r_rs+r.r_rt;
			break; 
		case 0x41:
//			strcpy(name,"addi");
			r.r_rtout=r.r_rs+(short)imm;
			break; 
		case 0x42:
//			strcpy(name,"addu");
			r.r_rdout=r.r_rs+r.r_rt;
			break; 
		case 0x43:
//			strcpy(name,"addiu");
			r.r_rtout=r.r_rs+(short)imm;
			break; 
		case 0x44:
//			strcpy(name,"sub");
			r.r_rdout=r.r_rs-r.r_rt;
			break; 
		case 0x45:
//			strcpy(name,"subu");
			r.r_rdout=r.r_rs-r.r_rt;
			break; 
		case 0x46:
		{
//			strcpy(name,"mult");
                        sign1=0;
                        sign2=0;
                        r.r_HIout=0;
                        r.r_LOout=0;
                        op1=r.r_rs;
                        op2=r.r_rt;
                        if (op1 & 020000000000)
                        {
                                sign1=1;
                                op1=(~op1)+1;
                        }
                        if (op2 & 020000000000)
                        {
                                sign2=1;
                                op2=(~op2)+1;
                        }
                        if (op1 & 020000000000)
                                r.r_HIout=op2;
                        for (i=0; i<31; i++)
                        {
                                r.r_HIout=r.r_HIout<<1;
                                r.r_HIout=r.r_HIout+extractl(r.r_LOout,31,1);
                                r.r_LOout=r.r_LOout<<1;
                                if ((extractl(op1,30-i,1))==1)
                                {
                                        if (((unsigned)037777777777-(unsigned)r.r_LOout)<(unsigned)op2)
                                        {
                                                r.r_HIout=r.r_HIout+1;
                                        }
                                        r.r_LOout=r.r_LOout+op2;
                                }
                        }
                        if (sign1^sign2)
                        {
                                r.r_LOout=~r.r_LOout;
                                r.r_HIout=~r.r_HIout;
                                if ((unsigned)r.r_LOout==037777777777)
                                        r.r_HIout=r.r_HIout+1;
                                r.r_LOout=r.r_LOout+1;
                        }
                }
		break; 
		case 0x47:
		{
//			strcpy(name,"multu");
                        r.r_HIout=0;
                        r.r_LOout=0;
                        if (r.r_rs&020000000000)
                                r.r_LOout=r.r_rt;
                        for (i=0; i<31; i++)
                        {
                                r.r_HIout=r.r_HIout<<1;
                                r.r_HIout=r.r_HIout+extractl(r.r_LOout,31,1);
                                r.r_LOout=r.r_LOout<<1;
                                if (extractl(r.r_rs,30-i,1)==1)
                                {
                                        if (((unsigned)037777777777-(unsigned)r.r_LOout)<(unsigned)r.r_rt)
                                                r.r_HIout=r.r_HIout+1;
                                        r.r_LOout=r.r_LOout+r.r_rt;
                                }
                        }
 		}	
		break; 
		case 0x48:
//			strcpy(name,"div");
//			if (r.r_rt==0)
//				fatal("Divide by 0");
			if (r.r_rs<-2147483647) r.r_rs=-2147483647;
			r.r_LOout=IDIV(r.r_rs,r.r_rt);
			r.r_HIout=IMOD(r.r_rs,r.r_rt);
			break; 
		case 0x49:
//			strcpy(name,"divu");
//			if (r.r_rt==0)
//				fatal("Divide by 0");
			r.r_LOout=IDIV((unsigned)r.r_rs,(unsigned)r.r_rt);
			r.r_HIout=IMOD((unsigned)r.r_rs,(unsigned)r.r_rt);
			break; 
		case 0x4a:
//			strcpy(name,"mfhi");
			r.r_rdout=r.r_HI;
			break;
		case 0x4b:
//			strcpy(name,"mthi");
			r.r_HIout=r.r_rs;
			break;
		case 0x4c:
//			strcpy(name,"mflo");
			r.r_rdout=r.r_LO;
			break; 
		case 0x4d:
//			strcpy(name,"mtlo");
			r.r_LOout=r.r_rs;
			break; 
		case 0x4e:
//			strcpy(name,"and");
			r.r_rdout=r.r_rs&r.r_rt;
			break; 
		case 0x4f:
//			strcpy(name,"andi");
			r.r_rtout=r.r_rs&uimm;
			break; 
		case 0x50:
//			strcpy(name,"or");
			r.r_rdout=r.r_rs|r.r_rt;
			break; 
		case 0x51:
//			strcpy(name,"ori");
			r.r_rtout=r.r_rs|uimm;
			break; 
		case 0x52:
//			strcpy(name,"xor");
			r.r_rdout=r.r_rs^r.r_rt;
			break; 
		case 0x53:
//			strcpy(name,"xori");
			r.r_rtout=r.r_rs^uimm;
			break; 
		case 0x54:
//			strcpy(name,"nor");
			r.r_rdout=~(r.r_rs|r.r_rt);
			break; 
		case 0x55:
//			strcpy(name,"sll");
			r.r_rdout=r.r_rt<<shamt;
			break; 
		case 0x56:
//			strcpy(name,"sllv");
			r.r_rdout=r.r_rt<<(r.r_rs&037);
			break; 
		case 0x57:
//			strcpy(name,"srl");
			r.r_rdout=(unsigned)r.r_rt>>shamt;
			break; 
		case 0x58:
//			strcpy(name,"srlv");
			r.r_rdout=(unsigned)r.r_rt>>(r.r_rs&037);
			break; 
		case 0x59:
//			strcpy(name,"sra");
                       	if (r.r_rt&020000000000)
                        {
                                r.r_rdout=r.r_rt;
                                for (i=0; i<shamt; i++)
                                        r.r_rdout=(r.r_rdout>>1)|020000000000;
                        }
                        else
                                r.r_rdout=r.r_rt>>shamt;
			break; 
		case 0x5a:
//			strcpy(name,"srav");
                        shamt=r.r_rs&037;
                        if (r.r_rt&020000000000)
                        {
                                r.r_rdout=r.r_rt;
                                for (i=0; i<shamt; i++)
                                        r.r_rdout=(r.r_rdout>>1)|020000000000;
                        }
                        else
                                r.r_rdout=r.r_rt>>shamt;
			break; 
		case 0x5b:
//			strcpy(name,"slt");
			r.r_rdout=(r.r_rs<r.r_rt)? 1:0;
			break;
		case 0x5c:
//			strcpy(name,"slti");
			r.r_rtout=(r.r_rs<imm)? 1:0;
			break; 
		case 0x5d:
//			strcpy(name,"sltu");
			r.r_rdout=((unsigned)r.r_rs<(unsigned)r.r_rt)? 1:0;
			break; 
		case 0x5e:
//			strcpy(name,"sltiu");
			r.r_rtout=((unsigned)r.r_rs<(unsigned)imm)? 1:0;
			break; 
		case 0x70:
//			strcpy(name,"add.s");
			r.f_fdout.f=r.f_fs.f+r.f_ft.f;
			break; 
		case 0x71:
//			strcpy(name,"add.d");
			ftemp_d.d=ftemp_s.d+ftemp_t.d;
			r.f_fdout.l=ftemp_d.ll[0];
			r.f_fd2out.l=ftemp_d.ll[1];
			break; 
		case 0x72:
//			strcpy(name,"sub.s");
			r.f_fdout.f=r.f_fs.f-r.f_ft.f;
			break; 
		case 0x73:
//			strcpy(name,"sub.d");
			ftemp_d.d=ftemp_s.d-ftemp_t.d;
			r.f_fdout.l=ftemp_d.ll[0];
			r.f_fd2out.l=ftemp_d.ll[1];
			break; 
		case 0x74:
//			strcpy(name,"mul.s");
			r.f_fdout.f=r.f_fs.f*r.f_ft.f;
			break; 
		case 0x75:
//			strcpy(name,"mul.d");
			ftemp_d.d=ftemp_s.d*ftemp_t.d;
			r.f_fdout.l=ftemp_d.ll[0];
			r.f_fd2out.l=ftemp_d.ll[1];
			break; 
		case 0x76:
//			strcpy(name,"div.s");
//			if (r.f_ft.f==0)
//				fatal("Divide by 0");
			r.f_fdout.f=r.f_fs.f/r.f_ft.f;
			break; 
		case 0x77:
//			strcpy(name,"div.d");
//			if (ftemp_t.d==0)
//				fatal("Divide by 0");
			ftemp_d.d=ftemp_s.d/ftemp_t.d;
			r.f_fdout.l=ftemp_d.ll[0];
			r.f_fd2out.l=ftemp_d.ll[1];
			break; 
		case 0x78:
//			strcpy(name,"abs.s");
			r.f_fdout.f=fabs((double)r.f_fs.f);
			break; 
		case 0x79:
//			strcpy(name,"abs.d");
			ftemp_d.d=fabs(ftemp_s.d);
			r.f_fdout.l=ftemp_d.ll[0];
			r.f_fd2out.l=ftemp_d.ll[1];
			break; 
		case 0x7a:
//			strcpy(name,"mov.s");
			r.f_fdout.f=r.f_fs.f;
			break; 
		case 0x7b:
//			strcpy(name,"mov.d");
			ftemp_d.d=ftemp_s.d;
			r.f_fdout.l=ftemp_d.ll[0];
			r.f_fd2out.l=ftemp_d.ll[1];
			break; 
		case 0x7c:
//			strcpy(name,"neg.s");
			r.f_fdout.f=-r.f_fs.f;
			break; 
		case 0x7d:
//			strcpy(name,"neg.d");
			ftemp_d.d=-ftemp_s.d;
			r.f_fdout.l=ftemp_d.ll[0];
			r.f_fd2out.l=ftemp_d.ll[1];
			break; 
		case 0x80:
//			strcpy(name,"cvt.s.d");
			r.f_fdout.f=(float)ftemp_s.d;
			break; 
		case 0x81:
//			strcpy(name,"cvt.s.w");
			r.f_fdout.f=(float)r.f_fs.l;
			break; 
		case 0x82:
//			strcpy(name,"cvt.d.s");
			ftemp_d.d=(double)r.f_fs.f;
			r.f_fdout.l=ftemp_d.ll[0];
			r.f_fd2out.l=ftemp_d.ll[1];
			break; 
		case 0x83:
//			strcpy(name,"cvt.d.w");
			ftemp_d.d=(double)r.f_fs.l;
			r.f_fdout.l=ftemp_d.ll[0];
			r.f_fd2out.l=ftemp_d.ll[1];
			break; 
		case 0x84:
//			strcpy(name,"cvt.w.s");
			r.f_fdout.l=(int)r.f_fs.f;
			break; 
		case 0x85:
//			strcpy(name,"cvt.w.d");
			r.f_fdout.l=(int)ftemp_s.d;
			break; 
		case 0x90:
//			strcpy(name,"c.eq.s");
			r.r_FCCout=r.f_fs.f==r.f_ft.f;
			break; 
		case 0x91:
//			strcpy(name,"c.eq.d");
			r.r_FCCout=ftemp_s.d==ftemp_t.d;
			break; 
		case 0x92:
//			strcpy(name,"c.lt.s");
			r.r_FCCout=r.f_fs.f<r.f_ft.f;
			break; 
		case 0x93:
//			strcpy(name,"c.lt.d");
			r.r_FCCout=ftemp_s.d<ftemp_t.d;
			break; 
		case 0x94:
//			strcpy(name,"c.le.s");
			r.r_FCCout=r.f_fs.f<=r.f_ft.f;
			break; 
		case 0x95:
//			strcpy(name,"c.le.d");
			r.r_FCCout=ftemp_s.d<=ftemp_t.d;
			break; 
		case 0x96:
//			strcpy(name,"sqrt.s");
			r.f_fdout.f=sqrt((double)r.f_fs.f);
			break; 
		case 0x97:
//			strcpy(name,"sqrt.d");
			ftemp_d.d=sqrt(ftemp_s.d);
			r.f_fdout.l=ftemp_d.ll[0];
			r.f_fd2out.l=ftemp_d.ll[1];
			break; 
		case 0xa0:
//			strcpy(name,"syscall");
			break; 
		case 0xa1:
//			strcpy(name,"break");
			break; 
		case 0xa2:
//			strcpy(name,"lui");
			r.r_rtout=uimm<<16;
			break; 
		case 0xa3:
//			strcpy(name,"mfc1");
			r.r_rtout=r.f_fs.l;
			break; 
		case 0xa7:
//			strcpy(name,"dmfc1");
			r.r_rtout=r.f_fs.l;
			r.r_rt2out=r.f_fs2.l;
			break; 
		case 0xa4:
//			strcpy(name,"cfc1");
			break; 
		case 0xa5:
//			strcpy(name,"mtc1");
			r.f_fsout.l=r.r_rt;
			break; 
		case 0xa8:
//			strcpy(name,"dmtc1");
			r.f_fsout.l=r.r_rt;
			r.f_fs2out.l=r.r_rt2;
			break; 
		case 0xa6:
//			strcpy(name,"ctc1");
			break; 
		default:
//			strcpy(name,"undef");
			break;
	}


	//write results back to the reservation station
	resstat[ri].r_rtout=r.r_rtout;
	resstat[ri].r_rt2out=r.r_rt2out;
	resstat[ri].r_rdout=r.r_rdout;
	resstat[ri].r_HIout=r.r_HIout;
	resstat[ri].r_LOout=r.r_LOout;
	resstat[ri].r_raout=r.r_raout;

	resstat[ri].r_FCCout=r.r_FCCout;
	resstat[ri].f_fsout.l=r.f_fsout.l;
	resstat[ri].f_fs2out.l=r.f_fs2out.l;
	resstat[ri].f_ftout.l=r.f_ftout.l;
	resstat[ri].f_ft2out.l=r.f_ft2out.l;
	resstat[ri].f_fdout.l=r.f_fdout.l;
	resstat[ri].f_fd2out.l=r.f_fd2out.l;

}

//ooo_dostore performs the memory write for the store held in reorder buffer
//entry ri
//stores are done in the commit stage, so ooo_doexecute leaves them alone and
//the data cache is written here instead, one byte per dcache_write call
//for sh/sw/dsw/s.s/s.d the least significant byte goes to the lowest address
void ooo_dostore(int ri)
{
	Reorder_Buffer_Entry r;
	int opcode=ROB[ri].instruction.inst_lower&0xff;
	short int imm=(ROB[ri].instruction.inst_upper)&0xffff;
	int i,k;

	//snapshot the base register and the values to be stored
	r.r_rs=ROB[ri].r_rs;
	r.r_rt=ROB[ri].r_rt;
	r.r_rt2=ROB[ri].r_rt2;

	r.f_ft.l=ROB[ri].f_ft.l;
	r.f_ft2.l=ROB[ri].f_ft2.l;

	switch(opcode)
	{
		case 0x30:	//sb
			dcache_write(r.r_rs+imm, r.r_rt&0xFF);
			power_model(12,0,0);
			break;

		case 0x32:	//sh
			for (k=0; k<2; k++)
				dcache_write(r.r_rs+imm+k, (r.r_rt>>(8*k))&0xFF);
			power_model(12,0,0);
			break;

		case 0x34:	//sw
			for (k=0; k<4; k++)
				dcache_write(r.r_rs+imm+k, (r.r_rt>>(8*k))&0xFF);
			power_model(12,0,0);
			break;

		case 0x35:	//dsw: rt goes to the low word, rt2 to the high word
			for (k=0; k<4; k++)
				dcache_write(r.r_rs+imm+k, (r.r_rt>>(8*k))&0xFF);
			for (k=0; k<4; k++)
				dcache_write(r.r_rs+imm+4+k, (r.r_rt2>>(8*k))&0xFF);
			power_model(12,0,0);
			break;

		case 0x36:	//s.s
			for (k=0; k<4; k++)
				dcache_write(r.r_rs+imm+k, (r.f_ft.l>>(8*k))&0xFF);
			power_model(12,0,0);
			break;

		case 0x37:	//s.d: ft goes to the low word, ft2 to the high word
			for (k=0; k<4; k++)
				dcache_write(r.r_rs+imm+k, (r.f_ft.l>>(8*k))&0xFF);
			for (k=0; k<4; k++)
				dcache_write(r.r_rs+imm+4+k, (r.f_ft2.l>>(8*k))&0xFF);
			power_model(12,0,0);
			break;

		case 0x38:	//dsz: eight zero bytes
			for (k=0; k<8; k++)
				dcache_write(r.r_rs+imm+k, 0);
			power_model(12,0,0);
			break;

		case 0x39:	//swl
			//top byte goes to the addressed location; lower bytes follow at
			//descending addresses until a multiple-of-4 address has been written
			i=r.r_rs+(short)imm;
			for (k=0; k<4; k++)
			{
				if (k>0 && (i-(k-1))%4==0)
					break;
				dcache_write(i-k, (r.r_rt>>(24-8*k))&0xFF);
			}
			power_model(12,0,0);
			break;

		case 0x3a:	//swr
			//bottom byte goes to the addressed location; higher bytes follow at
			//ascending addresses and stop before the next multiple-of-4 address
			i=r.r_rs+(short)imm;
			for (k=0; k<4; k++)
			{
				if (k>0 && (i+k)%4==0)
					break;
				dcache_write(i+k, (r.r_rt>>(8*k))&0xFF);
			}
			power_model(12,0,0);
			break;

		//the second group of store encodings writes nothing here
		case 0xc6:	//sb
		case 0xc7:	//sh
		case 0xc8:	//sw
		case 0xd0:	//dsw
		case 0xd1:	//dsz
		case 0xc9:	//s.s
		case 0xd2:	//s.d
		default:
			break;
	}
}


//myloader.c
//Michael Black, 2006
//
//loads the benchmark program into virtual memory, writes the command line parameters
//to its stack
//
//a substantial amount of the code in myloader has been taken from simplescalar
//so that myloader will be compatible

#include <stdio.h>
#include "myloader.h"

//info on where each segment of the program is placed in memory
//all of these are zero until load fills them in

//.text section: virtual address, size (section bytes/4 plus 10), and
//end address (Text_Start+Text_Size*4)
unsigned int Text_Start=0;
unsigned int Text_Size=0;
unsigned int Text_End=0;
//data segment start and size, copied from the AOUT header
unsigned int Data_Start=0;
unsigned int Data_Size=0;
//bss segment start and size, copied from the AOUT header
unsigned int Bss_Start=0;
unsigned int Bss_Size=0;
//where execution should start, copied from the AOUT header
unsigned int Prog_Entry=0;
//rdata section: virtual address and size in bytes from its section header
unsigned int Rdata_Start=0;
unsigned int Rdata_Size=0;
//stack: Stack_Start is STACK_BASE, Stack_Size is the MAX_PARAMS bytes
//reserved below it for argc/argv/envp
unsigned int Stack_Start=0;
unsigned int Stack_Size=0;
//address at which argc is written (STACK_BASE-MAX_PARAMS); reported by load
//as the initial stack pointer
unsigned int Param_Start=0;

//load_write_word writes the 32-bit value to simulated memory starting at addr,
//one byte per memory_write call, least significant byte first
static void load_write_word(unsigned int addr, unsigned int value)
{
	memory_write(addr, value&0x000000FF);
	memory_write(addr+1, (value&0x0000FF00)>>8);
	memory_write(addr+2, (value&0x00FF0000)>>16);
	memory_write(addr+3, (value&0xFF000000)>>24);
}

//load actually loads the benchmark into memory
//progname is the name of the benchmark file
//argc is the total number of command line arguments (including simulator flags)
//argv are all the command line arguments (including simulator flags)
//envp are the environment variables (null-terminated array)
//skip is the index in argv of the first argument that belongs to the benchmark;
//the arguments before it are meant for the simulator and are not passed on
//
//The text, rdata, data and sdata sections are copied into simulated memory,
//the segment globals above are filled in, and argc, the argv pointer array,
//the envp pointer array and the strings themselves are written starting at
//STACK_BASE-MAX_PARAMS.  Any failure ends in fatal().
//NOTE(review): nothing checks that the arguments and environment fit in
//MAX_PARAMS bytes; a very large environment is written past STACK_BASE
void load(char progname[], int argc, char* argv[], char* envp[], int skip)
{
	FILE *fobj;
	struct filehdr fhdr;
	struct aouthdr ahdr;
	struct scnhdr shdr;
	long floc;
	int i;
	char *p;
	unsigned int sp;
	unsigned int argv_addr;
	unsigned int envp_addr;

	//open the file (binary mode: the object file must not be newline-translated)
	fobj=fopen(progname,"rb");
	if (!fobj)
		fatal("File not found");

	//get the file header
	if (fread(&fhdr, sizeof(struct filehdr), 1, fobj)!=1)
		fatal("No file header");

	//get the AOUT header
	if (fread(&ahdr, sizeof(struct aouthdr), 1, fobj)!=1)
		fatal("No AOUT header");

	//0x162 tells that the program is little endian
	if (fhdr.f_magic!=0x162)
		fatal("Bad file or wrong endian");

	//can set up the data segment from AOUT header
	Data_Start=ahdr.data_start;
	Data_Size=ahdr.dsize;
	Bss_Start=ahdr.bss_start;
	Bss_Size=ahdr.bsize;

	//set up stack
	Stack_Start=STACK_BASE;
	sp=STACK_BASE-MAX_PARAMS;
	Stack_Size=Stack_Start-sp;
	Param_Start=sp;

	//move to beginning of section headers
	if (fseek(fobj, sizeof(struct filehdr) + fhdr.f_opthdr, SEEK_SET)==-1)
		fatal("Cannot reposition in file");
	//get location
	floc=ftell(fobj);
	//read in section headers
	for (i=0; i<fhdr.f_nscns; i++)
	{
		//reposition: reading a section body below moves the file position
		if (fseek(fobj, floc, SEEK_SET)==-1)
			fatal("Cannot reposition in file");
		if (fread(&shdr, sizeof(struct scnhdr), 1, fobj) < 1)
			fatal("Cannot read section");
		floc = ftell(fobj);

		//read in sections
		switch(shdr.s_flags)
		{
			//text section
			case 0x0020:
				//find it
				if (fseek(fobj, shdr.s_scnptr, SEEK_SET)==-1)
					fatal("Couldn't find .text");
				//make room for it
				p=(char*)calloc(shdr.s_size, sizeof(char));
				if (!p)
					fatal("Not enough memory to copy section");
				//copy it to temp buffer p
				if (fread(p, shdr.s_size, 1, fobj)<1)
					fatal("Couldn't read .text section");

				//put it into memory
				memory_write_array(shdr.s_vaddr, p, shdr.s_size);

				//free up p
				free(p);

				Text_Start=shdr.s_vaddr;
				Text_Size=shdr.s_size/4+10;
				Text_End=Text_Start+Text_Size*4;
				break;

			//rdata: remember where it is, then copy it like a data section
			case 0x100:
				Rdata_Start=shdr.s_vaddr;
				Rdata_Size=shdr.s_size;
				/* fallthrough */
			//data
			case 0x40:
			//sdata
			case 0x200:
				//an empty section has nothing to copy; reading zero bytes
				//would otherwise be reported as a failure
				if (shdr.s_size==0)
					break;
				//find it
				if (fseek(fobj, shdr.s_scnptr, SEEK_SET)==-1)
					fatal("Couldn't find data section");
				//make space to copy section
				p=(char*)calloc(shdr.s_size, sizeof(char));
				if (!p)
					fatal("Not enough memory to copy section");
				//copy it to temp buffer p
				if (fread(p, shdr.s_size, 1, fobj)<1)
					fatal("Couldn't read data section");
				//put it into memory
				memory_write_array(shdr.s_vaddr, p, shdr.s_size);
				//free up p
				free(p);
				break;

			//bss and sbss have no file contents to copy
			case 0x80:
			case 0x400:
				break;

			//any other section type is ignored
			default:
				break;
		}
	}
	fclose(fobj);

	//where execution should start
	Prog_Entry=ahdr.entry;

	//put argc and argv onto stack
	//argc - remove parameters intended for simulator
	argc-=skip;
	load_write_word(sp, (unsigned int)argc);
	sp+=4;
	//reserve the argv pointer array (argc entries plus a null entry)
	argv_addr=sp;
	sp=sp+(argc+1)*4;
	//reserve the envp pointer array (one entry per variable plus a null entry)
	envp_addr=sp;
	for (i=0; envp[i]; i++)
		sp+=4;
	sp+=4;
	//fill in argv pointer array; each string is written at sp, followed by
	//its terminating 0
	for (i=0; i<argc; i++)
	{
		load_write_word(argv_addr+i*4, sp);
		memory_write_array(sp, argv[i+skip], strlen(argv[i+skip]));
		sp+=strlen(argv[i+skip]);
		memory_write(sp,0);
		sp++;
	}
	//4 bytes of 0 separate argv from envp
	load_write_word(argv_addr+i*4, 0);

	//fill in envp pointer array
	for (i=0; envp[i]; i++)
	{
		load_write_word(envp_addr+i*4, sp);
		memory_write_array(sp, envp[i], strlen(envp[i]));
		sp+=strlen(envp[i]);
		memory_write(sp, 0);
		sp++;
	}
	//4 bytes of 0 at the end of envp
	load_write_word(envp_addr+i*4, 0);

	printf("\n");
	printf("Text starts at %x, is %x size, ends at %x\n",Text_Start,Text_Size,Text_End);
	printf("Data starts at %x, is %x size\n",Data_Start,Data_Size);
	printf("Stack starts at %x, is %x size\n",Stack_Start,Stack_Size);
	printf("Bss starts at %x, is %x size\n",Bss_Start,Bss_Size);
	printf("Rdata starts at %x, is %x size\n",Rdata_Start,Rdata_Size);
	printf("Program entry point is %x\n",Prog_Entry);
	printf("Initial stack pointer is at %x\n",Param_Start);
	printf("\n");
}
//mymain.c
//Michael Black, 2006
//
//This is the entry point into the simulator.  It initializes memory, interprets the command line,
//loads in the program, and starts the appropriate simulator.

#include "mymemory.h"

//number of instructions to run before terminating simulator (-1 = no limit)
int total_instructions=-1;
//number of instructions to run before starting debugger
//NOTE(review): -1 presumably means "never start it", by analogy with
//total_instructions - confirm
int total_instructions_until_debug=-1;

//simulation type: 0=functional, 1=inorder
//NOTE(review): this file also references mysimoutorder.c; the value that
//selects the out-of-order simulator is not listed here - confirm
int simulation_type=0;

//whether to create an output file consisting of all reg. contents (1=yes)
int dump_trace=0;
//NOTE(review): presumably the number of instructions to skip before the
//trace dump starts - confirm against where it is read
int dump_skip_instructions=0;

//these are all defined in mysim.c
extern unsigned int PC;
extern unsigned int counter;
extern unsigned int instruction_counter;
extern unsigned int total_memory_pages_used;
extern unsigned int cycles_stalled_for_branches;
extern unsigned int cycles_stalled_for_loads;
extern unsigned int cycles_stalled_for_flushes;
extern unsigned int cycles_stalled_for_init;
extern int print_instruction;

//these are defined in mycache.c
extern unsigned int cache_exists;
extern unsigned int il1_cache_size;
extern unsigned int il2_cache_size;
extern unsigned int dl1_cache_size;
extern unsigned int dl2_cache_size;
extern unsigned int cache_block_size;
extern unsigned int il1_cache_sets;
extern unsigned int il2_cache_sets;
extern unsigned int dl1_cache_sets;
extern unsigned int dl2_cache_sets;
extern unsigned int d_cache_write_misses;
extern unsigned int d_cache_read_misses;
extern unsigned int i_cache_read_misses;
extern unsigned int d_cache_writes;
extern unsigned int d_cache_reads;
extern unsigned int i_cache_reads;
extern unsigned int il1_cache_replacements;
extern unsigned int il2_cache_replacements;
extern unsigned int dl1_cache_replacements;
extern unsigned int dl2_cache_replacements;
extern int init_il1_cache_hit_latency;
extern int init_il2_cache_hit_latency;
extern int init_il2_cache_miss_latency;
extern int init_dl1_cache_hit_latency;
extern int init_dl2_cache_hit_latency;
extern int init_dl2_cache_miss_latency;

//these are defined in mybpred.c
extern int BTB_bits;
extern int branch_predictor_type;

//these are defined in myvpred.c
extern int value_predict;
extern unsigned int vp_correct_made;
extern unsigned int vp_correct_notmade;
extern unsigned int vp_wrong_made;
extern unsigned int vp_wrong_notmade;
extern int value_context_history_size;
extern int past_value_number;
extern int tally_perceptron_weights;

//these are defined in mycritical.c
extern int criticality_type;
extern int criterion;
extern int perceptron_training_style;
extern int perceptron_training_threshold;
extern int perceptron_weight_growth;
extern int aliasing_reduction;
extern int criticality_history_size;
extern int criticality_predict_every;

//this is defined in mysyscall.c
extern unsigned int syscall_counter;

//these are defined in mysimoutorder.c
extern int power_simulate;
extern long double totaltime;
extern int reservation_station_number;
extern int integer_ALUs;
extern int integer_latency;
extern int integer_multdiv_ALUs;
extern int integer_multdiv_latency;
extern int float_ALUs;
extern int float_latency;
extern int float_multdiv_ALUs;
extern int float_multdiv_latency;
extern int reorder_buffer_size;
extern int lsq_size;
extern int ls_latency;
extern int dispatch_queue_size;
extern int fetches_per_cycle;
extern int init_clock_speed;
extern int clock_speed;

extern int addonecycle;
extern int subtractcritical;

//exit_routine() prints out statistics at the end of simulation
//it is called by main, if total_instructions is reached,
//or by the syscall exit routine, if the program terminates naturally
void exit_routine()
{
	printf("\n");
	if (simulation_type>=1)
	{
		printf("%u total instructions\n",instruction_counter);
		printf("%u cycles\n",counter);
		printf("%f instructions / cycle\n",(float)instruction_counter/(float)counter);
		if (simulation_type==1)
			printf("Cycles stalled for branches: %u, loads: %u, pipeline flushes: %u, initialization: %u\n", cycles_stalled_for_branches,cycles_stalled_for_loads,cycles_stalled_for_flushes,cycles_stalled_for_init); 
		else
		{
			printf("%f total simulated execution time\n", (float)totaltime);
			printf("%f instructions / second\n", (float)instruction_counter / (float)totaltime);

			power_model(-1,0,0);
		}
		printf("\n");
	}
	else
	{
		printf("%u total instructions\n",counter);
	}
	printf("%u bytes of memory used\n",PAGE_SIZE*total_memory_pages_used);
	if (cache_exists==1)
	{
		printf("%u i-cache reads, %u i-cache read misses, read hit rate = %f\n",i_cache_reads, i_cache_read_misses, (float)(i_cache_reads-i_cache_read_misses)/(float)i_cache_reads);
		printf("%u d-cache reads, %u d-cache read misses, read hit rate = %f\n",d_cache_reads, d_cache_read_misses, (float)(d_cache_reads-d_cache_read_misses)/(float)d_cache_reads);
		printf("%u d-cache writes, %u d-cache write misses, write hit rate = %f\n",d_cache_writes, d_cache_write_misses, (float)((float)d_cache_writes-(float)d_cache_write_misses)/(float)d_cache_writes);
		printf("%u level 1 instruction cache replacements\n",il1_cache_replacements);
		printf("%u level 2 instruction cache replacements\n",il2_cache_replacements);
		printf("%u level 1 data cache replacements\n",dl1_cache_replacements);
		printf("%u level 2 data cache replacements\n",dl2_cache_replacements);
	}
	if (value_predict>0)
	{
		printf("Value predictions:\n");
		printf("%u correct predictions made, %u incorrect predictions made\n",vp_correct_made,vp_wrong_made);
		printf("%u correct predictions not made, %u incorrect predictions not made\n",vp_correct_notmade,vp_wrong_notmade);
		printf("%f coverage, %f accuracy, %f potential accuracy\n",(float)(vp_correct_made+vp_wrong_made)/(float)(vp_correct_notmade+vp_wrong_notmade+vp_correct_made+vp_wrong_made),(float)vp_correct_made/(float)(vp_correct_made+vp_wrong_made),(float)(vp_correct_made+vp_correct_notmade)/(float)(vp_correct_made+vp_correct_notmade+vp_wrong_made+vp_wrong_notmade));

		dump_value_prediction_stats();
	}
	if (criticality_type>0)
	{
		dump_criticality();
	}
	printf("%u system calls\n",syscall_counter);
}

//dump() is a debugging aid: it prints `entries` bytes of simulated memory in
//hex, starting at addr.  Output is laid out eight bytes to a row, each row
//prefixed with the address of its first byte.
void dump(unsigned int addr, int entries)
{
	int offset=0;

	while (offset<entries)
	{
		//every eighth byte opens a new row labelled with its address
		if ((offset%8)==0)
			printf("\n%x\t",addr+offset);

		printf("%x\t",memory_read(addr+offset));
		offset++;
	}
	printf("\n");
}

//command line flags
//ti = total instructions
//di = instructions until debugger starts
//pi = pipelined (inorder)
//fu = functional
//oo = out-of-order
//bb = branch target buffer bits
//bp = branch predictor type (0=none, 1=last global branch, 2=last local branch, 3=always not taken, 4=always taken, 5=bimodal)
//dt = dump trace
//mk = print out every ith instruction
//po = do Wattch power simulation

//value prediction flags
//vp = value predictor type (0=none)
//vh = value context history size
//pv = past value number
//cr = criticality predictor type (0=none)
//tw = tally perceptron weights (0=don't, 1=do)

//criticality flags
//a1 = add one cycle to every instruction
//sc = 1: -1 on predicted critical, 2: -1 on cycle before completion
//ct = criterion to be measured: 0=any, 1=QOLD, 2=QOLDDEP, 3=ALOLD, 4=QCONS
//pt = perceptron training style: 0=use error, 1=use desired value
//th = perceptron weight threshold
//wg = weight growth: 0=linear, 1=exponential
//ar = aliasing reduction: 0=none, 1=assigned seats, 2=assigned seats with cancellation
//ch = criticality global history size
//pe = train only every n iterations; 0=always train

//cache flags
//cb = cache block size
//ce = cache exists
//c1 = il1 cache size
//c2 = il2 cache size
//c3 = dl1 cache size
//c4 = dl2 cache size
//c5 = il1 cache sets
//c6 = il2 cache sets
//c7 = dl1 cache sets
//c8 = dl2 cache sets
//i1 = il1 hit latency
//i2 = il2 hit latency (il1 miss latency)
//i3 = il2 miss latency
//d1 = dl1 hit latency
//d2 = dl2 hit latency (dl1 miss latency)
//d3 = dl2 miss latency

//outoforder flags
//ia = integer ALUs
//il = integer latency
//ma = integer multiply/divide ALUs
//ml = integer multiply/divide latency
//fa = float ALUs
//fl = float latency
//da = float multiply/divide ALUs
//dl = float multiply/divide latency
//rb = reorder buffer size
//ls = load/store queue size
//ll = load/store address computation latency
//dq = dispatch queue size
//fc = fetches/cycle (fetch bandwidth)
//ic = instruction cache latency (time to fetch) - note: getflags has no case for this flag
//dc = data cache latency (time to perform load/store) - note: getflags has no case for this flag
//rs = number of reservation stations
//cl = clock speed

//getflagint parses the numeric part of a command line flag
//getflagint("-ti1000") would return 1000
//arg: a flag of the form "-xxNNN" - a dash, a two letter name, then the number
//returns: atoi() of everything after the third character.  That is 0 when no
//         number follows the name (e.g. "-dt"), and also 0 when arg is null
//         or too short to contain a number at all
int getflagint(char* arg)
{
	int i;

	if (arg==0)
		return 0;

	//make sure the three prefix characters really exist before skipping them
	for (i=0; i<3; i++)
	{
		if (arg[i]=='\0')
			return 0;
	}

	//parse in place rather than copying into a fixed-size buffer,
	//so an over-long argument cannot overflow anything
	return atoi(arg+3);
}

//getflags parses the command line and sets the flags accordingly
void getflags(char* argv[], int entry)
{
	char buf[20];
	int i,j;

	if (argv[entry][1]=='t' && argv[entry][2]=='i')
		total_instructions=getflagint(argv[entry]);
	else if (argv[entry][1]=='d' && argv[entry][2]=='i')
		total_instructions_until_debug=getflagint(argv[entry]);
	else if (argv[entry][1]=='c' && argv[entry][2]=='b')
		cache_block_size=getflagint(argv[entry]);
	else if (argv[entry][1]=='b' && argv[entry][2]=='b')
		BTB_bits=getflagint(argv[entry]);
	else if (argv[entry][1]=='b' && argv[entry][2]=='p')
		branch_predictor_type=getflagint(argv[entry]);
	else if (argv[entry][1]=='v' && argv[entry][2]=='p')
		value_predict=getflagint(argv[entry]);
	else if (argv[entry][1]=='v' && argv[entry][2]=='h')
		value_context_history_size=getflagint(argv[entry]);
	else if (argv[entry][1]=='p' && argv[entry][2]=='v')
		past_value_number=getflagint(argv[entry]);
	else if (argv[entry][1]=='m' && argv[entry][2]=='k')
		print_instruction=getflagint(argv[entry]);
	else if (argv[entry][1]=='p' && argv[entry][2]=='o')
		power_simulate=1;
	else if (argv[entry][1]=='p' && argv[entry][2]=='i')
		simulation_type=1;
	else if (argv[entry][1]=='f' && argv[entry][2]=='u')
		simulation_type=0;
	else if (argv[entry][1]=='o' && argv[entry][2]=='o')
		simulation_type=2;
	else if (argv[entry][1]=='d' && argv[entry][2]=='t')
	{
		dump_trace=1;
		dump_skip_instructions=getflagint(argv[entry]);
	}
	else if (argv[entry][1]=='c' && argv[entry][2]=='e')
		cache_exists=1;
	else if (argv[entry][1]=='c' && argv[entry][2]=='l')
		init_clock_speed=getflagint(argv[entry]);
	else if (argv[entry][1]=='i' && argv[entry][2]=='a')
		integer_ALUs=getflagint(argv[entry]);
	else if (argv[entry][1]=='i' && argv[entry][2]=='l')
		integer_latency=getflagint(argv[entry]);
	else if (argv[entry][1]=='m' && argv[entry][2]=='a')
		integer_multdiv_ALUs=getflagint(argv[entry]);
	else if (argv[entry][1]=='m' && argv[entry][2]=='l')
		integer_multdiv_latency=getflagint(argv[entry]);
	else if (argv[entry][1]=='f' && argv[entry][2]=='a')
		float_ALUs=getflagint(argv[entry]);
	else if (argv[entry][1]=='f' && argv[entry][2]=='l')
		float_latency=getflagint(argv[entry]);
	else if (argv[entry][1]=='d' && argv[entry][2]=='a')
		float_multdiv_ALUs=getflagint(argv[entry]);
	else if (argv[entry][1]=='d' && argv[entry][2]=='l')
		float_multdiv_latency=getflagint(argv[entry]);
	else if (argv[entry][1]=='r' && argv[entry][2]=='b')
		reorder_buffer_size=getflagint(argv[entry]);
	else if (argv[entry][1]=='r' && argv[entry][2]=='s')
		reservation_station_number=getflagint(argv[entry]);
	else if (argv[entry][1]=='l' && argv[entry][2]=='s')
		lsq_size=getflagint(argv[entry]);
	else if (argv[entry][1]=='l' && argv[entry][2]=='l')
		ls_latency=getflagint(argv[entry]);
	else if (argv[entry][1]=='d' && argv[entry][2]=='q')
		dispatch_queue_size=getflagint(argv[entry]);
	else if (argv[entry][1]=='f' && argv[entry][2]=='c')
		fetches_per_cycle=getflagint(argv[entry]);
	else if (argv[entry][1]=='c' && argv[entry][2]=='1')
		il1_cache_size=getflagint(argv[entry]);
	else if (argv[entry][1]=='c' && argv[entry][2]=='2')
		il2_cache_size=getflagint(argv[entry]);
	else if (argv[entry][1]=='c' && argv[entry][2]=='3')
		dl1_cache_size=getflagint(argv[entry]);
	else if (argv[entry][1]=='c' && argv[entry][2]=='4')
		dl2_cache_size=getflagint(argv[entry]);
	else if (argv[entry][1]=='c' && argv[entry][2]=='5')
		il1_cache_sets=getflagint(argv[entry]);
	else if (argv[entry][1]=='c' && argv[entry][2]=='6')
		il2_cache_sets=getflagint(argv[entry]);
	else if (argv[entry][1]=='c' && argv[entry][2]=='7')
		dl1_cache_sets=getflagint(argv[entry]);
	else if (argv[entry][1]=='c' && argv[entry][2]=='8')
		dl2_cache_sets=getflagint(argv[entry]);
	else if (argv[entry][1]=='i' && argv[entry][2]=='1')
		init_il1_cache_hit_latency=getflagint(argv[entry]);
	else if (argv[entry][1]=='i' && argv[entry][2]=='2')
		init_il2_cache_hit_latency=getflagint(argv[entry]);
	else if (argv[entry][1]=='i' && argv[entry][2]=='3')
		init_il2_cache_miss_latency=getflagint(argv[entry]);
	else if (argv[entry][1]=='d' && argv[entry][2]=='1')
		init_dl1_cache_hit_latency=getflagint(argv[entry]);
	else if (argv[entry][1]=='d' && argv[entry][2]=='2')
		init_dl1_cache_hit_latency=getflagint(argv[entry]);
	else if (argv[entry][1]=='d' && argv[entry][2]=='3')
		init_dl2_cache_miss_latency=getflagint(argv[entry]);
	else if (argv[entry][1]=='c' && argv[entry][2]=='r')
		criticality_type=getflagint(argv[entry]);

	else if (argv[entry][1]=='a' && argv[entry][2]=='1')
		addonecycle=1;
	else if (argv[entry][1]=='s' && argv[entry][2]=='c')
		subtractcritical=getflagint(argv[entry]);
	else if (argv[entry][1]=='c' && argv[entry][2]=='t')
		criterion=getflagint(argv[entry]);
	else if (argv[entry][1]=='p' && argv[entry][2]=='t')
		perceptron_training_style=getflagint(argv[entry]);
	else if (argv[entry][1]=='t' && argv[entry][2]=='h')
		perceptron_training_threshold=getflagint(argv[entry]);
	else if (argv[entry][1]=='w' && argv[entry][2]=='g')
		perceptron_weight_growth=getflagint(argv[entry]);
	else if (argv[entry][1]=='a' && argv[entry][2]=='r')
		aliasing_reduction=getflagint(argv[entry]);
	else if (argv[entry][1]=='c' && argv[entry][2]=='h')
		criticality_history_size=getflagint(argv[entry]);
	else if (argv[entry][1]=='p' && argv[entry][2]=='e')
		criticality_predict_every=getflagint(argv[entry]);
	else if (argv[entry][1]=='t' && argv[entry][2]=='w')
		tally_perceptron_weights=getflagint(argv[entry]);
}

//error routine - if something is wrong in simulation,
//this is called to terminate the simulator
//string: message describing what went wrong
//Prints the message together with the current cycle count and PC, then ends
//the process.  Never returns.  The exit status is non-zero so that a shell or
//batch script can tell an aborted simulation from one that ran to completion.
void fatal(const char* string)
{
	printf("%s\n", string);
	printf("Cycle: %u, PC: %x\n",counter,PC);
	printf("Simulation terminating\n");
	exit(1);
}

//main: simulator entry point.
//Leading arguments that begin with '-' are simulator flags; the first argument
//that does not is the benchmark to load, and it plus everything after it is
//handed to the loader as the benchmark's own command line.
int main(int argc, char* argv[], char* envp[])
{
	int prog_arg=1;

	printf("Michael's PISA simulator\n");

	//consume simulator flags until the first non-flag argument
	while (prog_arg<argc && argv[prog_arg][0]=='-')
	{
		getflags(argv, prog_arg);
		prog_arg++;
	}

	//ran off the end of the argument list: no benchmark was named
	if (prog_arg==argc)
	{
		printf("No file name specified\n");
		exit(0);
	}

	//bring up memory, the caches and the branch target buffer, in that order
	memory_init();
	cache_init();
	BTBinit();

	//load the benchmark; prog_arg indexes the first argument the benchmark
	//itself should see (its own name)
	load(argv[prog_arg],argc,argv,envp,prog_arg);

	//hand control to the selected simulator
	switch (simulation_type)
	{
	case 0:
		printf("Starting functional simulation\n");
		dofuncsim();
		break;
	case 1:
		printf("Starting inorder 5-stage pipeline simulation\n");
		doinordersim();
		break;
	case 2:
		printf("Starting out-of-order simulation\n");
		dooutordersim();
		break;
	default:
		//unknown type: nothing is simulated, statistics are still printed
		break;
	}

	//print out results
	exit_routine();

	//debugging aid, normally disabled:
//	dump(0x7FFF7F8C, 0x10);

}
//mymemory.c
//Michael Black, 2006
//
//handles the virtual memory, takes care of reads and writes directly to virtual memory

#include "mymemory.h"

//page table - indexed by virtual page, gives physical page
//an entry of -1 means the virtual page has not been allocated yet
//(memory_init sets every entry to -1; memory_addpage fills entries in)
int page_table[VIRTUAL_MEMORY_PAGES];

//actual RAM - each entry is a pointer to a page of memory
//pages are PAGE_SIZE bytes, zero-filled, allocated on demand by memory_addpage
char* memory[PHYSICAL_MEMORY_PAGES];

//these are defined in mysim.c
extern unsigned int counter;
extern unsigned int PC;

//keeps track of how many pages of RAM have been allocated
//also the index in memory[] that the next allocated page will occupy
unsigned int total_memory_pages_used=0;

//memory_init resets the page table: every virtual page is marked -1
//(not present), so no page is backed by physical memory at start-up
void memory_init()
{
	int vpage=0;

	while (vpage<VIRTUAL_MEMORY_PAGES)
	{
		page_table[vpage]=-1;
		vpage++;
	}
}

//memory_addpage allocates a new page in memory and updates the page table accordingly
//right now, we won't have a swap strategy.  just kill program if too much memory needed
void memory_addpage(int page)
{
	memory[total_memory_pages_used]=(char*)calloc(PAGE_SIZE,1);
	if (memory[total_memory_pages_used]==0)
		fatal("Not enough physical memory");
	page_table[page]=total_memory_pages_used;
	total_memory_pages_used++;
}

//memory_write stores one byte at a simulated address.
//A page that has never been touched is allocated first.
void memory_write(unsigned int address, char thebyte)
{
	unsigned int vpage=address / PAGE_SIZE;
	unsigned int offset=address % PAGE_SIZE;

	//first access to this page?  give it physical backing
	if (page_table[vpage]==-1)
		memory_addpage(vpage);

	memory[page_table[vpage]][offset]=thebyte;
}

//memory_read fetches one byte from a simulated address.
//Reading a page that has never been touched allocates it first, so such a
//read yields 0 (new pages are zero-filled by memory_addpage).
unsigned char memory_read(unsigned int address)
{
	unsigned int vpage=address / PAGE_SIZE;
	unsigned int offset=address % PAGE_SIZE;

	//first access to this page?  give it physical backing
	if (page_table[vpage]==-1)
		memory_addpage(vpage);

	return memory[page_table[vpage]][offset];
}

//memory_read_word reads an int (4 bytes) starting at address
//The bytes are combined little-endian: the byte at address is the least
//significant, the byte at address+3 the most significant.
//returns: the assembled 32-bit value
unsigned int memory_read_word(unsigned int address)
{
	//build the word in an unsigned type: shifting a byte >= 0x80 left by 24
	//in a signed int would overflow, which is undefined behaviour in C
	unsigned int word;

	word=(unsigned int)memory_read(address);
	word|=(unsigned int)memory_read(address+1)<<8;
	word|=(unsigned int)memory_read(address+2)<<16;
	word|=(unsigned int)memory_read(address+3)<<24;

	return word;
}

//memory_read_array copies `elements` bytes of simulated memory, starting at
//address, into array.  Each byte is invalidated in the cache before it is read.
void memory_read_array(unsigned int address, char* array, int elements)
{
	int n=0;

	while (n<elements)
	{
		unsigned int byte_addr=address+n;

		//remove the byte from the cache
		cache_invalidate(byte_addr);
		array[n]=memory_read(byte_addr);
		n++;
	}
}

//memory_write_array copies `elements` bytes from array into simulated memory,
//starting at address.  Each byte is invalidated in the cache before it is written.
void memory_write_array(unsigned int address, char* array, int elements)
{
	int n=0;

	while (n<elements)
	{
		unsigned int byte_addr=address+n;

		//remove the byte from the cache
		cache_invalidate(byte_addr);
		memory_write(byte_addr, array[n]);
		n++;
	}
}
//mysim.c
//Michael Black, 2006
//
//mysim contains the core of the simulator logic for both functional and 5-stage pipeline

#include "mysim.h"
#include <math.h>
#include <stdio.h>

//regdump is a debugging output file to contain the reg values for all instructions
FILE *regdump;

//statistics
//number of cycles
unsigned int counter=0;
//number of instructions
unsigned int instruction_counter=0;
unsigned int cycles_stalled_for_branches=0;
unsigned int cycles_stalled_for_loads=0;
unsigned int cycles_stalled_for_flushes=0;
unsigned int cycles_stalled_for_init=0;
//every "print_instruction"th instruction is dumped to screen (unless it's -1)
int print_instruction=-1;

//the register set
//PC
unsigned int PC;
//general purpose integer registers
int R[NUM_REGS+1];
//HI: holds upper 32 bits of multiplication output
int HI;
//LO: holds lower 32 bits of multiplication output
int LO;
//FCC: holds 1 or 0, used for branching based on FP registers
int FCC;
//general purpose floating point registers
Ftype F;

//defined in loader.c, inst.c, main.c
extern unsigned int Prog_Entry;
extern unsigned int Param_Start;
extern char name[];
extern int total_instructions;
extern int total_instructions_until_debug;
extern int dump_trace;
extern int dump_skip_instructions;

//pipeline registers
//FETCH-DECODE
//instruction
unsigned int fd_PC;
int fd_inst_upper;
int fd_inst_lower;
//tells whether the nop in the fetch stage is a stall (and reason for stall)
//reasons for stall:
//0 = legitimate nop, not a stall
//1 = stall due to a mispredicted branch
//2 = stall due to a load
//3 = stall due to pipeline flush (either from syscall or from .d inst)
//4 = stall due to initialization - pipeline is originally empty
int fd_is_stall;

//predicted_PC is a speculative PC used in the fetch stage and possibly undone in decode stage
unsigned int predicted_PC;
//tells whether a branch prediction is made.  contents are
//0 = no prediction (must stall one cycle after branch)
//1 = predicted "don't take"
//2 = predicted "take"
//3 = no branch pred., but value in predicted_PC to be used anyway (for jr and jalr)
int branch_prediction_made;

//DECODE-EXECUTE
//instruction
unsigned int de_PC;
int de_inst_upper;
int de_inst_lower;
//register contents generated by decode
//decode generates all of these whether they are used or not
int de_r_rs;
int de_r_rt;
//contents of rt+1 - need for double loads & stores
int de_r_rt2;
int de_r_rd;
fpr de_fs;
//contents of fs+1 - need for .d floating point operations (like add.d)
fpr de_fs2;
fpr de_ft;
//contents of ft+1 - also need for .d ops
fpr de_ft2;
fpr de_fd;
int de_HI;
int de_LO;
int de_FCC;
//is decode inst a stall?
int de_is_stall;

//EXECUTE-MEMORY
//instruction
unsigned int em_PC;
int em_inst_upper;
int em_inst_lower;
//computational outputs - used in writeback and as memory address
int em_alu_out;
//alu_out2 needed for mult.  
//it's also used for any inst that puts a value into LO (like mflo)
int em_alu_out2;
//floating point computational outputs
float em_alu_f_out;
double em_alu_d_out;
//registers values - still need them for stores at memory stage
int em_r_rt;
int em_r_rt2;
fpr em_ft;
fpr em_ft2;
//writeback flags: if they're set to 1, need to write alu to them
int em_write_rs;
int em_write_rt;
int em_write_rt2;
int em_write_rd; 
int em_write_fs_l;
int em_write_fs_l2;
int em_write_fs_f;
int em_write_fs_d;
int em_write_ft_l;
int em_write_ft_f;
int em_write_ft_d;
int em_write_fd_l;
int em_write_fd_f;
int em_write_fd_d;
int em_write_HI;
int em_write_LO;
int em_write_FCC;
int em_write_ra;
//memory writeback flags: if they're set to 1, need to write memory to them
int em_load_rt;
int em_load_rt2;
int em_load_ft;
int em_load_ft2;
//is nop in em a stall?
int em_is_stall;

//MEMORY-WRITEBACK
//instruction
unsigned int mw_PC;
int mw_inst_upper;
int mw_inst_lower;
//computational outputs
int mw_alu_out;
int mw_alu_out2;
float mw_alu_f_out;
double mw_alu_d_out;
//memory outputs - generated by loads
int mw_memory_out;
int mw_memory_out2;
//writeback flags
int mw_write_rs;
int mw_write_rt;
int mw_write_rt2;
int mw_write_rd; 
int mw_write_fs_l;
int mw_write_fs_l2;
int mw_write_fs_f;
int mw_write_fs_d;
int mw_write_ft_l;
int mw_write_ft_f;
int mw_write_ft_d;
int mw_write_fd_l;
int mw_write_fd_f;
int mw_write_fd_d;
int mw_write_HI;
int mw_write_LO;
int mw_write_FCC;
int mw_write_ra;
//memory writeback flags
int mw_load_rt;
int mw_load_rt2;
int mw_load_ft;
int mw_load_ft2;
//is nop in mw a stall?
int mw_is_stall;

//holds inst_lower of instruction that just completed writeback
//used in debugging
int last_instruction;
unsigned int last_instruction_PC;

//hazard flags
//fetch_stall, if 1, overwrites fetch stage with nop
int fetch_stall;
//fetch overwrite, if 1, means to redo the fetch (PC has changed due to j)
int fetch_overwrite;

//if these are set to 1, a load instruction is in decode and is writing to rt/rt+1/ft/ft+1
//need to check if the fetched instruction depends on the load and stall if it does
int fetch_load_stall_rt;
int fetch_load_stall_rt2;
int fetch_load_stall_ft;
int fetch_load_stall_ft2;

//if 1, stall the pipeline before / after the current instruction
int pipeline_flush_before;
int pipeline_flush_after;
//if 0, ignore the pipeline_flush_before.
//why? since inst will keep setting it every time it runs, we could keep flushing before forever
//better to flush before the first time the inst runs, then don't the next time
int pipeline_dualflush;

//print_pipeline writes a snapshot of the in-order pipeline to stdout:
//one line per pipeline latch (PC, mnemonic and the rd=rs,rt register fields,
//with "(stall)" appended for stall bubbles), the last completed instruction,
//and then every integer and floating point register.
void print_pipeline()
{
	int reg;
	int rs, rt, rd;

	printf("Total cycles: %i\tTotal instructions: %i\tPC: %x\n",counter,instruction_counter,PC);

	//fetch/decode latch
	get_instruction_name(fd_inst_upper,fd_inst_lower);
	rs=(fd_inst_upper>>24);
	rt=(fd_inst_upper>>16)&0xff;
	rd=(fd_inst_upper>>8)&0xff;
	printf("Fetch:  \t %x %s %i=%i,%i",fd_PC,name,rd,rs,rt);
	if (fd_is_stall)
		printf(" (stall)");
	printf("\n");

	//decode/execute latch
	get_instruction_name(de_inst_upper,de_inst_lower);
	rs=(de_inst_upper>>24);
	rt=(de_inst_upper>>16)&0xff;
	rd=(de_inst_upper>>8)&0xff;
	printf("Decode: \t %x %s %i=%i,%i",de_PC,name,rd,rs,rt);
	if (de_is_stall)
		printf(" (stall)");
	printf("\n");

	//execute/memory latch
	get_instruction_name(em_inst_upper,em_inst_lower);
	rs=(em_inst_upper>>24);
	rt=(em_inst_upper>>16)&0xff;
	rd=(em_inst_upper>>8)&0xff;
	printf("Execute:\t %x %s %i=%i,%i",em_PC,name,rd,rs,rt);
	if (em_is_stall)
		printf(" (stall)");
	printf("\n");

	//memory/writeback latch
	get_instruction_name(mw_inst_upper,mw_inst_lower);
	rs=(mw_inst_upper>>24);
	rt=(mw_inst_upper>>16)&0xff;
	rd=(mw_inst_upper>>8)&0xff;
	printf("Memory: \t %x %s %i=%i,%i",mw_PC,name,rd,rs,rt);
	if (mw_is_stall)
		printf(" (stall)");
	printf("\n");

	//the instruction that most recently left writeback
	get_instruction_name(0,last_instruction);
	printf("Completed instruction: %x %s\n",last_instruction_PC,name);

	printf("HI: %x\tLO: %x\tFCC: %x\n",HI,LO,FCC);

	//integer registers, two per line
	for (reg=0; reg<32; reg+=2)
		printf("R[%i]:\t%x\tR[%i]:\t%x\n",reg,R[reg],reg+1,R[reg+1]);

	//fp registers, two per line, each shown as raw bits, float and double
	for (reg=0; reg<32; reg+=2)
	{
		printf("F[%i]:\t%x,%f,%lf\t",reg,F.l[reg],F.f[reg],F.d[reg>>1]);
		printf("F[%i]:\t%x,%f,%lf\n",reg+1,F.l[reg+1],F.f[reg+1],F.d[(reg+1)>>1]);
	}
	printf("\n");
}

//dumpregs appends one record to the regdump trace file:
//an "@<instruction count>" line followed by a line holding HI, LO, FCC,
//R[0..31] and the raw bits of F[0..31], all in hex.
//Nothing is written until instruction_counter has reached dump_skip_instructions.
void dumpregs()
{
	int reg;

	if (!(instruction_counter<dump_skip_instructions))
	{
		fprintf(regdump,"@%i\n",instruction_counter);
		fprintf(regdump,"%x %x %x ",HI,LO,FCC);

		//integer registers
		reg=0;
		while (reg<32)
		{
			fprintf(regdump,"%x ",R[reg]);
			reg++;
		}

		//floating point registers, as raw bit patterns
		reg=0;
		while (reg<32)
		{
			fprintf(regdump,"%x ",F.l[reg]);
			reg++;
		}

		fprintf(regdump,"\n");
	}
}

//printregs writes the architectural state to stdout (used in functional only):
//PC/HI/LO/FCC, then the integer registers and the floating point registers,
//two per line.  Each fp register is shown as raw bits, float and double.
void printregs()
{
	int reg;

	printf("PC: %x\tHI: %x\tLO: %x\tFCC: %x\n",PC,HI,LO,FCC);

	reg=0;
	while (reg<32)
	{
		printf("R[%i]:\t%x\tR[%i]:\t%x\n",reg,R[reg],reg+1,R[reg+1]);
		reg+=2;
	}

	reg=0;
	while (reg<32)
	{
		printf("F[%i]:\t%x,%f,%lf\t",reg,F.l[reg],F.f[reg],F.d[reg>>1]);
		printf("F[%i]:\t%x,%f,%lf\n",reg+1,F.l[reg+1],F.f[reg+1],F.d[(reg+1)>>1]);
		reg+=2;
	}
}

//pipeline_flush empties the pipeline and restarts fetch at newPC.
//Every latch is overwritten with a nop marked as a flush stall (3), all
//pending writeback and load flags in the execute/memory and memory/writeback
//latches are cleared, and any outstanding branch prediction is dropped.
void pipeline_flush(int newPC)
{
	//the next fetch comes from here
	PC=newPC;

	//nop instruction is 0
	fd_inst_upper=fd_inst_lower=0;
	de_inst_upper=de_inst_lower=0;
	em_inst_upper=em_inst_lower=0;
	mw_inst_upper=mw_inst_lower=0;

	//3 = stall due to pipeline flush
	fd_is_stall=de_is_stall=em_is_stall=mw_is_stall=3;

	//memory/writeback latch: nothing may reach the register file any more
	mw_write_rs=mw_write_rt=mw_write_rt2=mw_write_rd=0;
	mw_write_fs_l=mw_write_fs_l2=mw_write_fs_f=mw_write_fs_d=0;
	mw_write_ft_l=mw_write_ft_f=mw_write_ft_d=0;
	mw_write_fd_l=mw_write_fd_f=mw_write_fd_d=0;
	mw_write_HI=mw_write_LO=mw_write_FCC=mw_write_ra=0;
	mw_load_rt=mw_load_rt2=mw_load_ft=mw_load_ft2=0;

	//execute/memory latch: same treatment
	em_write_rs=em_write_rt=em_write_rt2=em_write_rd=0;
	em_write_fs_l=em_write_fs_l2=em_write_fs_f=em_write_fs_d=0;
	em_write_ft_l=em_write_ft_f=em_write_ft_d=0;
	em_write_fd_l=em_write_fd_f=em_write_fd_d=0;
	em_write_HI=em_write_LO=em_write_FCC=em_write_ra=0;
	em_load_rt=em_load_rt2=em_load_ft=em_load_ft2=0;

	//the next instruction (at newPC) is not speculative
	branch_prediction_made=0;
}

//stage_writeback handles the writeback stage
//if any writeback flags are set, the alu/mem is written to the appropriate regs
//if a syscall, flush the pipeline
//
//Works entirely on the memory/writeback (mw_*) latch.  Order of events:
//  1. a syscall flushes the pipeline and is then handled
//  2. an instruction that asked to be flushed *before* running is flushed and refetched
//  3. register writes selected by the mw_write_* / mw_load_* flags are performed
//  4. the instruction (or the reason for the stall bubble) is counted
//  5. an instruction that asked to be flushed *after* running triggers the flush
//Note that pipeline_flush clears every mw_write_* / mw_load_* flag, so when
//step 1 or 2 flushes, step 3 writes nothing in this cycle.
void stage_writeback()
{
	//register fields of the instruction in writeback; the fp fields share
	//the same bit positions as the integer ones
        int rs=(mw_inst_upper>>24);
        int rt=(mw_inst_upper>>16)&0xff;
        int rd=(mw_inst_upper>>8)&0xff;
        int fs=(mw_inst_upper>>24);
        int ft=(mw_inst_upper>>16)&0xff;
        int fd=(mw_inst_upper>>8)&0xff;

	//check for syscall - if so, flush the pipeline, handle it
	//(0xa0 in the low byte of the instruction identifies the syscall)
	if ((mw_inst_lower&0xff)==0xa0)
	{
		//run the inst after the syscall next
		pipeline_flush(mw_PC+8);
		//don't lose the syscall inst - we want it for computing stats
		//(the flush just replaced it with a nop marked as a stall)
		mw_inst_lower=0xa0;
		mw_is_stall=0;
		//handle the syscall
		handle_syscalls();
	}

	//if pipeline_flush_before is set, flush the pipeline, rerun the instruction
	//pipeline_dualflush==1 restricts this to the first time the instruction
	//arrives here (see the comment on pipeline_dualflush)
	if (pipeline_flush_before==1 && pipeline_dualflush==1)
	{
		//restart at mw_PC itself, so this instruction is fetched again
		pipeline_flush(mw_PC);
	}

	//write back the alu for computational instructions
	if (mw_write_rs==1)
		R[rs]=mw_alu_out;
	if (mw_write_rt==1)
		R[rt]=mw_alu_out;
	//the rt+1, fs+1 and LO writes take the second ALU output
	if (mw_write_rt2==1)
		R[rt+1]=mw_alu_out2;
	if (mw_write_rd==1)
		R[rd]=mw_alu_out;
	if (mw_write_fs_l==1)
		F.l[fs]=mw_alu_out;
	if (mw_write_fs_l2==1)
		F.l[fs+1]=mw_alu_out2;
	if (mw_write_fs_f==1)
		F.f[fs]=mw_alu_f_out;
	//doubles are indexed by register pair, hence the >>1
	if (mw_write_fs_d==1)
		F.d[fs>>1]=mw_alu_d_out;
	if (mw_write_ft_l==1)
		F.l[ft]=mw_alu_out;
	if (mw_write_ft_f==1)
		F.f[ft]=mw_alu_f_out;
	if (mw_write_ft_d==1)
		F.d[ft>>1]=mw_alu_d_out;
	if (mw_write_fd_l==1)
		F.l[fd]=mw_alu_out;
	if (mw_write_fd_f==1)
		F.f[fd]=mw_alu_f_out;
	if (mw_write_fd_d==1)
		F.d[fd>>1]=mw_alu_d_out;
	if (mw_write_HI==1)
		HI=mw_alu_out;
	if (mw_write_LO==1)
		LO=mw_alu_out2;
	if (mw_write_FCC==1)
		FCC=mw_alu_out;
	//ra is register 31
	if (mw_write_ra==1)
		R[31]=mw_alu_out;

	//write back the memory_out for load instructions
	//these come after the alu writes, so a load result wins if both target rt
	if (mw_load_rt==1)
		R[rt]=mw_memory_out;
	if (mw_load_rt2==1)
		R[rt+1]=mw_memory_out2;
	if (mw_load_ft==1)
		F.l[ft]=mw_memory_out;
	if (mw_load_ft2==1)
		F.l[ft+1]=mw_memory_out2;

	//R[0] should be hardwired to 0
	//(undoes any write above that happened to target register 0)
	R[0]=0;

	//if the instruction was not a stall, update the instruction counter
	//this will tell us the IPC later
	if(mw_is_stall==0)
	{
		instruction_counter++;
		//remember it for print_pipeline's "Completed instruction" line
		last_instruction=mw_inst_lower&0xff;
		last_instruction_PC=mw_PC;

		if (dump_trace==1)
			dumpregs();
	}
	else
	{
		//if it was a stall, let's keep track of why
		//1=mispredicted branch, 2=load, 3=pipeline flush, 4=initialization
		if(mw_is_stall==1)
			cycles_stalled_for_branches++;
		if(mw_is_stall==2)
			cycles_stalled_for_loads++;
		if(mw_is_stall==3)
			cycles_stalled_for_flushes++;
		if(mw_is_stall==4)
			cycles_stalled_for_init++;
	}

	//if pipeline_flush_after is set, we can flush now since the instruction is done
	if (pipeline_flush_after==1 && pipeline_dualflush==0)
	{
		//continue with the instruction that follows this one
		pipeline_flush(mw_PC+8);
	}
}

//stage_memory handles the memory stage of the pipeline
//it does the reads and writes for the load & store instructions
void stage_memory()
{
	mw_inst_upper=em_inst_upper;
	mw_inst_lower=em_inst_lower;
	mw_PC=em_PC;
	mw_is_stall=em_is_stall;

	//just pass all the flags along
        mw_write_rs=em_write_rs;
        mw_write_rt=em_write_rt;
        mw_write_rt2=em_write_rt2;
        mw_write_rd=em_write_rd;
        mw_write_fs_l=em_write_fs_l;
        mw_write_fs_l2=em_write_fs_l2;  
        mw_write_fs_f=em_write_fs_f;     
        mw_write_fs_d=em_write_fs_d;
        mw_write_ft_l=em_write_ft_l;
        mw_write_ft_f=em_write_ft_f;   
        mw_write_ft_d=em_write_ft_d;
        mw_write_fd_l=em_write_fd_l;
        mw_write_fd_f=em_write_fd_f;
        mw_write_fd_d=em_write_fd_d;   
        mw_write_ra=em_write_ra;
        mw_write_HI=em_write_HI;
        mw_write_LO=em_write_LO;
        mw_write_FCC=em_write_FCC;

	mw_alu_out=em_alu_out;
	mw_alu_out2=em_alu_out2;
	mw_alu_f_out=em_alu_f_out;
	mw_alu_d_out=em_alu_d_out;

	mw_load_rt=em_load_rt;
	mw_load_rt2=em_load_rt2;
	mw_load_ft=em_load_ft;
	mw_load_ft2=em_load_ft2;

	//do the memory read/write
	domemory();
}

//stage_execute advances the instruction in the decode/execute (de_*) latch
//into the execute/memory (em_*) latch and then runs its computation.
void stage_execute()
{
	//which instruction this is
	em_PC = de_PC;
	em_inst_upper = de_inst_upper;
	em_inst_lower = de_inst_lower;
	em_is_stall = de_is_stall;

	//rt/ft and their +1 partners travel along: a store needs
	//them when it reaches the memory stage
	em_r_rt = de_r_rt;
	em_r_rt2 = de_r_rt2;
	em_ft = de_ft;
	em_ft2 = de_ft2;

	//do the instruction
	doexecute();
}

//decode_data_forward handles the data forwarding from memory to decode and from execute to decode
//output registers are compared with the decode registers.  if they are equal, the decode register values
//are overwritten with the results from execute and memory
//memory is handled first, execute second, because execute might overwrite memory (being a later instruction)
void decode_data_forward()
{
	//decode stage register numbers
        int rs=(de_inst_upper>>24);
        int rt=(de_inst_upper>>16)&0xff;
	int rt2=rt+1;
        int rd=(de_inst_upper>>8)&0xff;
        int fs=(de_inst_upper>>24);
	int fs2=fs+1;
        int ft=(de_inst_upper>>16)&0xff;
	int ft2=ft+1;
        int fd=(de_inst_upper>>8)&0xff;
	//execute stage register numbers
        int emrs=(em_inst_upper>>24);
        int emrt=(em_inst_upper>>16)&0xff;
	int emrt2=emrt+1;
        int emrd=(em_inst_upper>>8)&0xff;
        int emfs=(em_inst_upper>>24);
	int emfs2=emfs+1;
        int emft=(em_inst_upper>>16)&0xff;
        int emfd=(em_inst_upper>>8)&0xff;
	//memory stage register numbers
        int mwrs=(mw_inst_upper>>24);
        int mwrt=(mw_inst_upper>>16)&0xff;
	int mwrt2=mwrt+1;
        int mwrd=(mw_inst_upper>>8)&0xff;
        int mwfs=(mw_inst_upper>>24);
	int mwfs2=mwfs+1;
        int mwft=(mw_inst_upper>>16)&0xff;
	int mwft2=mwft+1;
        int mwfd=(mw_inst_upper>>8)&0xff;

	//data forwarding - check output of memory stage & overwrite register values if needed
	//if the memory stage is writing to the rs register:
	if (mw_write_rs==1)
	{
		//if the memory stage rs register is the same as the decode stage rs register
		if (mwrs==rs)
			//copy the memory stage computational output to the decode stage rs
			de_r_rs=mw_alu_out;
		//if the memory stage rs register is the same as the decode stage rt register
		if (mwrs==rt)
			//copy the memory stage computational output to the decode stage rt
			de_r_rt=mw_alu_out;
		//if the memory stage rs register is the same as the decode stage rt+1 register
		if (mwrs==rt2)
			//copy the memory stage computational output to the decode stage rt+1
			de_r_rt2=mw_alu_out;
		//if the memory stage rs register is the same as the decode stage rd register
		if (mwrs==rd)
			//copy the memory stage computational output to the decode stage rd
			de_r_rd=mw_alu_out;
	}
	//same for rt
	if (mw_write_rt==1)
	{
		if (mwrt==rs)
			de_r_rs=mw_alu_out;
		if (mwrt==rt)
			de_r_rt=mw_alu_out;
		if (mwrt==rt2)
			de_r_rt2=mw_alu_out;
		if (mwrt==rd)
			de_r_rd=mw_alu_out;
	}
	//and for rt+1
	if (mw_write_rt2==1)
	{
		if (mwrt2==rs)
			de_r_rs=mw_alu_out2;
		if (mwrt2==rt)
			de_r_rt=mw_alu_out2;
		if (mwrt2==rt2)
			de_r_rt2=mw_alu_out2;
		if (mwrt2==rd)
			de_r_rd=mw_alu_out2;
	}
	//and for rd
	if (mw_write_rd==1)
	{
		if (mwrd==rs)
			de_r_rs=mw_alu_out;
		if (mwrd==rt)
			de_r_rt=mw_alu_out;
		if (mwrd==rt2)
			de_r_rt2=mw_alu_out;
		if (mwrd==rd)
			de_r_rd=mw_alu_out;
	}
	//we need to do this again with floating point registers
	//this needs to be done 3 times: for integer, float, and double versions of fp regs
	if (mw_write_fs_l==1)
	{
		if (mwfs==fs)
			de_fs.l=mw_alu_out;
		if (mwfs==fs2)
			de_fs2.l=mw_alu_out;
		if (mwfs==ft)
			de_ft.l=mw_alu_out;
		if (mwfs==ft2)
			de_ft2.l=mw_alu_out;
		if (mwfs==fd)
			de_fd.l=mw_alu_out;
	}
	if (mw_write_fs_l2==1)
	{
		if (mwfs2==fs)
			de_fs.l=mw_alu_out2;
		if (mwfs2==fs2)
			de_fs2.l=mw_alu_out2;
		if (mwfs2==ft)
			de_ft.l=mw_alu_out2;
		if (mwfs2==ft2)
			de_ft2.l=mw_alu_out2;
		if (mwfs2==fd)
			de_fd.l=mw_alu_out2;
	}
	if (mw_write_ft_l==1)
	{
		if (mwft==fs)
			de_fs.l=mw_alu_out;
		if (mwft==fs2)
			de_fs2.l=mw_alu_out;
		if (mwft==ft)
			de_ft.l=mw_alu_out;
		if (mwft==ft2)
			de_ft2.l=mw_alu_out;
		if (mwft==fd)
			de_fd.l=mw_alu_out;
	}
	if (mw_write_fd_l==1)
	{
		if (mwfd==fs)
			de_fs.l=mw_alu_out;
		if (mwfd==fs2)
			de_fs2.l=mw_alu_out;
		if (mwfd==ft)
			de_ft.l=mw_alu_out;
		if (mwfd==ft2)
			de_ft2.l=mw_alu_out;
		if (mwfd==fd)
			de_fd.l=mw_alu_out;
	}
	if (mw_write_fs_f==1)
	{
		if (mwfs==fs)
			de_fs.f=mw_alu_f_out;
		if (mwfs==fs2)
			de_fs2.f=mw_alu_f_out;
		if (mwfs==ft)
			de_ft.f=mw_alu_f_out;
		if (mwfs==ft2)
			de_ft2.f=mw_alu_f_out;
		if (mwfs==fd)
			de_fd.f=mw_alu_f_out;
	}
	if (mw_write_ft_f==1)
	{
		if (mwft==fs)
			de_fs.f=mw_alu_f_out;
		if (mwft==fs2)
			de_fs2.f=mw_alu_f_out;
		if (mwft==ft)
			de_ft.f=mw_alu_f_out;
		if (mwft==ft2)
			de_ft2.f=mw_alu_f_out;
		if (mwft==fd)
			de_fd.f=mw_alu_f_out;
	}
	if (mw_write_fd_f==1)
	{
		if (mwfd==fs)
			de_fs.f=mw_alu_f_out;
		if (mwfd==fs2)
			de_fs2.f=mw_alu_f_out;
		if (mwfd==ft)
			de_ft.f=mw_alu_f_out;
		if (mwfd==ft2)
			de_ft2.f=mw_alu_f_out;
		if (mwfd==fd)
			de_fd.f=mw_alu_f_out;
	}
	if (mw_write_fs_d==1)
	{
		if (mwfs==fs)
			de_fs.d=mw_alu_d_out;
		if (mwfs==fs2)
			de_fs.d=mw_alu_d_out;
		if (mwfs==ft)
			de_ft.d=mw_alu_d_out;
		if (mwfs==ft2)
			de_ft2.d=mw_alu_d_out;
		if (mwfs==fd)
			de_fd.d=mw_alu_d_out;
	}
	if (mw_write_ft_d==1)
	{
		if (mwft==fs)
			de_fs.d=mw_alu_d_out;
		if (mwft==fs2)
			de_fs2.d=mw_alu_d_out;
		if (mwft==ft)
			de_ft.d=mw_alu_d_out;
		if (mwft==ft2)
			de_ft2.d=mw_alu_d_out;
		if (mwft==fd)
			de_fd.d=mw_alu_d_out;
	}
	if (mw_write_fd_d==1)
	{
		if (mwfd==fs)
			de_fs.d=mw_alu_d_out;
		if (mwfd==fs2)
			de_fs2.d=mw_alu_d_out;
		if (mwfd==ft)
			de_ft.d=mw_alu_d_out;
		if (mwfd==ft2)
			de_ft2.d=mw_alu_d_out;
		if (mwfd==fd)
			de_fd.d=mw_alu_d_out;
	}
	//also might need to forward the ra register if the memory inst was a jal or jalr
	if (mw_write_ra==1)
	{
		if (31==rs)
			de_r_rs=mw_alu_out;
		if (31==rt)
			de_r_rt=mw_alu_out;
		if (31==rt2)
			de_r_rt2=mw_alu_out;
		if (31==rd)
			de_r_rd=mw_alu_out;
	}
	//forward FCC
	if (mw_write_FCC==1)
	{
		de_FCC=mw_alu_out;
	}
	//forward HI
	if (mw_write_HI==1)
	{
		de_HI=mw_alu_out;
	}
	//forward LO
	if (mw_write_LO==1)
	{
		de_LO=mw_alu_out2;
	}
	//the above forwarding was only for computational instructions
	//load instruction results may also need to be forwarded
	if (mw_load_rt==1)
	{
		if (mwrt==rs)
			de_r_rs=mw_memory_out;
		if (mwrt==rt)
			de_r_rt=mw_memory_out;
		if (mwrt==rt2)
			de_r_rt2=mw_memory_out;
		if (mwrt==rd)
			de_r_rd=mw_memory_out;
	}
	if (mw_load_rt2==1)
	{
		if (mwrt2==rs)
			de_r_rs=mw_memory_out2;
		if (mwrt2==rt)
			de_r_rt=mw_memory_out2;
		if (mwrt2==rt2)
			de_r_rt2=mw_memory_out2;
		if (mwrt2==rd)
			de_r_rd=mw_memory_out2;
	}
	if (mw_load_ft==1)
	{
		if (mwft==fs)
			de_fs.l=mw_memory_out;
		if (mwft==fs2)
			de_fs2.l=mw_memory_out;
		if (mwft==ft)
			de_ft.l=mw_memory_out;
		if (mwft==ft2)
			de_ft2.l=mw_memory_out;
		if (mwft==fd)
			de_fd.l=mw_memory_out;
	}
	if (mw_load_ft2==1)
	{
		if (mwft2==fs)
			de_fs.l=mw_memory_out2;
		if (mwft2==fs2)
			de_fs2.l=mw_memory_out2;
		if (mwft2==ft)
			de_ft.l=mw_memory_out2;
		if (mwft2==ft2)
			de_ft2.l=mw_memory_out2;
		if (mwft2==fd)
			de_fd.l=mw_memory_out2;
	}


	//data forwarding - check output of execute stage & overwrite register values if needed
	//this is identical to above, except that load results aren't forwarded
	if (em_write_rs==1)
	{
		if (emrs==rs)
			de_r_rs=em_alu_out;
		if (emrs==rt)
			de_r_rt=em_alu_out;
		if (emrs==rt2)
			de_r_rt2=em_alu_out;
		if (emrs==rd)
			de_r_rd=em_alu_out;
	}
	if (em_write_rt==1)
	{
		if (emrt==rs)
			de_r_rs=em_alu_out;
		if (emrt==rt)
			de_r_rt=em_alu_out;
		if (emrt==rt2)
			de_r_rt2=em_alu_out;
		if (emrt==rd)
			de_r_rd=em_alu_out;
	}
	if (em_write_rt2==1)
	{
		if (emrt2==rs)
			de_r_rs=em_alu_out2;
		if (emrt2==rt)
			de_r_rt=em_alu_out2;
		if (emrt2==rt2)
			de_r_rt2=em_alu_out2;
		if (emrt2==rd)
			de_r_rd=em_alu_out2;
	}
	if (em_write_rd==1)
	{
		if (emrd==rs)
			de_r_rs=em_alu_out;
		if (emrd==rt)
			de_r_rt=em_alu_out;
		if (emrd==rt2)
			de_r_rt2=em_alu_out;
		if (emrd==rd)
			de_r_rd=em_alu_out;
	}
	if (em_write_fs_l==1)
	{
		if (emfs==fs)
			de_fs.l=em_alu_out;
		if (emfs==fs2)
			de_fs2.l=em_alu_out;
		if (emfs==ft)
			de_ft.l=em_alu_out;
		if (emfs==ft2)
			de_ft2.l=em_alu_out;
		if (emfs==fd)
			de_fd.l=em_alu_out;
	}
	if (em_write_fs_l2==1)
	{
		if (emfs2==fs)
			de_fs.l=em_alu_out2;
		if (emfs2==fs2)
			de_fs2.l=em_alu_out2;
		if (emfs2==ft)
			de_ft.l=em_alu_out2;
		if (emfs2==ft2)
			de_ft2.l=em_alu_out2;
		if (emfs2==fd)
			de_fd.l=em_alu_out2;
	}
	if (em_write_ft_l==1)
	{
		if (emft==fs)
			de_fs.l=em_alu_out;
		if (emft==fs2)
			de_fs2.l=em_alu_out;
		if (emft==ft)
			de_ft.l=em_alu_out;
		if (emft==ft2)
			de_ft2.l=em_alu_out;
		if (emft==fd)
			de_fd.l=em_alu_out;
	}
	if (em_write_fd_l==1)
	{
		if (emfd==fs)
			de_fs.l=em_alu_out;
		if (emfd==fs2)
			de_fs2.l=em_alu_out;
		if (emfd==ft)
			de_ft.l=em_alu_out;
		if (emfd==ft2)
			de_ft2.l=em_alu_out;
		if (emfd==fd)
			de_fd.l=em_alu_out;
	}
	if (em_write_fs_f==1)
	{
		if (emfs==fs)
			de_fs.f=em_alu_f_out;
		if (emfs==fs2)
			de_fs2.f=em_alu_f_out;
		if (emfs==ft)
			de_ft.f=em_alu_f_out;
		if (emfs==ft2)
			de_ft2.f=em_alu_f_out;
		if (emfs==fd)
			de_fd.f=em_alu_f_out;
	}
	if (em_write_ft_f==1)
	{
		if (emft==fs)
			de_fs.f=em_alu_f_out;
		if (emft==fs2)
			de_fs2.f=em_alu_f_out;
		if (emft==ft)
			de_ft.f=em_alu_f_out;
		if (emft==ft2)
			de_ft2.f=em_alu_f_out;
		if (emft==fd)
			de_fd.f=em_alu_f_out;
	}
	if (em_write_fd_f==1)
	{
		if (emfd==fs)
			de_fs.f=em_alu_f_out;
		if (emfd==fs2)
			de_fs2.f=em_alu_f_out;
		if (emfd==ft)
			de_ft.f=em_alu_f_out;
		if (emfd==ft2)
			de_ft2.f=em_alu_f_out;
		if (emfd==fd)
			de_fd.f=em_alu_f_out;
	}
	if (em_write_fs_d==1)
	{
		if (emfs==fs)
			de_fs.d=em_alu_d_out;
		if (emfs==fs2)
			de_fs2.d=em_alu_d_out;
		if (emfs==ft)
			de_ft.d=em_alu_d_out;
		if (emfs==ft2)
			de_ft2.d=em_alu_d_out;
		if (emfs==fd)
			de_fd.d=em_alu_d_out;
	}
	if (em_write_ft_d==1)
	{
		if (emft==fs)
			de_fs.d=em_alu_d_out;
		if (emft==fs2)
			de_fs2.d=em_alu_d_out;
		if (emft==ft)
			de_ft.d=em_alu_d_out;
		if (emft==ft2)
			de_ft2.d=em_alu_d_out;
		if (emft==fd)
			de_fd.d=em_alu_d_out;
	}
	if (em_write_fd_d==1)
	{
		if (emfd==fs)
			de_fs.d=em_alu_d_out;
		if (emfd==fs2)
			de_fs2.d=em_alu_d_out;
		if (emfd==ft)
			de_ft.d=em_alu_d_out;
		if (emfd==ft2)
			de_ft2.d=em_alu_d_out;
		if (emfd==fd)
			de_fd.d=em_alu_d_out;
	}
	if (em_write_ra==1)
	{
		if (31==rs)
			de_r_rs=em_alu_out;
		if (31==rt)
			de_r_rt=em_alu_out;
		if (31==rt2)
			de_r_rt2=em_alu_out;
		if (31==rd)
			de_r_rd=em_alu_out;
	}
	if (em_write_FCC==1)
	{
		de_FCC=em_alu_out;
	}
	if (em_write_HI==1)
	{
		de_HI=em_alu_out;
	}
	if (em_write_LO==1)
	{
		de_LO=em_alu_out2;
	}
}

//stage_decode handles the decode stage of the pipeline
//it copies the fetch->decode (fd_) pipeline registers into the decode->execute (de_) registers,
//reads the register contents the instruction might use into pipeline registers,
//then handles data forwarding from mem and ex, and takes care of mispredicted branches
void stage_decode()
{
	int rs,rt,rd;
	int rs_ok;

	de_inst_upper=fd_inst_upper;
	de_inst_lower=fd_inst_lower;
	de_PC=fd_PC;
	de_is_stall=fd_is_stall;

	//register fields of the instruction
	//the floating point fields fs, ft and fd are the same bits as rs, rt and rd
	rs=(de_inst_upper>>24);
	rt=(de_inst_upper>>16)&0xff;
	rd=(de_inst_upper>>8)&0xff;

	//rs is the only field that is not masked: if de_inst_upper is a signed type and its top bit
	//is set (e.g. a word that is not a real instruction), the shift can produce a negative
	//number, which would pass a bare rs<=NUM_REGS test and index below the register arrays
	rs_ok=(rs>=0 && rs<=NUM_REGS);

	//read all the registers that might be used by the decode instruction into the pipeline registers
	de_HI=HI;
	de_LO=LO;
	de_FCC=FCC;
	if (rs_ok)
		de_r_rs=R[rs];
	if (rt<=NUM_REGS)
		de_r_rt=R[rt];
	if (rt+1<=NUM_REGS)
		de_r_rt2=R[rt+1];
	if (rd<=NUM_REGS)
		de_r_rd=R[rd];

	//floating point registers: for each pipeline register the double member is written first,
	//then the integer member, then the float member - this order is kept as it was
	if (rs_ok)
		de_fs.d=F.d[rs>>1];
	if (rt<=NUM_REGS)
		de_ft.d=F.d[rt>>1];
	if (rd<=NUM_REGS)
		de_fd.d=F.d[rd>>1];
	if (rs_ok)
		de_fs.l=F.l[rs];
	if (rs>=0 && rs+1<=NUM_REGS)
		de_fs2.l=F.l[rs+1];
	if (rt<=NUM_REGS)
		de_ft.l=F.l[rt];
	if (rt+1<=NUM_REGS)
		de_ft2.l=F.l[rt+1];
	if (rd<=NUM_REGS)
		de_fd.l=F.l[rd];
	if (rs_ok)
		de_fs.f=F.f[rs];
	if (rt<=NUM_REGS)
		de_ft.f=F.f[rt];
	if (rd<=NUM_REGS)
		de_fd.f=F.f[rd];

	//forward data from the memory and execute stages whose output registers = decode registers
	decode_data_forward();

	//take care of branch instructions that must be resolved at decode
	dodecode();
}

//fetch_check_loads tells whether the instruction just fetched depends on a load in decode
//if so, it needs to be stalled for a cycle, since load results aren't known until memory
//returns 1 if the fetched instruction must stall, 0 if there is no conflict
int fetch_check_loads()
{
	//register fields read by the instruction following the load
	//(its fs/ft fields are the same instruction bits as rs/rt)
	int src_hi=(fd_inst_upper>>24);
	int src_lo=(fd_inst_upper>>16)&0xff;

	//target register field of the load (rt and ft are the same instruction bits)
	int target=(de_inst_upper>>16)&0xff;
	int target2=target+1;

	//integer load into rt: conflicts with the fetched rs, rt or rt+1
	if (fetch_load_stall_rt==1 && (target==src_hi || target==src_lo || target==src_lo+1))
		return 1;

	//integer load into rt+1
	if (fetch_load_stall_rt2==1 && (target2==src_hi || target2==src_lo || target2==src_lo+1))
		return 1;

	//floating point load into ft: conflicts with the fetched fs, fs+1, ft or ft+1
	if (fetch_load_stall_ft==1 && (target==src_hi || target==src_hi+1 || target==src_lo || target==src_lo+1))
		return 1;

	//floating point load into ft+1
	if (fetch_load_stall_ft2==1 && (target2==src_hi || target2==src_hi+1 || target2==src_lo || target2==src_lo+1))
		return 1;

	//no conflicts, so no stall is needed
	return 0;
}

//stage_fetch handles the fetch stage of the pipeline
//the new instruction (two words) is read from the instruction cache, and PC is moved along to PC+8
//three situations are told apart:
// - fetch_overwrite==1: decode assigned a new PC (j instruction), so the fetch is redone from PC
// - fetch_stall==1: a branch is stalling fetch, so a nop is placed in the pipeline instead
// - otherwise: fetch from predicted_PC if a prediction was made, else from PC, and kill the
//   instruction again if it depends on a load that is in decode
void stage_fetch()
{
	int redirected=(fetch_overwrite==1);

	//assume that we won't stall the fetch (fd_is_stall==0)
	fd_is_stall=0;

	//fetch stall due to branch - replace fetch instruction with nop
	if (!redirected && fetch_stall==1)
	{
		fd_inst_lower=0;
		fd_inst_upper=0;
		fd_PC=0;
		fd_is_stall=1;

		branch_prediction_made=0;
		return;
	}

	if (!redirected && branch_prediction_made>0)
	{
		//a branch prediction was made: fetch from predicted_PC, not PC (may need to squash later though)
		fd_inst_lower=icache_read_word(predicted_PC);
		fd_inst_upper=icache_read_word(predicted_PC+4);
		fd_PC=predicted_PC;
	}
	else
	{
		//redirected fetch or plain sequential fetch: read from PC
		fd_inst_lower=icache_read_word(PC);
		fd_inst_upper=icache_read_word(PC+4);
		fd_PC=PC;
	}
	//PC moves along in every one of these cases
	PC=PC+8;

	//check for dependencies between the instruction just fetched and loads in the decode stage
	//(not done for a redirected fetch)
	if (!redirected && fetch_check_loads()==1)
	{
		//there is a dependency: undo the fetch - make instruction a stall
		fd_inst_lower=0;
		fd_inst_upper=0;
		fd_PC=0;
		fd_is_stall=2;
		PC-=8;
	}

	//if the new instruction just fetched is a branch, get prediction for next PC
	dofetch();
}

//inorder_simulate runs the pipeline simulation, one cycle per loop iteration
//it returns once total_instructions is non-negative and that many instructions have been counted
void inorder_simulate()
{
	int print_interval=print_instruction;

	//keep cycling until the instruction limit given on the command line (if any) is reached
	while (!(total_instructions>=0 && instruction_counter>=total_instructions))
	{
		//the 5 stages run in reverse order
		//otherwise fetch might overwrite decode's input before decode can deal with it, and so on
		stage_writeback();
		stage_memory();
		stage_execute();
		stage_decode();
		stage_fetch();

		//debug mode: once the requested number of instructions has elapsed,
		//print the pipeline & registers each cycle and wait for the user between cycles
		if (total_instructions_until_debug>=0 && instruction_counter >=total_instructions_until_debug)
		{
			printf("%u: %s\n",counter,name);
			print_pipeline();
			getchar();
		}

		//if requested, print the name and address of every "print_instruction"th instruction
		if (print_instruction>0 && (instruction_counter-1)%print_interval==0)
		{
			get_instruction_name(0,last_instruction);
			printf("%u: %x %s\n",instruction_counter,last_instruction_PC,name);
		}

		//one more cycle has gone by
		counter++;
	}
}

//doinordersim sets up the inorder simulator and launches it
//it opens the trace file (if dump_trace==1), clears the registers and the pipeline registers,
//sets PC and the stack pointer, runs inorder_simulate() and finally closes the trace file
//if trace.txt cannot be created, the error is reported and dump_trace is set to 0, so that
//the simulation still runs and nothing is written to (or closed on) a null file pointer
void doinordersim()
{
	int i;

	//create the dump file, if needed
	if (dump_trace==1)
	{
		regdump=fopen("trace.txt","w");
		if (regdump==0)
		{
			perror("trace.txt");
			dump_trace=0;
		}
	}

	//set the GPRs to 0
	for (i=0; i<NUM_REGS; i++)
		R[i]=0;
	HI=0;
	LO=0;

	//set up initial registers
	PC=Prog_Entry;
	R[SP]=Param_Start;

	//clear the pipeline
	fd_inst_upper=0;
	fd_inst_lower=0;
	fd_is_stall=4;
	de_inst_upper=0;
	de_inst_lower=0;
	de_is_stall=4;
	em_inst_upper=0;
	em_inst_lower=0;
	em_is_stall=4;
	mw_inst_upper=0;
	mw_inst_lower=0;
	mw_is_stall=4;

	pipeline_dualflush=0;

	//start the simulator
	inorder_simulate();

	//close the dump file, if it was opened
	if (dump_trace==1 && regdump!=0)
		fclose(regdump);
}

//functional_simulate does the functional simulation
void functional_simulate()
{
	int inst_lower;
	int inst_upper;
	int i;
	int P=print_instruction;

	//loop forever
	while(1==1)
	{
		//if user specified max instructions, and they have elapsed, stop simulating
		if (total_instructions>=0 && counter>=total_instructions)
			break;

		//fetch the instruction
		inst_lower=icache_read_word(PC);
		inst_upper=icache_read_word(PC+4);

		//execute the instruction
		doinstruction(inst_upper, inst_lower);

		//if user requested debug after so many instructions, and they have elapsed,
		//print out the registers every cycle, prompt user
		if (total_instructions_until_debug>=0 && counter>=total_instructions_until_debug)
		{
			printf("%u: %s\n",counter,name);
			printregs();
			getchar();
		}
		//prints out every "print_instruction"th instruction and address, if the used desires
		if (print_instruction>0)
		{
			if (counter%P==0)
				printf("%u: %x %s\n",counter,PC,name);
		}

		//increment counter - this would be # cycles, except the functional simulator isn't cycle accurate
		counter++;
		//increment instruction counter
		instruction_counter++;

		//dump regs to file, if user requested
		if (dump_trace==1)
			dumpregs();
	}
}

//dofuncsim launches the functional simulator
//it opens the trace file (if dump_trace==1), clears the registers, sets PC and the stack pointer,
//runs functional_simulate() and finally closes the trace file
//if trace.txt cannot be created, the error is reported and dump_trace is set to 0, so that
//the simulation still runs and functional_simulate() does not dump registers to a null file pointer
void dofuncsim()
{
	int i;

	//open a file to dump the register outputs, if the user desires
	if (dump_trace==1)
	{
		regdump=fopen("trace.txt","w");
		if (regdump==0)
		{
			perror("trace.txt");
			dump_trace=0;
		}
	}

	//set the GPRs to 0
	for (i=0; i<NUM_REGS; i++)
		R[i]=0;
	HI=0;
	LO=0;

	//set up initial registers
	PC=Prog_Entry;
	R[SP]=Param_Start;

	//start simulating
	functional_simulate();

	//close the dump file, if it was opened
	if (dump_trace==1 && regdump!=0)
	{
		printf("Closing\n");
		fclose(regdump);
	}
}
//mysimoutorder.c
//Michael Black, 2006
//
//mysimoutorder contains the core of the simulator logic for the out-of-order processor

#include "mysim.h"
#include "mysimoutorder.h"
#include <math.h>
#include <stdio.h>

//regdump is a debugging output file to contain the reg values for all instructions
extern FILE *regdump;

//if power_simulate is 1, power modeling is done with Wattch
int power_simulate=0;

long double totaltime=0;

//statistics
//number of cycles
extern unsigned int counter;
//number of instructions
extern unsigned int instruction_counter;
//every "print_instruction"th instruction is dumped to screen (unless it's -1)
extern int print_instruction;

//the register set - defined in mysim.c
extern unsigned int PC;
extern int R[NUM_REGS+1];
extern int HI;
extern int LO;
extern int FCC;
extern Ftype F;

//defined in loader.c, inst.c, main.c
extern unsigned int Prog_Entry;
extern unsigned int Param_Start;
extern int total_instructions;
extern int total_instructions_until_debug;
extern int dump_trace;

//defined in mycache.c
extern int init_il1_cache_hit_latency;
extern int init_il2_cache_hit_latency;
extern int init_il2_cache_miss_latency;
extern int init_dl1_cache_hit_latency;
extern int init_dl2_cache_hit_latency;
extern int init_dl2_cache_miss_latency;
extern int il1_cache_hit_latency;
extern int il2_cache_hit_latency;
extern int il2_cache_miss_latency;
extern int dl1_cache_hit_latency;
extern int dl2_cache_hit_latency;
extern int dl2_cache_miss_latency;

//criticality flags
int addonecycle=0;
int subtractcritical=0;

//structural defaults
//how many fu's, queue sizes...
int integer_ALUs=4;
int integer_multdiv_ALUs=1;
int float_ALUs=2;
int float_multdiv_ALUs=1;
int reservation_station_number=128;
int reorder_buffer_size=128;
int lsq_size=64;
int dispatch_queue_size=16;
int fetches_per_cycle=1;

//latency defaults
int integer_latency=1;
int integer_multdiv_latency=10;
int float_latency=10;
int float_multdiv_latency=10;
int ls_latency=1;
unsigned int init_clock_speed=600000000;
unsigned int clock_speed;

//quantity of functional units available
//(ooo_squash hands a unit back by incrementing the matching counter)
int fu_integer;
int fu_integer_multdiv;
int fu_float;
int fu_float_multdiv;

//reservation station holding instruction sourcing these registers
//ooo_squash resets an entry to -1 when the station it names is squashed
int res_R[NUM_REGS+1];
int res_HI;
int res_LO;
int res_FCC;
int res_F[NUM_REGS+1];
//if instruction is sourcing these regs, which dest register is it?
//0 = rt, 1 = rt+1, 2 = rd, 3 = ra
int res_R_dreg[NUM_REGS+1];
//0 = fs, 1 = fs+1, 2 = ft, 3 = ft+1, 4 = fd, 5 = fd+1
int res_F_dreg[NUM_REGS+1];

//reservation stations and load/store queue
Reservation_Station *resstat;

//load/store queue entries
int inst_in_lsq=0;

//reorder buffer
//ooo_commit takes entries from rob_tail; ooo_squash pulls rob_head back
Reorder_Buffer_Entry *ROB;
int inst_in_rob=0;
int rob_head=0;
int rob_tail=0;

//fetch registers
Inst ooo_fi_inst;
int time_left_fetch=0;

//dispatch queue - holds instructions recently fetched
Inst *dispatch_queue;
int inst_in_dispatchqueue=0;

//ready queue - holds instructions needing a functional unit
int *ready_queue;
int inst_in_ready_queue=0;

//for debugging, save the most recent instruction to complete
Inst ooo_last_instruction;
int ooo_last_instruction_exists=0;

//print_state prints out the registers and pipeline for debugging
void print_state()
{
	int i,j;

	printf("-----------------------------------------------------------------------\n");
	printf("Cycles: %u\tInstructions: %u\tPC: %x\n",counter,instruction_counter,PC);
	if (ooo_last_instruction_exists==1)
		printf("Last instruction to complete: %x %s\n",ooo_last_instruction.addr,ooo_last_instruction.name);
	printf("\n");
	for (i=inst_in_dispatchqueue-1; i>=0; i--)
		printf("Dispatch: %x %s\n", dispatch_queue[i].addr, dispatch_queue[i].name);
	for (i=0; i<reservation_station_number; i++)
	{
		printf("Res Station %i (%i): ", i, resstat[i].type);
		if (resstat[i].occupied==0)
			printf("empty\n");
		else
		{
			printf("%x %s\tbusy: %i",resstat[i].instruction.addr,resstat[i].instruction.name,resstat[i].busy);
			printf(", rob: %i",resstat[i].rob_place);
			if (resstat[i].busy==3 && resstat[i].type==4)
				printf(", lsqorder: %i",resstat[i].lsq_order);
			if (resstat[i].busy==3)
				printf(", waiting flags: rs=%i, rt=%i, fs=%i, fs2=%i, ft=%i, ft2=%i\n",resstat[i].rs_available,resstat[i].rt_available,resstat[i].fs_available,resstat[i].fs2_available,resstat[i].ft_available,resstat[i].ft2_available);
			if (resstat[i].busy==2)
				printf(", waiting on fu, inputs: rs=%x, rt=%x, fs=%x, fs2=%x, ft=%x, ft2=%x\n",resstat[i].r_rs,resstat[i].r_rt,resstat[i].f_fs.l,resstat[i].f_fs2.l,resstat[i].f_ft.l,resstat[i].f_ft2.l);
			if (resstat[i].busy==1)
				printf(", inputs: rs=%x, rt=%x, fs=%x, fs2=%x, ft=%x, ft2=%x, cycles_remaining: %i\n",resstat[i].r_rs,resstat[i].r_rt,resstat[i].f_fs.l,resstat[i].f_fs2.l,resstat[i].f_ft.l,resstat[i].f_ft2.l,resstat[i].time_left);
			if (resstat[i].busy==0)
				printf(", inputs: rs=%x, rt=%x, fs=%x, fs2=%x, ft=%x, ft2=%x\n",resstat[i].r_rs,resstat[i].r_rt,resstat[i].f_fs.l,resstat[i].f_fs2.l,resstat[i].f_ft.l,resstat[i].f_ft2.l);
		}
	}
	printf("Functional units available: Integer: %i, Integer Mult: %i, Float %i, Float Mult: %i\n",fu_integer,fu_integer_multdiv,fu_float,fu_float_multdiv);
	i=rob_head-1;
	if (i<0)
		i=reorder_buffer_size-1;
	printf("ROB head = %i, tail = %i\n",rob_head,rob_tail);
	for (j=inst_in_rob-1; j>=0; j--)
	{
		printf("ROB entry %i: %x %s, resstat %i, ready: %i",i,ROB[i].instruction.addr,ROB[i].instruction.name,ROB[i].res_stat,ROB[i].ready);
		printf(", outputs: rt: %x, rt2: %x, rd: %x, ft: %x, ft2: %x, fd: %x, fd2: %x\n",ROB[i].r_rtout,ROB[i].r_rt2out,ROB[i].r_rdout,ROB[i].f_ftout.l,ROB[i].f_ft2out.l,ROB[i].f_fdout.l,ROB[i].f_fd2out.l);

		i--;
		if (i<0)
			i=reorder_buffer_size-1;
	}
	printf("\n");
	printf("HI: %x\tLO: %x\n",HI,LO);
//	for (i=0; i<31; i+=2)
//		printf("R[%i]=%x\tR[%i]=%x\n",i,R[i],i+1,R[i+1]);
	for (i=0; i<31; i+=4)
		printf("R[%i]=%x\tR[%i]=%x\tR[%i]=%x\tR[%i]=%x\n",i,R[i],i+1,R[i+1],i+2,R[i+2],i+3,R[i+3]);
	for (i=0; i<31; i+=2)
	{
		printf("F[%i]=%x, %f\tF[%i]=%x, %f, %lf\n",i,F.l[i],F.f[i],i+1,F.l[i+1],F.f[i+1],F.d[i>>1]);
	}
}

//copyinst returns a copy of instruction object i
//the copy is built field by field; the name is copied with strcpy
Inst copyinst(Inst i)
{
	Inst dup;

	//identity of the instruction
	dup.addr=i.addr;
	dup.inst_lower=i.inst_lower;
	dup.inst_upper=i.inst_upper;
	dup.type=i.type;
	dup.rtype=i.rtype;
	dup.is_stall=i.is_stall;
	strcpy(dup.name,i.name);

	//sinks_ flags - integer side
	dup.sinks_rs=i.sinks_rs;
	dup.sinks_rt=i.sinks_rt;
	dup.sinks_rt2=i.sinks_rt2;
	dup.sinks_HI=i.sinks_HI;
	dup.sinks_LO=i.sinks_LO;
	//sinks_ flags - floating point side
	dup.sinks_fs=i.sinks_fs;
	dup.sinks_fs2=i.sinks_fs2;
	dup.sinks_ft=i.sinks_ft;
	dup.sinks_ft2=i.sinks_ft2;
	dup.sinks_FCC=i.sinks_FCC;

	//sources_ flags - integer side
	dup.sources_rt=i.sources_rt;
	dup.sources_rt2=i.sources_rt2;
	dup.sources_rd=i.sources_rd;
	dup.sources_ra=i.sources_ra;
	dup.sources_HI=i.sources_HI;
	dup.sources_LO=i.sources_LO;
	//sources_ flags - floating point side
	dup.sources_fs=i.sources_fs;
	dup.sources_fs2=i.sources_fs2;
	dup.sources_ft=i.sources_ft;
	dup.sources_ft2=i.sources_ft2;
	dup.sources_fd=i.sources_fd;
	dup.sources_fd2=i.sources_fd2;
	dup.sources_FCC=i.sources_FCC;

	return dup;
}

//copyROBEntry returns a copy of reorder buffer entry i
//the instruction is duplicated with copyinst; the other fields are copied one by one
//(the floating point values are copied through their .l member)
Reorder_Buffer_Entry copyROBEntry(Reorder_Buffer_Entry i)
{
	Reorder_Buffer_Entry dup;

	//instruction and bookkeeping
	dup.instruction=copyinst(i.instruction);
	dup.res_stat=i.res_stat;
	dup.ready=i.ready;
	dup.time_left=i.time_left;

	//integer outputs
	dup.r_rtout=i.r_rtout;
	dup.r_rt2out=i.r_rt2out;
	dup.r_rdout=i.r_rdout;
	dup.r_raout=i.r_raout;
	dup.r_HIout=i.r_HIout;
	dup.r_LOout=i.r_LOout;
	dup.r_FCCout=i.r_FCCout;

	//floating point outputs
	dup.f_fsout.l=i.f_fsout.l;
	dup.f_fs2out.l=i.f_fs2out.l;
	dup.f_ftout.l=i.f_ftout.l;
	dup.f_ft2out.l=i.f_ft2out.l;
	dup.f_fdout.l=i.f_fdout.l;
	dup.f_fd2out.l=i.f_fd2out.l;

	//saved input values
	dup.r_rs=i.r_rs;
	dup.r_rt=i.r_rt;
	dup.r_rt2=i.r_rt2;
	dup.f_ft.l=i.f_ft.l;
	dup.f_ft2.l=i.f_ft2.l;

	//CRITICALITY ADD-IN
	dup.cycles_in_ROB=i.cycles_in_ROB;
	dup.cycles_notready=i.cycles_notready;
	dup.QOLDset=i.QOLDset;
	dup.QOLDeverset=i.QOLDeverset;
	dup.QOLDDEPset=i.QOLDDEPset;
	dup.QOLDDEPeverset=i.QOLDDEPeverset;
	dup.ALOLDset=i.ALOLDset;
	dup.ALOLDeverset=i.ALOLDeverset;
	dup.QCONSset=i.QCONSset;
	dup.QCONSeverset=i.QCONSeverset;

	return dup;
}

//power_model forwards to Wattch's power modeler, but only when power_simulate is 1
//type tells which counters need to be incremented
//data is used for updating the pop_count
//res is used when the right data must be extracted from a reservation station
//the wrapper exists for modularity
void power_model(int type, int data, int res)
{
	//power modeling is switched off - nothing to do
	if (power_simulate!=1)
		return;

	wattch_power_model(type,data,res);
}

//squash removes all instructions from the pipeline preceding reorder buffer entry r
//the entries removed are r+1 up to (but not including) rob_head, with wrap-around
//for each removed entry that still has a reservation station, the station is freed, the
//register -> reservation station maps (res_R, res_F, res_HI, res_LO, res_FCC) are repaired,
//the load/store queue count is adjusted and any functional unit in use is handed back
//afterwards rob_head and inst_in_rob are pulled back to r+1 and the dispatch queue is emptied
//NOTE(review): j,k,l,m,n are loop/count temporaries; the fs field is taken with an unmasked
//>>24, so if inst_upper is a signed type fs could be negative even after the % below - confirm
void ooo_squash(int r)
{
	int i,j,k,l,m,n;
	int rt,rd,fs,ft,fd;


	//eliminate the instruction from reservation stations
	//i = first entry to squash (r+1, wrapped), m = number of entries to squash
	i=r+1;
	if (i>=reorder_buffer_size)
		i-=reorder_buffer_size;
	m=rob_head-(r+1);
	if (m<0)
		m+=reorder_buffer_size;
	for (k=0; k<m; k++)
	{
		//res_stat==-1: this entry has no reservation station to free
		if (ROB[i].res_stat==-1)
		{
			//a type 5 instruction (store) is still counted in inst_in_lsq at this point,
			//so take it out of the count before moving on to the next entry
			if (ROB[i].instruction.type==5)
				inst_in_lsq--;

			i++;
			if (i>=reorder_buffer_size)
				i-=reorder_buffer_size;
			continue;
		}
		//free the reservation station (its type and busy fields are still read further down)
		resstat[ROB[i].res_stat].occupied=0;

		//all registers dependent upon that res stat are now free
		//unless a non-squashed instruction still sources them
		for (j=0; j<=31; j++)
			if (res_R[j]==ROB[i].res_stat)
				res_R[j]=-1;

		for (j=0; j<=31; j++)
			if (res_F[j]==ROB[i].res_stat)
				res_F[j]=-1;

		if (res_HI==ROB[i].res_stat)
			res_HI=-1;
		if (res_LO==ROB[i].res_stat)
			res_LO=-1;
		if (res_FCC==ROB[i].res_stat)
			res_FCC=-1;

		//go through the ROB prior to the squash
		//if any instruction in there sources a register, set res_R accordingly
		//the walk goes from rob_tail up to and including r (n entries), so when two
		//entries name the same register the one nearer to r is the one that remains
		j=rob_tail;
		n=r+1-rob_tail;
		if (n<0)
			n+=reorder_buffer_size;
		for (l=0; l<n; l++)
		{
		        rt=(ROB[j].instruction.inst_upper>>16)&0xff;
		        rd=(ROB[j].instruction.inst_upper>>8)&0xff;
		        fs=(ROB[j].instruction.inst_upper>>24);
		        ft=(ROB[j].instruction.inst_upper>>16)&0xff;
		        fd=(ROB[j].instruction.inst_upper>>8)&0xff;

			//if invalid instruction, rt-fd could be out of range
			//make sure they aren't
			rt=rt%(NUM_REGS);
			rd=rd%(NUM_REGS);
			fs=fs%(NUM_REGS);
			ft=ft%(NUM_REGS);
			fd=fd%(NUM_REGS);

			//the dreg value records which output of the instruction the register takes
			//integer: 0 = rt, 1 = rt+1, 2 = rd, 3 = ra
			if (ROB[j].instruction.sources_rt==1)
			{
				res_R[rt]=ROB[j].res_stat;
				res_R_dreg[rt]=0;
			}
			if (ROB[j].instruction.sources_rt2==1)
			{
				res_R[rt+1]=ROB[j].res_stat;
				res_R_dreg[rt+1]=1;
			}
			if (ROB[j].instruction.sources_rd==1)
			{
				res_R[rd]=ROB[j].res_stat;
				res_R_dreg[rd]=2;
			}
			//floating point: 0 = fs, 1 = fs+1, 2 = ft, 3 = ft+1, 4 = fd, 5 = fd+1
			if (ROB[j].instruction.sources_fs==1)
			{
				res_F[fs]=ROB[j].res_stat;
				res_F_dreg[fs]=0;
			}
			if (ROB[j].instruction.sources_fs2==1)
			{
				res_F[fs+1]=ROB[j].res_stat;
				res_F_dreg[fs+1]=1;
			}
			if (ROB[j].instruction.sources_ft==1)
			{
				res_F[ft]=ROB[j].res_stat;
				res_F_dreg[ft]=2;
			}
			if (ROB[j].instruction.sources_ft2==1)
			{
				res_F[ft+1]=ROB[j].res_stat;
				res_F_dreg[ft+1]=3;
			}
			if (ROB[j].instruction.sources_fd==1)
			{
				res_F[fd]=ROB[j].res_stat;
				res_F_dreg[fd]=4;
			}
			if (ROB[j].instruction.sources_fd2==1)
			{
				res_F[fd+1]=ROB[j].res_stat;
				res_F_dreg[fd+1]=5;
			}
			if (ROB[j].instruction.sources_HI==1)
				res_HI=ROB[j].res_stat;
			if (ROB[j].instruction.sources_LO==1)
				res_LO=ROB[j].res_stat;
			if (ROB[j].instruction.sources_FCC==1)
				res_FCC=ROB[j].res_stat;
			//ra is always register 31
			if (ROB[j].instruction.sources_ra==1)
			{
				res_R[31]=ROB[j].res_stat;
				res_R_dreg[31]=3;
			}
			j++;
			if (j==reorder_buffer_size)
				j=0;
		}


		//if the instruction is a load or store, remove from lsq
		if (resstat[ROB[i].res_stat].type==4)
			inst_in_lsq--;

		//if the instruction is arithmetic/branch and is occupying a functional unit, put the fu back
		//(station types 0-3 with busy==1 each hand back one unit of their own kind)
		if (resstat[ROB[i].res_stat].type==0 && resstat[ROB[i].res_stat].busy==1)
			fu_integer++;
		else if (resstat[ROB[i].res_stat].type==1 && resstat[ROB[i].res_stat].busy==1)
			fu_integer_multdiv++;
		else if (resstat[ROB[i].res_stat].type==2 && resstat[ROB[i].res_stat].busy==1)
			fu_float++;
		else if (resstat[ROB[i].res_stat].type==3 && resstat[ROB[i].res_stat].busy==1)
			fu_float_multdiv++;
		//move on to the next entry to squash, wrapping around the end of the buffer
		i++;
		if (i>=reorder_buffer_size)
			i-=reorder_buffer_size;
	}

	//remove all the instructions preceding r from the reorder buffer
	//entries rob_tail..r remain; a count that works out to 0 is taken to mean a full buffer
	inst_in_rob=r+1-rob_tail;
	if (inst_in_rob<0)
		inst_in_rob+=reorder_buffer_size;
	if (inst_in_rob==0)
		inst_in_rob=reorder_buffer_size;
	rob_head=r+1;
	if (rob_head>=reorder_buffer_size)
		rob_head-=reorder_buffer_size;

	//eliminate the dispatch instruction
	//(the dispatch queue is emptied and the fetch countdown is reset)
	inst_in_dispatchqueue=0;
	time_left_fetch=0;

}

//ooo_commit retires instructions, oldest first, from the tail of the reorder buffer
//it stops at the first entry that is not ready (ready<=0)
//for every retired entry it copies the produced values into the architectural registers,
//performs the memory write for stores, and squashes younger instructions on a syscall
//afterwards it advances the dcache wait timer of every store still sitting in the ROB
void ooo_commit()
{
	int slot,seen,station;
	int rt,rd,fs,ft,fd;

	//retire while the ROB holds something and its oldest entry is ready
	while (inst_in_rob>0 && ROB[rob_tail].ready>0)
	{
		//register numbers are packed into the upper instruction word
		//(the float fields occupy the same bit positions as the integer ones)
		fs=(ROB[rob_tail].instruction.inst_upper>>24);
		rt=(ROB[rob_tail].instruction.inst_upper>>16)&0xff;
		rd=(ROB[rob_tail].instruction.inst_upper>>8)&0xff;
		ft=rt;
		fd=rd;

		//integer and special register results
		if (ROB[rob_tail].instruction.sources_rt==1)
			R[rt]=ROB[rob_tail].r_rtout;
		if (ROB[rob_tail].instruction.sources_rt2==1)
			R[rt+1]=ROB[rob_tail].r_rt2out;
		if (ROB[rob_tail].instruction.sources_rd==1)
			R[rd]=ROB[rob_tail].r_rdout;
		if (ROB[rob_tail].instruction.sources_HI==1)
			HI=ROB[rob_tail].r_HIout;
		if (ROB[rob_tail].instruction.sources_LO==1)
			LO=ROB[rob_tail].r_LOout;
		if (ROB[rob_tail].instruction.sources_ra==1)
			R[31]=ROB[rob_tail].r_raout;
		if (ROB[rob_tail].instruction.sources_FCC==1)
			FCC=ROB[rob_tail].r_FCCout;

		//floating point register results
		if (ROB[rob_tail].instruction.sources_fs==1)
			F.l[fs]=ROB[rob_tail].f_fsout.l;
		if (ROB[rob_tail].instruction.sources_fs2==1)
			F.l[fs+1]=ROB[rob_tail].f_fs2out.l;
		if (ROB[rob_tail].instruction.sources_ft==1)
			F.l[ft]=ROB[rob_tail].f_ftout.l;
		if (ROB[rob_tail].instruction.sources_ft2==1)
			F.l[ft+1]=ROB[rob_tail].f_ft2out.l;
		if (ROB[rob_tail].instruction.sources_fd==1)
			F.l[fd]=ROB[rob_tail].f_fdout.l;
		if (ROB[rob_tail].instruction.sources_fd2==1)
			F.l[fd+1]=ROB[rob_tail].f_fd2out.l;

		power_model(1,0,0);

		//a store (type 5) only touches memory now, at commit
		if (ROB[rob_tail].instruction.type==5)
		{
			ooo_dostore(rob_tail);

			//the store leaves the load/store queue, so every load/store station moves up one place
			for (station=0; station<reservation_station_number; station++)
			{
				if (resstat[station].type==4)
					resstat[station].lsq_order--;
			}
			inst_in_lsq--;

			power_model(2,0,0);
		}

		//a syscall (type 9) throws away everything behind it, then restarts fetch after itself
		if (ROB[rob_tail].instruction.type==9)
		{
			ooo_squash(rob_tail);
			PC=ROB[rob_tail].instruction.addr+8;
			handle_syscalls();
		}

		//count the retired instruction and remember it as the most recent one
		instruction_counter++;

		ooo_last_instruction=copyinst(ROB[rob_tail].instruction);
		ooo_last_instruction_exists=1;

		//CRITICALITY ADD-IN
		//record criticality information for the retired instruction
		train_criticality(ROB[rob_tail].instruction.addr, 0);

		if (dump_trace==1)
			dumpregs();

//		printf("Completed instruction %u: %x %s\n", instruction_counter, ROB[rob_tail].instruction.addr, ROB[rob_tail].instruction.name);

		//drop the entry by moving the tail forward around the circular buffer
		if (++rob_tail==reorder_buffer_size)
			rob_tail=0;
		inst_in_rob--;
	}

	//walk the remaining ROB entries and tick down stores that are waiting on the dcache
	slot=rob_tail;
	for (seen=0; seen<inst_in_rob; seen++)
	{
		if (ROB[slot].instruction.type==5)
		{
			if (ROB[slot].time_left>0)
				ROB[slot].time_left--;

			//once the access time has elapsed, a waiting store (ready==-1) becomes committable
			if (ROB[slot].time_left==0 && ROB[slot].ready==-1)
				ROB[slot].ready=1;
		}

		if (++slot>=reorder_buffer_size)
			slot=0;
	}
}

//write_to_reservation_inputs is called from writeback
//i is a reservation station that has finished, j is a station that may be waiting on it
//for every operand of j whose *_available tag equals i, the matching output of i is copied
//into j's input and the tag is set to -1 (operand now held)
//for register operands, the *_dreg field selects which of i's outputs is the wanted one;
//an unrecognised selector copies nothing but the tag is still cleared
void write_to_reservation_inputs(int i, int j)
{
	//integer operand rs
	if (resstat[j].rs_available==i)
	{
		switch (resstat[j].rs_dreg)
		{
			case 0: resstat[j].r_rs=resstat[i].r_rtout; break;
			case 1: resstat[j].r_rs=resstat[i].r_rt2out; break;
			case 2: resstat[j].r_rs=resstat[i].r_rdout; break;
			case 3: resstat[j].r_rs=resstat[i].r_raout; break;
		}
		resstat[j].rs_available=-1;
	}

	//integer operand rt
	if (resstat[j].rt_available==i)
	{
		switch (resstat[j].rt_dreg)
		{
			case 0: resstat[j].r_rt=resstat[i].r_rtout; break;
			case 1: resstat[j].r_rt=resstat[i].r_rt2out; break;
			case 2: resstat[j].r_rt=resstat[i].r_rdout; break;
			case 3: resstat[j].r_rt=resstat[i].r_raout; break;
		}
		resstat[j].rt_available=-1;
	}

	//integer operand rt+1
	if (resstat[j].rt2_available==i)
	{
		switch (resstat[j].rt2_dreg)
		{
			case 0: resstat[j].r_rt2=resstat[i].r_rtout; break;
			case 1: resstat[j].r_rt2=resstat[i].r_rt2out; break;
			case 2: resstat[j].r_rt2=resstat[i].r_rdout; break;
			case 3: resstat[j].r_rt2=resstat[i].r_raout; break;
		}
		resstat[j].rt2_available=-1;
	}

	//HI, LO and FCC each have a single producer output, so no selector is needed
	if (resstat[j].HI_available==i)
	{
		resstat[j].r_HI=resstat[i].r_HIout;
		resstat[j].HI_available=-1;
	}
	if (resstat[j].LO_available==i)
	{
		resstat[j].r_LO=resstat[i].r_LOout;
		resstat[j].LO_available=-1;
	}
	if (resstat[j].FCC_available==i)
	{
		resstat[j].r_FCC=resstat[i].r_FCCout;
		resstat[j].FCC_available=-1;
	}

	//float operand fs
	if (resstat[j].fs_available==i)
	{
		switch (resstat[j].fs_dreg)
		{
			case 0: resstat[j].f_fs.l=resstat[i].f_fsout.l; break;
			case 1: resstat[j].f_fs.l=resstat[i].f_fs2out.l; break;
			case 2: resstat[j].f_fs.l=resstat[i].f_ftout.l; break;
			case 3: resstat[j].f_fs.l=resstat[i].f_ft2out.l; break;
			case 4: resstat[j].f_fs.l=resstat[i].f_fdout.l; break;
			case 5: resstat[j].f_fs.l=resstat[i].f_fd2out.l; break;
		}
		resstat[j].fs_available=-1;
	}

	//float operand fs+1
	if (resstat[j].fs2_available==i)
	{
		switch (resstat[j].fs2_dreg)
		{
			case 0: resstat[j].f_fs2.l=resstat[i].f_fsout.l; break;
			case 1: resstat[j].f_fs2.l=resstat[i].f_fs2out.l; break;
			case 2: resstat[j].f_fs2.l=resstat[i].f_ftout.l; break;
			case 3: resstat[j].f_fs2.l=resstat[i].f_ft2out.l; break;
			case 4: resstat[j].f_fs2.l=resstat[i].f_fdout.l; break;
			case 5: resstat[j].f_fs2.l=resstat[i].f_fd2out.l; break;
		}
		resstat[j].fs2_available=-1;
	}

	//float operand ft
	if (resstat[j].ft_available==i)
	{
		switch (resstat[j].ft_dreg)
		{
			case 0: resstat[j].f_ft.l=resstat[i].f_fsout.l; break;
			case 1: resstat[j].f_ft.l=resstat[i].f_fs2out.l; break;
			case 2: resstat[j].f_ft.l=resstat[i].f_ftout.l; break;
			case 3: resstat[j].f_ft.l=resstat[i].f_ft2out.l; break;
			case 4: resstat[j].f_ft.l=resstat[i].f_fdout.l; break;
			case 5: resstat[j].f_ft.l=resstat[i].f_fd2out.l; break;
		}
		resstat[j].ft_available=-1;
	}

	//float operand ft+1
	if (resstat[j].ft2_available==i)
	{
		switch (resstat[j].ft2_dreg)
		{
			case 0: resstat[j].f_ft2.l=resstat[i].f_fsout.l; break;
			case 1: resstat[j].f_ft2.l=resstat[i].f_fs2out.l; break;
			case 2: resstat[j].f_ft2.l=resstat[i].f_ftout.l; break;
			case 3: resstat[j].f_ft2.l=resstat[i].f_ft2out.l; break;
			case 4: resstat[j].f_ft2.l=resstat[i].f_fdout.l; break;
			case 5: resstat[j].f_ft2.l=resstat[i].f_fd2out.l; break;
		}
		resstat[j].ft2_available=-1;
	}
}

//writeback checks if any reservation station has completed.
//if so, it copies the output to any reservation stations needing it as input,
//and updates the instruction's rob entry with the output
//
//for each occupied station i with busy==0, in station order, it:
//  1) forwards i's outputs to every occupied station still waiting on operands (busy>=3)
//  2) copies i's outputs (and the inputs a store needs at commit) into ROB[rob_place]
//  3) marks the ROB entry ready and detaches it from the station (res_stat=-1)
//  4) frees the station and clears any register tag (res_R/res_F/res_HI/...) still naming i
//  5) for loads, closes the gap in the load/store queue; for stores, starts the dcache wait
void ooo_writeback()
{
	int i,j,rt,rd,fs,ft,fd;
	short int imm;

	for (i=0; i<reservation_station_number; i++)
	{
		//ignore station if empty
		if (resstat[i].occupied==0)
			continue;

		//ignore station if busy
		if (resstat[i].busy>0)
			continue;

		//station is ready:
		//copy results from station to other stations needing it
		for (j=0; j<reservation_station_number; j++)
		{
			//ignore station if empty
			if (resstat[j].occupied==0)
				continue;
			//ignore station if it is not waiting on input (only busy>=3 still needs operands)
			if (resstat[j].busy<3)
				continue;
		
			write_to_reservation_inputs(i,j);
		}

		power_model(3,0,i);

		//send the output to the ROB entry
		ROB[resstat[i].rob_place].r_rtout=resstat[i].r_rtout;
		ROB[resstat[i].rob_place].r_rt2out=resstat[i].r_rt2out;
		ROB[resstat[i].rob_place].r_rdout=resstat[i].r_rdout;
		ROB[resstat[i].rob_place].r_HIout=resstat[i].r_HIout;
		ROB[resstat[i].rob_place].r_LOout=resstat[i].r_LOout;
		ROB[resstat[i].rob_place].r_raout=resstat[i].r_raout;
		ROB[resstat[i].rob_place].r_FCCout=resstat[i].r_FCCout;
		ROB[resstat[i].rob_place].f_fsout.l=resstat[i].f_fsout.l;
		ROB[resstat[i].rob_place].f_fs2out.l=resstat[i].f_fs2out.l;
		ROB[resstat[i].rob_place].f_ftout.l=resstat[i].f_ftout.l;
		ROB[resstat[i].rob_place].f_ft2out.l=resstat[i].f_ft2out.l;
		ROB[resstat[i].rob_place].f_fdout.l=resstat[i].f_fdout.l;
		ROB[resstat[i].rob_place].f_fd2out.l=resstat[i].f_fd2out.l;

		//stores still need these in commit
		ROB[resstat[i].rob_place].r_rs=resstat[i].r_rs;
		ROB[resstat[i].rob_place].r_rt=resstat[i].r_rt;
		ROB[resstat[i].rob_place].r_rt2=resstat[i].r_rt2;
		ROB[resstat[i].rob_place].f_ft.l=resstat[i].f_ft.l;
		ROB[resstat[i].rob_place].f_ft2.l=resstat[i].f_ft2.l;

		//set the ROB entry as ready to commit
		//res_stat=-1 records that the entry is no longer backed by a reservation station
		ROB[resstat[i].rob_place].ready=1;
		ROB[resstat[i].rob_place].res_stat=-1;

		//clear the reservation station
		//(its fields are still read below; only the occupied flag changes here)
		resstat[i].occupied=0;

		//register numbers are packed into the upper instruction word
		//(the float fields use the same bit positions as the integer ones)
        	rt=(resstat[i].instruction.inst_upper>>16)&0xff;
        	rd=(resstat[i].instruction.inst_upper>>8)&0xff;
        	fs=(resstat[i].instruction.inst_upper>>24);
        	ft=(resstat[i].instruction.inst_upper>>16)&0xff;
        	fd=(resstat[i].instruction.inst_upper>>8)&0xff;

		//free up dependent registers
		//a tag is only cleared if it still names this station; a different tag value is left alone
		if (resstat[i].instruction.sources_rt==1 && res_R[rt]==i)
			res_R[rt]=-1;
		if (resstat[i].instruction.sources_rt2==1 && res_R[rt+1]==i)
			res_R[rt+1]=-1;
		if (resstat[i].instruction.sources_rd==1 && res_R[rd]==i)
			res_R[rd]=-1;
		if (resstat[i].instruction.sources_HI==1 && res_HI==i)
			res_HI=-1;
		if (resstat[i].instruction.sources_LO==1 && res_LO==i)
			res_LO=-1;
		if (resstat[i].instruction.sources_ra==1 && res_R[31]==i)
			res_R[31]=-1;
		if (resstat[i].instruction.sources_FCC==1 && res_FCC==i)
			res_FCC=-1;
		if (resstat[i].instruction.sources_fs==1 && res_F[fs]==i)
			res_F[fs]=-1;
		if (resstat[i].instruction.sources_fs2==1 && res_F[fs+1]==i)
			res_F[fs+1]=-1;
		if (resstat[i].instruction.sources_ft==1 && res_F[ft]==i)
			res_F[ft]=-1;
		if (resstat[i].instruction.sources_ft2==1 && res_F[ft+1]==i)
			res_F[ft+1]=-1;
		if (resstat[i].instruction.sources_fd==1 && res_F[fd]==i)
			res_F[fd]=-1;
		if (resstat[i].instruction.sources_fd2==1 && res_F[fd+1]==i)
			res_F[fd+1]=-1;

		//if the instruction was a load (type 4), advance everybody in lsq
		//stores, however, hog place 0 in lsq until commit
		if (ROB[resstat[i].rob_place].instruction.type==4)
		{
			//only entries queued behind this load move up
			for (j=0; j<reservation_station_number; j++)
			{
				if(resstat[j].type==4)
					if (resstat[j].lsq_order>resstat[i].lsq_order)
						resstat[j].lsq_order--;
			}
			inst_in_lsq--;

			power_model(4,0,i);
		}
		//if a store, delay readiness for the dcache cycle latency
		if (ROB[resstat[i].rob_place].instruction.type==5)
		{    
			//get anticipated cache access latency for the store
			//imm is a short, so the low 16 bits act as a signed offset from r_rs
			imm=(resstat[i].instruction.inst_upper)&0xffff;
			j=dcache_access_latency(resstat[i].r_rs+imm);
			//make the store wait for the appropriate number of cycles
			//ready=-1 marks "waiting on dcache"; ooo_commit counts time_left down and restores ready=1
			ROB[resstat[i].rob_place].time_left=j-1;
			if (ROB[resstat[i].rob_place].time_left>0)
				ROB[resstat[i].rob_place].ready=-1;
		}
	}
}

//execute goes to each reservation station
//if the operands are available, it updates busy to 1, reduces latency
//if latency is 0, it updates busy to 0 (freeing instruction), and calculates instruction's results
//busy values mean the following: 0=done (ready to commit); 1=executing in a fu, 
//2=wants to execute, need a fu; or is a load, waiting for LSQ place 0, 3=brand new, or needs an operand to execute
//
//the checks below run one after another for the same station, so a station can move through
//several busy states in a single call (e.g. 3 -> 2 -> 1 for a load at the head of the LSQ)
//once a station reaches busy==0 this function also resolves branch and value predictions,
//squashing younger instructions and redirecting PC on a misprediction
void ooo_execute()
{
	int i,j,k,l;
	short int imm;

	for (i=0; i<reservation_station_number; i++)
	{
		//don't worry about station if it has no instruction in it
		if (resstat[i].occupied==0)
			continue;

		//check if the instruction in the res. station is ready to start running
		//if so, put it in the ready queue to be issued to a functional unit
		//(every *_available tag must be negative: -1 = value held, -2 = operand not needed)
		if (resstat[i].busy==3 && resstat[i].rs_available<0 && resstat[i].rt_available<0 && resstat[i].rt2_available<0 && resstat[i].HI_available<0 && resstat[i].LO_available<0 && resstat[i].FCC_available<0 && resstat[i].fs_available<0 && resstat[i].fs2_available<0 && resstat[i].ft_available<0 && resstat[i].ft2_available<0)
		{
			//put number of reservation station in the ready queue, unless it doesn't need a functional unit 
			//(arithmetic & branches only)
			if (resstat[i].type<=3)
			{
				ready_queue[inst_in_ready_queue]=i;
				inst_in_ready_queue++;
			}
			resstat[i].busy=2;

			power_model(5,0,i);
			
			//if the instruction doesn't need a functional unit to execute, don't force it to wait further
			if (resstat[i].type==5)
				resstat[i].busy=1;
		}

		//check if the instruction is waiting to execute and is a load
		if (resstat[i].busy==2 && resstat[i].type==4)
		{
			//if it's a load or store, it's not ready to start running unless it's number 0 in LSQ
			if (resstat[i].lsq_order==0)
			{
				resstat[i].busy=1;
				//need to calculate the memory latency
				//imm is a short, so the low 16 bits act as a signed offset from r_rs
				imm=(resstat[i].instruction.inst_upper) & 0xffff;
				j=dcache_access_latency(resstat[i].r_rs+imm);
				resstat[i].time_left+=(j-1);
			}
			//if it's a load and there are no stores ahead in the LSQ, it doesn't hurt to jump queue
			else
			{
				//check every instruction ahead in queue to see if it's a store
				//the scan covers the whole ROB from the tail; leaving the loop early (k<inst_in_rob)
				//means a blocking store was found
				l=rob_tail;
				for (k=0; k<inst_in_rob; k++)
				{
					//a store is found
					if (ROB[l].instruction.type==5)
					{
						//is it out of the reservation station (and consequently ahead)?
						if (ROB[l].res_stat==-1)
							break;
						//does it have a lower lsq number?
						if (resstat[ROB[l].res_stat].lsq_order<resstat[i].lsq_order)
							break;
						//if not, it's not a problem
					}
					l++;
					if (l==reorder_buffer_size)
						l=0;
				}
				//the loop ran to completion, so no store is ahead of this load
				if (k==inst_in_rob)
				{
					//start the load
					resstat[i].busy=1;
					//need to calculate the memory latency
					imm=(resstat[i].instruction.inst_upper) & 0xffff;
					j=dcache_access_latency(resstat[i].r_rs+imm);
					resstat[i].time_left+=(j-1);					
				}
			}
		}

		//if the instruction is running, reduce the time left
		if (resstat[i].busy==1 && resstat[i].time_left>0)
		{
			resstat[i].time_left--;
		}

		power_model(6,0,i);

		//criticality add-in
		//if the instruction is done, check whether it met the critical criteria too late
		//NOTE(review): the meaning of subtractcritical==2 is defined elsewhere - confirm
		if (subtractcritical==2 && resstat[i].busy==1 && resstat[i].time_left==0)
		{
			didnt_observe_critical(i);
		}

		//if the instruction has one cycle to go, check if it meets critical criteria
		//if so, remove its last cycle
		if (subtractcritical==2 && resstat[i].busy==1 && resstat[i].time_left==1)
		{
			if (observed_critical(i)==1)
			{
				resstat[i].time_left--;
				if (resstat[i].time_left<0)
					fatal("Invalid instruction latency");
			}			
		}

		//if the instruction has finished running, get its results
		if (resstat[i].busy==1 && resstat[i].time_left==0)
		{
			ooo_doexecute(i);
			resstat[i].busy=0;

			//release its functional units
			//(types above 3 never took a functional unit, so nothing is returned for them)
			if (resstat[i].type==0)
				fu_integer++;
			else if (resstat[i].type==1)
				fu_integer_multdiv++;
			else if (resstat[i].type==2)
				fu_float++;
			else if (resstat[i].type==3)
				fu_float_multdiv++;
		}

		//if the instruction is a type 7 or type 8, and no longer busy, 
		//need to check whether branch prediction was right
		//note: this test holds on every call while the station stays occupied with busy==0
		if (resstat[i].busy==0 && (resstat[i].instruction.type==7 || resstat[i].instruction.type==8))
		{
			//actual PC is in r.r_rdout
			//if PC <> predicted_PC, we need to squash everybody after the branch
			if (resstat[i].r_rdout!=resstat[i].predicted_PC)
			{
				ooo_squash(resstat[i].rob_place);

				//set the PC to the correct PC
				PC=resstat[i].r_rdout;
			}

			//if jr, update the branch target buffer with the correct destination
			if (resstat[i].instruction.type==7)
				updateBTB(resstat[i].PC, resstat[i].r_rdout);

			//if conditional branch, update the btb with the dest PC if the branch were taken (in r_rtout)
			if (resstat[i].instruction.type==8)
			{
				updateBTB(resstat[i].PC, resstat[i].r_rtout);

				//and train the branch predictor
				//the outcome passed is nonzero when the resolved PC differs from PC+8 (the not-taken path)
				if (resstat[i].branch_prediction_made>0)
				{
					train_branch_predictor(resstat[i].PC, resstat[i].r_rdout != resstat[i].PC+8);
					power_model(7,0,0);
				}
			}
		}

		//if the instruction is a type 0 or 4 and no longer busy
		//need to check whether value prediction was right
		if (resstat[i].busy==0 && (resstat[i].instruction.type==0 || resstat[i].instruction.type==4))
		{
			//if prediction was made, and output is not as predicted,
			//squash everybody after the instruction and set the PC to the next instruction
			//(yes, I know this isn't great misspeculation recovery, but it works)
			//Don't do any squashing unless some instruction actually used the predicted value
			if (resstat[i].value_prediction_made==1 && resstat[i].value_prediction_used==1)
			{
				if (resstat[i].instruction.sources_rt==1 && resstat[i].r_rtout!=resstat[i].value_prediction)
				{
					ooo_squash(resstat[i].rob_place);
					PC=resstat[i].PC+8;
				}
				if (resstat[i].instruction.sources_rd==1 && resstat[i].r_rdout!=resstat[i].value_prediction)
				{
					ooo_squash(resstat[i].rob_place);
					PC=resstat[i].PC+8;
				}
			}

			//whether a prediction was made or not, if it was eligible to make a prediction, train value predictor
			//(eligible = writes rt or rd, and does not write rt+1)
			if ((resstat[i].instruction.sources_rt==1 || resstat[i].instruction.sources_rd==1) && resstat[i].instruction.sources_rt2==0)
			{
				if (resstat[i].instruction.sources_rt==1)
					train_value_predictor(resstat[i].PC,resstat[i].instruction.type,resstat[i].value_prediction,resstat[i].r_rtout,resstat[i].value_prediction_made);
				if (resstat[i].instruction.sources_rd==1)
					train_value_predictor(resstat[i].PC,resstat[i].instruction.type,resstat[i].value_prediction,resstat[i].r_rdout,resstat[i].value_prediction_made);
			}

			//it's still possible that some instruction might try to use the predicted value
			//because writeback hasn't happened yet
			//make sure the value in .value_prediction is the actual value, not the speculative one
			if (resstat[i].value_prediction_made==1)
			{
				if (resstat[i].instruction.sources_rt==1)
					resstat[i].value_prediction=resstat[i].r_rtout;
				if (resstat[i].instruction.sources_rd==1)
					resstat[i].value_prediction=resstat[i].r_rdout;
			}
		}

	}
}

//ooo_issue drains the ready queue into free functional units
//each pass takes the earliest queued station whose unit type still has a free unit,
//marks it as executing (busy=1), claims one unit and removes it from the queue
//a queued station that is no longer occupied (it was squashed) is simply removed
//issuing ends when the queue is empty or a full pass finds nothing to remove
void ooo_issue()
{
	int slot,station,granted,shift;

	while (inst_in_ready_queue>0)
	{
		//look for the first queue slot that can leave the queue
		for (slot=0; slot<inst_in_ready_queue; slot++)
		{
			station=ready_queue[slot];

			//squashed instruction still sitting in the queue: drop it without issuing
			if (resstat[station].occupied==0)
				break;

			//try to claim a functional unit of the kind this station needs
			granted=0;
			switch (resstat[station].type)
			{
				case 0:
					if (fu_integer>0)
					{
						fu_integer--;
						granted=1;
					}
					break;
				case 1:
					if (fu_integer_multdiv>0)
					{
						fu_integer_multdiv--;
						granted=1;
					}
					break;
				case 2:
					if (fu_float>0)
					{
						fu_float--;
						granted=1;
					}
					break;
				case 3:
					if (fu_float_multdiv>0)
					{
						fu_float_multdiv--;
						granted=1;
					}
					break;
			}

			if (granted)
			{
				resstat[station].busy=1;
				power_model(8,0,0);
				break;
			}
		}

		//nothing in the queue could be issued or dropped, so stop for this cycle
		if (slot==inst_in_ready_queue)
			break;

		//close the gap left by the removed slot
		for (shift=slot+1; shift<inst_in_ready_queue; shift++)
			ready_queue[shift-1]=ready_queue[shift];
		inst_in_ready_queue--;
	}
}

//set_predicted_registers is called from dispatch
//for station i it replaces a pending rs and/or rt operand with the value prediction
//held by the producing station, if that station made one
//value predictions exist only for instructions with a single integer output,
//so the predicted value is unambiguous and only rs and rt have to be handled
//nothing is done unless station i was guessed to be critical, since consuming a
//prediction risks a misprediction squash
void set_predicted_registers(int i)
{
	int producer;

	if (resstat[i].guessed_critical==1)
	{
		//rs: a tag >=0 names the station that will produce the value
		producer=resstat[i].rs_available;
		if (producer>=0 && resstat[producer].value_prediction_made==1)
		{
			//take the predicted value so that i no longer waits on rs
			resstat[i].r_rs=resstat[producer].value_prediction;

			//remember that the prediction was consumed
			//(an unused prediction never needs a squash when it turns out wrong)
			resstat[producer].value_prediction_used=1;

			resstat[i].rs_available=-1;
		}

		//rt: same treatment
		producer=resstat[i].rt_available;
		if (producer>=0 && resstat[producer].value_prediction_made==1)
		{
			resstat[i].r_rt=resstat[producer].value_prediction;
			resstat[producer].value_prediction_used=1;
			resstat[i].rt_available=-1;
		}
	}
}

//set_resstat_registers is called from dispatch.
//for a newly occupied reservation station i, it determines which res.stats (or ROB entries)
//source the operands it needs
//operand available has the following meaning: >=0 = # of res.stat sourcing the value,
//-1 = has value, not waiting on that operand, -2 = doesn't need that operand
//
//the instruction being dispatched is read from the global ooo_fi_inst
//the function works in three steps:
//  1) for every operand the instruction reads (sinks_*), either take the value from the
//     register file or record the tag of the station that will produce it
//  2) for operands marked as held (-1), overwrite the value with the output of any ROB entry
//     that has written back but not committed; the scan runs from the ROB tail forward, so the
//     most recently dispatched writer wins
//  3) for every register the instruction writes (sources_*), point that register's tag
//     at station i, and mark all of i's outputs as not yet available
void set_resstat_registers(int i)
{
	//register numbers are packed into the upper instruction word
	//(the float fields use the same bit positions as the integer ones)
	int rs=(ooo_fi_inst.inst_upper>>24);
	int rt=(ooo_fi_inst.inst_upper>>16)&0xff;
	int rd=(ooo_fi_inst.inst_upper>>8)&0xff;
	int fs=(ooo_fi_inst.inst_upper>>24);
	int ft=(ooo_fi_inst.inst_upper>>16)&0xff;
	int fd=(ooo_fi_inst.inst_upper>>8)&0xff;

	int rob_rt,rob_rd;
	int rob_fs,rob_ft,rob_fd;
	int j,k;

	//if this is an illegal instruction, rs, rt and all may be invalid
	//keep them in valid range so that they don't cause a fault
	rs=rs%(NUM_REGS);
	rt=rt%(NUM_REGS);
	rd=rd%(NUM_REGS);
	fs=fs%(NUM_REGS);
	ft=ft%(NUM_REGS);
	fd=fd%(NUM_REGS);

	//find out who is producing the registers that the instruction needs
	//if the instruction uses rs and rs is available, get rs
	if (ooo_fi_inst.sinks_rs==1 && res_R[rs]==-1)
	{
		resstat[i].r_rs=R[rs];
		resstat[i].rs_available=-1;
		power_model(9,R[rs],0);
	}
	//if the instruction uses rs and rs is not available, get reservation station sourcing rs
	else if (ooo_fi_inst.sinks_rs==1 && res_R[rs]>=0)
	{
		resstat[i].rs_available=res_R[rs];
		resstat[i].rs_dreg=res_R_dreg[rs];
	}
	//if the instruction does not use rs, note that
	else if (ooo_fi_inst.sinks_rs==0)
		resstat[i].rs_available=-2;

	//repeat for rt
	if (ooo_fi_inst.sinks_rt==1 && res_R[rt]==-1)
	{
		resstat[i].r_rt=R[rt];
		resstat[i].rt_available=-1;
		power_model(9,R[rt],0);
	}
	else if (ooo_fi_inst.sinks_rt==1 && res_R[rt]>=0)
	{
		resstat[i].rt_available=res_R[rt];
		resstat[i].rt_dreg=res_R_dreg[rt];
	}
	else if (ooo_fi_inst.sinks_rt==0)
		resstat[i].rt_available=-2;

	//repeat for rt+1
	if (ooo_fi_inst.sinks_rt2==1 && res_R[rt+1]==-1)
	{
		resstat[i].r_rt2=R[rt+1];
		resstat[i].rt2_available=-1;
		power_model(9,R[rt+1],0);
	}
	else if (ooo_fi_inst.sinks_rt2==1 && res_R[rt+1]>=0)
	{
		resstat[i].rt2_available=res_R[rt+1];
		resstat[i].rt2_dreg=res_R_dreg[rt+1];
	}
	else if (ooo_fi_inst.sinks_rt2==0)
		resstat[i].rt2_available=-2;

	//repeat for HI
	if (ooo_fi_inst.sinks_HI==1 && res_HI==-1)
	{
		resstat[i].r_HI=HI;
		resstat[i].HI_available=-1;
		power_model(9,HI,0);
	}
	else if (ooo_fi_inst.sinks_HI==1 && res_HI>=0)
	{
		resstat[i].HI_available=res_HI;
	}
	//the "not needed" case must be keyed on sinks_HI itself; keying it on another operand's
	//flag would leave the previous occupant's HI tag in the station, and a stale tag >=0
	//keeps the instruction from ever being seen as ready in execute
	else if (ooo_fi_inst.sinks_HI==0)
		resstat[i].HI_available=-2;

	//repeat for LO
	if (ooo_fi_inst.sinks_LO==1 && res_LO==-1)
	{
		resstat[i].r_LO=LO;
		resstat[i].LO_available=-1;
		power_model(9,LO,0);
	}
	else if (ooo_fi_inst.sinks_LO==1 && res_LO>=0)
	{
		resstat[i].LO_available=res_LO;
	}
	else if (ooo_fi_inst.sinks_LO==0)
		resstat[i].LO_available=-2;

	//repeat for FCC
	if (ooo_fi_inst.sinks_FCC==1 && res_FCC==-1)
	{
		resstat[i].r_FCC=FCC;
		resstat[i].FCC_available=-1;
		power_model(9,FCC,0);
	}
	else if (ooo_fi_inst.sinks_FCC==1 && res_FCC>=0)
	{
		resstat[i].FCC_available=res_FCC;
	}
	else if (ooo_fi_inst.sinks_FCC==0)
		resstat[i].FCC_available=-2;

	//repeat for fs
	if (ooo_fi_inst.sinks_fs==1 && res_F[fs]==-1)
	{
		resstat[i].f_fs.l=F.l[fs];
		resstat[i].fs_available=-1;
		power_model(9,F.l[fs],0);
	}
	else if (ooo_fi_inst.sinks_fs==1 && res_F[fs]>=0)
	{
		resstat[i].fs_available=res_F[fs];
		resstat[i].fs_dreg=res_F_dreg[fs];
	}
	else if (ooo_fi_inst.sinks_fs==0)
		resstat[i].fs_available=-2;

	//repeat for fs+1
	if (ooo_fi_inst.sinks_fs2==1 && res_F[fs+1]==-1)
	{
		resstat[i].f_fs2.l=F.l[fs+1];
		resstat[i].fs2_available=-1;
		power_model(9,F.l[fs+1],0);
	}
	else if (ooo_fi_inst.sinks_fs2==1 && res_F[fs+1]>=0)
	{
		resstat[i].fs2_available=res_F[fs+1];
		resstat[i].fs2_dreg=res_F_dreg[fs+1];
	}
	else if (ooo_fi_inst.sinks_fs2==0)
		resstat[i].fs2_available=-2;

	//repeat for ft
	if (ooo_fi_inst.sinks_ft==1 && res_F[ft]==-1)
	{
		resstat[i].f_ft.l=F.l[ft];
		resstat[i].ft_available=-1;
		power_model(9,F.l[ft],0);
	}
	else if (ooo_fi_inst.sinks_ft==1 && res_F[ft]>=0)
	{
		resstat[i].ft_available=res_F[ft];
		resstat[i].ft_dreg=res_F_dreg[ft];
	}
	else if (ooo_fi_inst.sinks_ft==0)
		resstat[i].ft_available=-2;

	//repeat for ft+1
	if (ooo_fi_inst.sinks_ft2==1 && res_F[ft+1]==-1)
	{
		resstat[i].f_ft2.l=F.l[ft+1];
		resstat[i].ft2_available=-1;
		power_model(9,F.l[ft+1],0);
	}
	else if (ooo_fi_inst.sinks_ft2==1 && res_F[ft+1]>=0)
	{
		resstat[i].ft2_available=res_F[ft+1];
		resstat[i].ft2_dreg=res_F_dreg[ft+1];
	}
	else if (ooo_fi_inst.sinks_ft2==0)
		resstat[i].ft2_available=-2;

	//even though I've set an input reg as available, it may not be in R[].
	//I need to check through the instructions in the reorder buffer that already wrote back but haven't committed
	j=rob_tail;
	for (k=0; k<inst_in_rob; k++)
	{
		//entries that have not written back yet (ready==0) have no value to offer
		if (ROB[j].ready==0)
		{
			j++;
			if (j==reorder_buffer_size)
				j=0;
			continue;
		}

		rob_rt=(ROB[j].instruction.inst_upper>>16)&0xff;
		rob_rd=(ROB[j].instruction.inst_upper>>8)&0xff;
		rob_fs=(ROB[j].instruction.inst_upper>>24);
		rob_ft=(ROB[j].instruction.inst_upper>>16)&0xff;
		rob_fd=(ROB[j].instruction.inst_upper>>8)&0xff;

		//if the ROB instruction sources rt, check if I use its rt
		if (ROB[j].instruction.sources_rt==1)
		{
			if (resstat[i].rs_available==-1 && rs==rob_rt)
				resstat[i].r_rs=ROB[j].r_rtout;
			if (resstat[i].rt_available==-1 && rt==rob_rt)
				resstat[i].r_rt=ROB[j].r_rtout;
			if (resstat[i].rt2_available==-1 && rt+1==rob_rt)
				resstat[i].r_rt2=ROB[j].r_rtout;
		}
		//repeat for rt+1
		if (ROB[j].instruction.sources_rt2==1)
		{
			if (resstat[i].rs_available==-1 && rs==rob_rt+1)
				resstat[i].r_rs=ROB[j].r_rt2out;
			if (resstat[i].rt_available==-1 && rt==rob_rt+1)
				resstat[i].r_rt=ROB[j].r_rt2out;
			if (resstat[i].rt2_available==-1 && rt+1==rob_rt+1)
				resstat[i].r_rt2=ROB[j].r_rt2out;
		}
		//repeat for rd
		if (ROB[j].instruction.sources_rd==1)
		{
			if (resstat[i].rs_available==-1 && rs==rob_rd)
				resstat[i].r_rs=ROB[j].r_rdout;
			if (resstat[i].rt_available==-1 && rt==rob_rd)
				resstat[i].r_rt=ROB[j].r_rdout;
			if (resstat[i].rt2_available==-1 && rt+1==rob_rd)
				resstat[i].r_rt2=ROB[j].r_rdout;
		}
		//repeat for HI
		if (ROB[j].instruction.sources_HI==1 && resstat[i].HI_available==-1)
			resstat[i].r_HI=ROB[j].r_HIout;
		//repeat for LO
		if (ROB[j].instruction.sources_LO==1 && resstat[i].LO_available==-1)
			resstat[i].r_LO=ROB[j].r_LOout;
		//repeat for ra
		if (ROB[j].instruction.sources_ra==1)
		{
			if (resstat[i].rs_available==-1 && rs==31)
				resstat[i].r_rs=ROB[j].r_raout;
			if (resstat[i].rt_available==-1 && rt==31)
				resstat[i].r_rt=ROB[j].r_raout;
			if (resstat[i].rt2_available==-1 && rt+1==31)
				resstat[i].r_rt2=ROB[j].r_raout;
		}
		//repeat for FCC
		if (ROB[j].instruction.sources_FCC==1 && resstat[i].FCC_available==-1)
			resstat[i].r_FCC=ROB[j].r_FCCout;
		//repeat for fs
		if (ROB[j].instruction.sources_fs==1)
		{
			if (resstat[i].fs_available==-1 && fs==rob_fs)
				resstat[i].f_fs.l=ROB[j].f_fsout.l;
			if (resstat[i].fs2_available==-1 && fs+1==rob_fs)
				resstat[i].f_fs2.l=ROB[j].f_fsout.l;
			if (resstat[i].ft_available==-1 && ft==rob_fs)
				resstat[i].f_ft.l=ROB[j].f_fsout.l;
			if (resstat[i].ft2_available==-1 && ft+1==rob_fs)
				resstat[i].f_ft2.l=ROB[j].f_fsout.l;
		}
		//repeat for fs+1
		if (ROB[j].instruction.sources_fs2==1)
		{
			if (resstat[i].fs_available==-1 && fs==rob_fs+1)
				resstat[i].f_fs.l=ROB[j].f_fs2out.l;
			if (resstat[i].fs2_available==-1 && fs+1==rob_fs+1)
				resstat[i].f_fs2.l=ROB[j].f_fs2out.l;
			if (resstat[i].ft_available==-1 && ft==rob_fs+1)
				resstat[i].f_ft.l=ROB[j].f_fs2out.l;
			if (resstat[i].ft2_available==-1 && ft+1==rob_fs+1)
				resstat[i].f_ft2.l=ROB[j].f_fs2out.l;
		}
		//repeat for ft
		if (ROB[j].instruction.sources_ft==1)
		{
			if (resstat[i].fs_available==-1 && fs==rob_ft)
				resstat[i].f_fs.l=ROB[j].f_ftout.l;
			if (resstat[i].fs2_available==-1 && fs+1==rob_ft)
				resstat[i].f_fs2.l=ROB[j].f_ftout.l;
			if (resstat[i].ft_available==-1 && ft==rob_ft)
				resstat[i].f_ft.l=ROB[j].f_ftout.l;
			if (resstat[i].ft2_available==-1 && ft+1==rob_ft)
				resstat[i].f_ft2.l=ROB[j].f_ftout.l;
		}
		//repeat for ft+1
		if (ROB[j].instruction.sources_ft2==1)
		{
			if (resstat[i].fs_available==-1 && fs==rob_ft+1)
				resstat[i].f_fs.l=ROB[j].f_ft2out.l;
			if (resstat[i].fs2_available==-1 && fs+1==rob_ft+1)
				resstat[i].f_fs2.l=ROB[j].f_ft2out.l;
			if (resstat[i].ft_available==-1 && ft==rob_ft+1)
				resstat[i].f_ft.l=ROB[j].f_ft2out.l;
			if (resstat[i].ft2_available==-1 && ft+1==rob_ft+1)
				resstat[i].f_ft2.l=ROB[j].f_ft2out.l;
		}
		//repeat for fd
		if (ROB[j].instruction.sources_fd==1)
		{
			if (resstat[i].fs_available==-1 && fs==rob_fd)
				resstat[i].f_fs.l=ROB[j].f_fdout.l;
			if (resstat[i].fs2_available==-1 && fs+1==rob_fd)
				resstat[i].f_fs2.l=ROB[j].f_fdout.l;
			if (resstat[i].ft_available==-1 && ft==rob_fd)
				resstat[i].f_ft.l=ROB[j].f_fdout.l;
			if (resstat[i].ft2_available==-1 && ft+1==rob_fd)
				resstat[i].f_ft2.l=ROB[j].f_fdout.l;
		}
		//repeat for fd+1
		if (ROB[j].instruction.sources_fd2==1)
		{
			if (resstat[i].fs_available==-1 && fs==rob_fd+1)
				resstat[i].f_fs.l=ROB[j].f_fd2out.l;
			if (resstat[i].fs2_available==-1 && fs+1==rob_fd+1)
				resstat[i].f_fs2.l=ROB[j].f_fd2out.l;
			if (resstat[i].ft_available==-1 && ft==rob_fd+1)
				resstat[i].f_ft.l=ROB[j].f_fd2out.l;
			if (resstat[i].ft2_available==-1 && ft+1==rob_fd+1)
				resstat[i].f_ft2.l=ROB[j].f_fd2out.l;
		}

		j++;
		if (j==reorder_buffer_size)
			j=0;
	}


	//if the instruction is sourcing a register, mark that register with this res. stat.
	//the *_dreg code records which output of the station carries the register's value
	//(integer: 0=rt 1=rt+1 2=rd 3=ra; float: 0=fs 1=fs+1 2=ft 3=ft+1 4=fd 5=fd+1)
	if (ooo_fi_inst.sources_rt==1)
	{
		res_R[rt]=i;
		res_R_dreg[rt]=0;
	}
	if (ooo_fi_inst.sources_rt2==1)
	{
		res_R[rt+1]=i;
		res_R_dreg[rt+1]=1;
	}
	if (ooo_fi_inst.sources_rd==1)
	{
		res_R[rd]=i;
		res_R_dreg[rd]=2;
	}
	if (ooo_fi_inst.sources_HI==1)
	{
		res_HI=i;
	}
	if (ooo_fi_inst.sources_LO==1)
	{
		res_LO=i;
	}
	if (ooo_fi_inst.sources_ra==1)
	{
		res_R[31]=i;
		res_R_dreg[31]=3;
	}
	if (ooo_fi_inst.sources_FCC==1)
	{
		res_FCC=i;
	}
	if (ooo_fi_inst.sources_fs==1)
	{
		res_F[fs]=i;
		res_F_dreg[fs]=0;
	}
	if (ooo_fi_inst.sources_fs2==1)
	{
		res_F[fs+1]=i;
		res_F_dreg[fs+1]=1;
	}
	if (ooo_fi_inst.sources_ft==1)
	{
		res_F[ft]=i;
		res_F_dreg[ft]=2;
	}
	if (ooo_fi_inst.sources_ft2==1)
	{
		res_F[ft+1]=i;
		res_F_dreg[ft+1]=3;
	}
	if (ooo_fi_inst.sources_fd==1)
	{
		res_F[fd]=i;
		res_F_dreg[fd]=4;
	}
	if (ooo_fi_inst.sources_fd2==1)
	{
		res_F[fd+1]=i;
		res_F_dreg[fd+1]=5;
	}

	//set all the possible outputs as unavailable
	resstat[i].rtout_available=0;
	resstat[i].rt2out_available=0;
	resstat[i].rdout_available=0;
	resstat[i].HIout_available=0;
	resstat[i].LOout_available=0;
	resstat[i].raout_available=0;
	resstat[i].FCCout_available=0;
	resstat[i].fsout_available=0;
	resstat[i].fs2out_available=0;
	resstat[i].ftout_available=0;
	resstat[i].ft2out_available=0;
	resstat[i].fdout_available=0;
	resstat[i].fd2out_available=0;

}

//ooo_dispatch moves instructions from the bottom of the dispatch queue into the out-of-order core
//dispatching an instruction requires all of the following:
//1) there must be a free reservation station, 2) there must be room in the reorder buffer,
//3) if the instruction has rtype 4 (load/store), there must be room in the load/store queue
//if so, the inst. is removed from the dispatch queue and placed in the ROB and a reservation station
//this repeats until the dispatch queue is empty or one of the resources runs out
//jumps and predicted-taken branches redirect PC here and empty the dispatch queue
//takes no arguments and returns nothing; it works entirely on the simulator's global state
void ooo_dispatch()
{
	int i,j,targ;

	//dispatch as many instructions as possible
	while (1)
	{
	//if there are no instructions in the dispatch queue, don't dispatch, and don't stall fetch
	if (inst_in_dispatchqueue==0)
	{
		return;
	}

	//pull the first instruction out of the bottom of the dispatch queue
	ooo_fi_inst=copyinst(dispatch_queue[0]);

	//check if there is a free reservation station
	//i is left equal to reservation_station_number when every station is occupied
	for (i=0; i<reservation_station_number; i++)
	{
		if (resstat[i].occupied==0)
			break;
	}

	//check whether there is room in the load/store queue if a load or store
	//otherwise, cannot dispatch
	if (ooo_fi_inst.rtype==4 && inst_in_lsq>=lsq_size)
		return;

	//check whether a reservation station was found and there is room in the reorder buffer
	if (i<reservation_station_number && inst_in_rob<reorder_buffer_size)
	{

		//assign the new instruction to reservation station i
		resstat[i].instruction=copyinst(ooo_fi_inst);
		resstat[i].occupied=1;
		resstat[i].PC=resstat[i].instruction.addr;
		resstat[i].rob_place=rob_head;
		resstat[i].busy=3;

		//assign a type & latency to the reservation station based on the instruction
		//any rtype outside 0-4 gets a latency of one cycle
		resstat[i].type=resstat[i].instruction.rtype;
		if (resstat[i].type==0)
			resstat[i].latency=integer_latency;
		else if (resstat[i].type==1)
			resstat[i].latency=integer_multdiv_latency;
		else if (resstat[i].type==2)
			resstat[i].latency=float_latency;
		else if (resstat[i].type==3)
			resstat[i].latency=float_multdiv_latency;
		else if (resstat[i].type==4)
			resstat[i].latency=ls_latency;
		else
			resstat[i].latency=1;

		resstat[i].time_left=resstat[i].latency;

		//criticality add-in
		//when addonecycle is set, every instruction starts with one extra cycle of time_left
		if (addonecycle==1)
			resstat[i].time_left+=1;

		//check if the instruction is likely to be on the critical path
		//if so, and subtractcritical is set, reduce its time_left by 1 cycle (smallest possible decrease)
		//the predictor's answer is remembered in guessed_critical either way
		resstat[i].guessed_critical=0;
		j=get_criticality(resstat[i].PC);
		if (j>0)
		{
			if (subtractcritical==1)
			{
				resstat[i].time_left--;
				if (resstat[i].time_left<0)
					fatal("Invalid instruction latency");
			}

			resstat[i].guessed_critical=j;
		}

		//assign the new instruction a place in the reorder buffer
		ROB[rob_head].instruction=copyinst(ooo_fi_inst);
		ROB[rob_head].res_stat=i;
		ROB[rob_head].ready=0;

		//CRITICALITY ADD-IN
		//reset the per-entry criticality statistics
		ROB[rob_head].cycles_in_ROB=0;
		ROB[rob_head].cycles_notready=0;
		ROB[rob_head].QOLDeverset=0;
		ROB[rob_head].QOLDDEPeverset=0;
		ROB[rob_head].ALOLDeverset=0;
		ROB[rob_head].QCONSeverset=0;

		//advance the head of the reorder buffer, wrapping around at the end
		inst_in_rob++;
		rob_head++;
		if (rob_head==reorder_buffer_size)
			rob_head=0;

		power_model(10,0,0);

		//set the input and output dependent registers for that instruction
		set_resstat_registers(i);

		//insert value predictions into input registers
		set_predicted_registers(i);

		//by default, no value prediction is made
		resstat[i].value_prediction_made=0;

		//initially, the value prediction has not been used by another instruction
		resstat[i].value_prediction_used=0;

		//keep lsq order - loads/stores must be executed in order
		if (resstat[i].type==4)
			resstat[i].lsq_order=inst_in_lsq++;

		//shift everybody in dispatch queue down one
		for (j=0; j<inst_in_dispatchqueue-1; j++)
			dispatch_queue[j]=copyinst(dispatch_queue[j+1]);
		inst_in_dispatchqueue--;

		//if the instruction is a j or a jal, can set next PC immediately
		//the target is the low 26 bits of the upper instruction word, shifted left by 2
		//the dispatch queue is emptied, dropping whatever was queued behind the jump
		if (ooo_fi_inst.type==6)
		{
       		 	targ=(ooo_fi_inst.inst_upper)&0x3ffffff;
			PC=targ<<2;

			inst_in_dispatchqueue=0;
		}
		//if the instruction is a jr or a jalr, need to get next address speculatively from BTB
		//branch_prediction_made is set to 3 for this case
		if (ooo_fi_inst.type==7)
		{
			PC=getBTB(ooo_fi_inst.addr);
			resstat[i].predicted_PC=PC;
			resstat[i].branch_prediction_made=3;
	
			inst_in_dispatchqueue=0;
		}
		//if the instruction is a conditional branch, need to predict the next address
		if (ooo_fi_inst.type==8)
		{
			j=get_branch_prediction(ooo_fi_inst.addr);
			//if branch predictor doesn't want to make a prediction, guess not taken
			//PC and the dispatch queue are left alone; the predicted address is the next instruction (PC+8)
			if (j==0)
			{
				resstat[i].predicted_PC=resstat[i].PC+8;
				resstat[i].branch_prediction_made=0;
			}
			//if branch predictor guesses not taken, choose the next instruction (PC+8)
			if (j==1)
			{
				resstat[i].predicted_PC=resstat[i].PC+8;
				resstat[i].branch_prediction_made=1;
			}
			//if branch predictor guesses taken, get the next PC from the BTB
			//and empty the dispatch queue
			if (j==2)
			{
				PC=getBTB(ooo_fi_inst.addr);
				resstat[i].predicted_PC=PC;
				resstat[i].branch_prediction_made=2;

				inst_in_dispatchqueue=0;
			}
		}

		//if the instruction has type 0 or type 4 we can make a value prediction
		//(integer arithmetic or a load, according to the original author)
		if (ooo_fi_inst.type==0 || ooo_fi_inst.type==4)
		{
			//only make a prediction if it has a single integer output in rt or rd
			if ((ooo_fi_inst.sources_rt==1 || ooo_fi_inst.sources_rd==1) && ooo_fi_inst.sources_rt2==0)
			{
				resstat[i].value_prediction_made=get_value_prediction_confidence(ooo_fi_inst.addr,ooo_fi_inst.type);
				resstat[i].value_prediction=get_value_prediction(ooo_fi_inst.addr,ooo_fi_inst.type);
			}
		}

	}
	//otherwise, no more room this cycle
	else
		return;
	}

}

//ooo_fetch gets instructions from memory and puts them in the dispatch queue
//it keeps going until the dispatch queue is full, fetches_per_cycle instructions
//have been fetched this cycle, or the current icache access has not finished yet
void ooo_fetch()
{
	int word_lo, word_hi;
	int fetched;

	for (fetched=0; inst_in_dispatchqueue<dispatch_queue_size && fetched<fetches_per_cycle; fetched++)
	{
		//start timer - make fetches take the number of cycles of icache latency
		if (time_left_fetch==0)
			time_left_fetch=icache_access_latency(PC);

		//one more cycle of the access has elapsed
		time_left_fetch--;

		//if the instruction fetch is still happening, can't read instruction this cycle
		if (time_left_fetch>0)
			break;

		//an instruction is two words: the lower word at PC and the upper word at PC+4
		word_lo=icache_read_word(PC);
		word_hi=icache_read_word(PC+4);

		power_model(11,0,0);

		//get the name and attributes of the instruction, then record its raw words and address
		ooo_dofetch(word_hi, word_lo);
		ooo_fi_inst.inst_upper=word_hi;
		ooo_fi_inst.inst_lower=word_lo;
		ooo_fi_inst.addr=PC;

		//copy new instruction to top of dispatch queue
		dispatch_queue[inst_in_dispatchqueue++]=copyinst(ooo_fi_inst);

		//move on to the next 8-byte instruction
		PC=PC+8;
	}
}

//set_clock_speed changes the simulated clock speed to c
//every cache latency is rescaled from its initial value by the ratio of the
//new clock speed to the initial clock speed, rounded up to a whole number of cycles
void set_clock_speed(unsigned int c)
{
	float scale;

	clock_speed=c;

	//cycle latency scale factor - proportional to clock speed
	scale=(float)clock_speed / (float)init_clock_speed;

	//instruction cache latencies
	il1_cache_hit_latency = ceil(scale*(float)init_il1_cache_hit_latency);
	il2_cache_hit_latency = ceil(scale*(float)init_il2_cache_hit_latency);
	il2_cache_miss_latency = ceil(scale*(float)init_il2_cache_miss_latency);

	//data cache latencies
	dl1_cache_hit_latency = ceil(scale*(float)init_dl1_cache_hit_latency);
	dl2_cache_hit_latency = ceil(scale*(float)init_dl2_cache_hit_latency);
	dl2_cache_miss_latency = ceil(scale*(float)init_dl2_cache_miss_latency);

	//recompute power
	power_model(-2,0,0);
}

//outorder_simulate runs the out-of-order pipeline one cycle at a time until the simulation ends
//each cycle calls the 6 pipeline stages, from commit back to fetch
void outorder_simulate()
{
	//instruction count at which the next periodic instruction print is due
	unsigned int next_print=0;

	for (;;)
	{
		//if a total number of instructions to run was specified at the command line
		//and that number has been run, end the simulation
		if (total_instructions>=0 && instruction_counter>=total_instructions)
			return;

		//run the stages back to front
		ooo_commit();
		ooo_writeback();
		ooo_execute();
		ooo_issue();
		ooo_dispatch();
		ooo_fetch();

		//model the power consumed this cycle
		power_model(-3,0,0);

		//CRITICALITY ADD-IN
		//examine reservation station and look for
		//criticality indicators
		update_criticality_flags();

		//if the user specified a # of instructions before debug, and they have elapsed,
		//print out the pipeline & registers each cycle and prompt the user between cycles
		if (total_instructions_until_debug>=0 && instruction_counter >=total_instructions_until_debug)
		{
			print_state();
			getchar();
		}

		//if the user specified to print out every "print_instruction"th instruction,
		//print out the name of that instruction and its address
		if (print_instruction>0 && (instruction_counter-1)>=next_print && instruction_counter>0)
		{
			printf("%u: %x %s\n",instruction_counter,ooo_last_instruction.addr,ooo_last_instruction.name);
			next_print+=print_instruction;
		}

		//increment the cycle counter
		counter++;

		//add on the elapsed cycle time
		totaltime += 1/(float)clock_speed;
	}
}

//dooutordersim sets up the outoforder simulator and launches it
//it opens the trace file if dump_trace is set, resets the architectural registers,
//allocates the dispatch queue, ready queue, reservation stations and reorder buffer,
//initializes the power model and the value and criticality predictors,
//and then runs outorder_simulate until it returns
//any failed allocation, or a trace file that cannot be opened, ends the run through fatal
void dooutordersim()
{
	int i;

	//create the dump file, if needed
	//stop here if it cannot be opened rather than handing a null stream to fclose later on
	if (dump_trace==1)
	{
		regdump=fopen("trace.txt","w");
		if (regdump==0)
			fatal("Cannot open trace.txt for writing");
	}

	//initialize the clock speed
	clock_speed=init_clock_speed;

	//set the GPRs to 0
	for (i=0; i<NUM_REGS; i++)
		R[i]=0;
	HI=0;
	LO=0;

	//set the floating point registers to 0
	for (i=0; i<NUM_REGS; i++)
		F.l[i]=0;

	//set up initial registers
	PC=Prog_Entry;
	R[SP]=Param_Start;

	//initialize the dispatch queue
	dispatch_queue=(Inst *)calloc(dispatch_queue_size, sizeof(Inst));
	if (dispatch_queue==0)
		fatal("Cannot initialize the dispatch queue");
	inst_in_dispatchqueue=0;

	//initialize the ready queue
	ready_queue=(int *)calloc(reservation_station_number, sizeof(int));
	if (ready_queue==0)
		fatal("Cannot initialize the ready queue");

	//initialize the reservation stations
	resstat=(Reservation_Station *)calloc(reservation_station_number, sizeof(Reservation_Station));
	if (resstat==0)
		fatal("Cannot initialize the reservation stations");

	//make all the functional units available
	fu_integer=integer_ALUs;
	fu_integer_multdiv=integer_multdiv_ALUs;
	fu_float=float_ALUs;
	fu_float_multdiv=float_multdiv_ALUs;

	//empty the reservation stations
	for (i=0; i<reservation_station_number; i++)
	{
		resstat[i].occupied=0;
		//type of -1 means unoccupied: type not yet known
		//types are assigned at dispatch
		resstat[i].type=-1;
	}

	//clear all the registers of dependencies
	for (i=0; i<NUM_REGS; i++)
	{
		res_R[i]=-1;
		res_F[i]=-1;
	}
	res_HI=-1;
	res_LO=-1;
	res_FCC=-1;

	//initialize the reorder buffer
	ROB=(Reorder_Buffer_Entry *)calloc(reorder_buffer_size, sizeof(Reorder_Buffer_Entry));
	if (ROB==0)
		fatal("Cannot initialize the reorder buffer");

	//initialize the power simulator
	power_model(0,0,0);

	//initialize the value predictor
	initialize_value_predictor();

	//initialize the criticality predictor
	initialize_criticality();

	//start the simulator
	outorder_simulate();

	//close the dump file, if needed
	if (dump_trace==1)
		fclose(regdump);
}
//mysyscall.c
//Michael Black, 2006
//
//handles syscalls called by the benchmark program by passing them on to UNIX
//most of these handling routines have been taken directly from simplescalar

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/param.h>
#include <errno.h>
#include <time.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <signal.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <setjmp.h>
#include <sys/times.h>
#include <limits.h>
#include <sys/ioctl.h>
#include "mysyscall.h"

//counts the number of syscalls
unsigned int syscall_counter=0;

//defined in mysim.c
extern unsigned int counter;
extern unsigned int PC;
extern int R[];

//my_flag_table pairs each open() flag bit used by the simulated program (my_flag)
//with the corresponding flag of the host system (local_flag)
//the open syscall handler walks this table to translate the flags argument
struct
{
	int my_flag;
	int local_flag;
} my_flag_table[]=
	{
		{ .my_flag=0,      .local_flag=O_RDONLY   },
		{ .my_flag=1,      .local_flag=O_WRONLY   },
		{ .my_flag=2,      .local_flag=O_RDWR     },
		{ .my_flag=0x8,    .local_flag=O_APPEND   },
		{ .my_flag=0x200,  .local_flag=O_CREAT    },
		{ .my_flag=0x400,  .local_flag=O_TRUNC    },
		{ .my_flag=0x800,  .local_flag=O_EXCL     },
		{ .my_flag=0x4000, .local_flag=O_NONBLOCK },
		{ .my_flag=0x8000, .local_flag=O_NOCTTY   },
		{ .my_flag=0x2000, .local_flag=O_SYNC     },
	};

#define MY_NFLAGS (sizeof(my_flag_table)/sizeof(my_flag_table[0]))

//general lets the syscall handlers treat any of the simulated program's structures
//as a plain run of bytes when copying it to or from virtual memory
//a handler fills in (or reads) the typed member and passes the address of c,
//which is the first byte of the same storage, to memory_read_array / memory_write_array
typedef union
{
	char c;				//byte view used as the copy source or destination
	struct mstatbuf mstat;		//stat and fstat results
	struct my_timeeval mytime;	//gettimeofday time value; also the element type inside my_rusage
	struct my_timezone mytzone;	//gettimeofday time zone
	struct my_rusage ru;		//getrusage results
} general;

//handle_syscalls is called by the syscall instruction.  it actually performs the syscall
void handle_syscalls()
{
	syscall_counter++;

	switch(R[2])
	{
		//exit
		case 0x01:
			printf("\nSimulation completed.  Exiting normally\n");
			exit_routine();
			exit(0);
			break;

		//read
		case 0x03:
		{
			char *buf;
			if (!(buf=(char *) calloc(R[6],1)))
				fatal("Out of memory - can't do read");
			R[2]=read(R[4],buf,R[6]);
			if (R[2]!=-1)
				R[7]=0;
			else
			{
				R[2]=errno;
				R[7]=1;
			}
			memory_write_array(R[5],buf,R[2]);
			free(buf);
		}

		break;

		//write
		case 0x04:
		{
			char *buf;

			buf=calloc(R[6],1);
			memory_read_array(R[5], buf, R[6]);
			R[2]=write(R[4], buf, R[6]);
			if (R[2]==R[6])
				R[7]=0;
			else
			{
				R[2]=errno;
				R[7]=1;
			}
			free(buf);
		}
		break;

		//open
		case 0x05:
		{
			char buf[100];
			unsigned int i;
			int my_flags=R[5];
			int local_flags=0;

			for (i=0; i<MY_NFLAGS; i++)
			{
				if (my_flags & my_flag_table[i].my_flag)
				{
					my_flags&=~my_flag_table[i].my_flag;
					local_flags|=my_flag_table[i].local_flag;
				}
			}
			if (my_flags!=0)
				fatal("Incorrect flag value in open syscall");

			memory_read_array(R[4], buf, 100);
			R[2]=open(buf, local_flags, R[6]);
			if (R[2]!=-1)
				R[7]=0;
			else
			{
				R[2]=errno;
				R[7]=1;
			}
		}
		break;

		//close
		case 0x06:
		{
			if ((R[4]==0)||(R[4]==1)||(R[4]==2))
			{
				R[7]=0;
				break;
			}
			R[2]=close(R[4]);
			if (R[2]!=-1)
				R[7]=0;
			else
			{
				R[2]=errno;
				R[7]=1;
			}
		}
		break;

		//create
		case 0x08:
		{
			char buf[100];
			memory_read_array(R[4],buf,100);
			R[2]=creat(buf,R[5]);
			if (R[2]!=-1)
				R[7]=0;
			else
			{
				R[2]=errno;
				R[7]=1;
			}
		}
		break;

		//unlink
		case 0xA:
		{
			char buf[100];

			memory_read_array(R[4], buf, 100);
			R[2]=unlink(buf);

			if (R[2]!=-1)
				R[7]=0;
			else
			{
				R[2]=errno;
				R[7]=1;
			}
		}
		break;

		//brk
		case 0x11:
		{
			//checks whether heap space is left
			//ignore it
//			printf("Syscall 11 - brk ignored\n");
			R[2]=0;
			R[7]=0;
		}
		break;

		//lseek
		case 0x13:
			R[2]=lseek(R[4],R[5],R[6]);
			if (R[2]!=-1)
				R[7]=0;
			else
			{
				R[2]=errno;
				R[7]=1;
			}
			break;

		//getpid
		case 0x14:
			R[2]=getpid();
			if (R[2]!=-1)
				R[7]=0;
			else
			{
				R[2]=errno;
				R[7]=1;
			}	
			break;

		//getuid
		case 0x18:
			R[2]=getuid();
			R[3]=geteuid();
			if (R[2]!=-1)
				R[7]=0;
			else
			{
				R[2]=errno;
				R[7]=1;
			}
			break;

		//stat
		case 0x26:
		{
			char buf[100];
			general my_sbuf;
			struct stat sbuf;

			memory_read_array(R[4], buf, 100);
			R[2]=stat(buf, &sbuf);
			if (R[2]!=-1)
				R[7]=0;
			else
			{
				R[2]=errno;
				R[7]=1;
			}
			my_sbuf.mstat.mst_dev=sbuf.st_dev;
			my_sbuf.mstat.mst_ino=sbuf.st_ino;
			my_sbuf.mstat.mst_mode=sbuf.st_mode;
			my_sbuf.mstat.mst_nlink=sbuf.st_nlink;
			my_sbuf.mstat.mst_uid=sbuf.st_uid;
			my_sbuf.mstat.mst_gid=sbuf.st_gid;
			my_sbuf.mstat.mst_rdev=sbuf.st_rdev;
			my_sbuf.mstat.mst_size=sbuf.st_size;
			my_sbuf.mstat.mst_atime=sbuf.st_atime;
			my_sbuf.mstat.mst_mtime=sbuf.st_mtime;
			my_sbuf.mstat.mst_ctime=sbuf.st_ctime;
			my_sbuf.mstat.mst_blksize=sbuf.st_blksize;
			my_sbuf.mstat.mst_blocks=sbuf.st_blocks;

			memory_write_array(R[5], &my_sbuf.c, sizeof (struct mstatbuf));
		}
		break;

		//getgid
		case 0x2F:
			R[2]=getgid();
			R[3]=getegid();
			if (R[2]!=-1)
				R[7]=0;
			else
			{
				R[2]=errno;
				R[7]=1;
			}
			break;

		//ioctl
		case 0x36:
			//don't bother handling it
//			printf("Syscall 36 - ioctl ignored\n");
			R[2]=0;
			R[7]=0;
			break;

		//fstat
		case 0x3e:
		{
			general msbuf;
			struct stat sbuf;

			R[2]=fstat(R[4],&sbuf);
			if (R[2]!=-1)
				R[7]=0;
			else
			{
				R[2]=errno;
				R[7]=1;
			}
			msbuf.mstat.mst_dev = (sbuf.st_dev);
			msbuf.mstat.mst_ino = (sbuf.st_ino);
			msbuf.mstat.mst_mode = (sbuf.st_mode);
			msbuf.mstat.mst_nlink = (sbuf.st_nlink);
			msbuf.mstat.mst_uid = (sbuf.st_uid);
			msbuf.mstat.mst_gid = (sbuf.st_gid);
			msbuf.mstat.mst_rdev = (sbuf.st_rdev);
			msbuf.mstat.mst_size = (sbuf.st_size);
			msbuf.mstat.mst_atime = (sbuf.st_atime);
			msbuf.mstat.mst_mtime = (sbuf.st_mtime);
			msbuf.mstat.mst_ctime = (sbuf.st_ctime);
			msbuf.mstat.mst_blksize = (sbuf.st_blksize);
			msbuf.mstat.mst_blocks = (sbuf.st_blocks);

			memory_write_array(R[5], &msbuf.c, sizeof(struct mstatbuf));
		}
		break;

		//fcntl
		case 0x5c:
			R[2]=fcntl(R[4],R[5],R[6]);
			if (R[2]!=-1)
				R[7]=0;
			else
			{
				R[2]=errno;
				R[7]=1;
			}
			break;

		//setitimer
		case 0x6c:
			//Simplescalar doesn't handle this, so why should I?
//			printf("Syscall 6c - setitimer ignored\n");
			R[2]=0;
			R[7]=0;
			break;

		//getttimeofday
		case 0x74:
		{
			general my_tv;
			struct timeval tv, *tvp;
			general my_tz;
			struct timezone tz, *tzp;

			if (R[4]!=0)
			{
				memory_read_array(R[4],&my_tv.c,sizeof(struct my_timeeval));
				tv.tv_sec=my_tv.mytime.my_tv_sec;
				tv.tv_usec=my_tv.mytime.my_tv_usec;
				tvp=&tv;
			}
			else
				tvp=NULL;
			if (R[5]!=0)
			{
				memory_read_array(R[5],&my_tz.c,sizeof(struct my_timezone));
				tz.tz_minuteswest=my_tz.mytzone.my_tz_minuteswest;
				tz.tz_dsttime=my_tz.mytzone.my_tz_dsttime;
				tzp=&tz;
			}
			else
				tzp=NULL;
			R[2]=gettimeofday(tvp,tzp);
			if (R[2]!=-1)
				R[7]=0;
			else
			{
				R[2]=errno;
				R[7]=1;
			}
			if (R[4]!=0)
			{
				my_tv.mytime.my_tv_sec=tv.tv_sec;
				my_tv.mytime.my_tv_usec=tv.tv_usec;
				memory_write_array(R[4],&my_tv.c,sizeof(struct my_timeeval));
			}
			if (R[5]!=0)
			{
				my_tz.mytzone.my_tz_minuteswest=tz.tz_minuteswest;
				my_tz.mytzone.my_tz_dsttime=tz.tz_dsttime;
				memory_write_array(R[5],&my_tz.c,sizeof(struct my_timezone));
			}
		}
		break;


		//getrusage
		case 0x75:
		{
			struct rusage local_rusage;
			general rusage;

			R[2]=getrusage(R[4],&local_rusage);
			if (R[2]!=-1)
				R[7]=0;
			else
			{
				R[2]=errno;
				R[7]=1;
			}
			
			rusage.ru.my_ru_utime.my_tv_sec = local_rusage.ru_utime.tv_sec;
			rusage.ru.my_ru_utime.my_tv_usec = local_rusage.ru_utime.tv_usec;
			rusage.ru.my_ru_utime.my_tv_sec = (local_rusage.ru_utime.tv_sec);
			rusage.ru.my_ru_utime.my_tv_usec = (local_rusage.ru_utime.tv_usec);
			rusage.ru.my_ru_stime.my_tv_sec = local_rusage.ru_stime.tv_sec;
			rusage.ru.my_ru_stime.my_tv_usec = local_rusage.ru_stime.tv_usec;
			rusage.ru.my_ru_stime.my_tv_sec = (local_rusage.ru_stime.tv_sec);
			rusage.ru.my_ru_stime.my_tv_usec = (local_rusage.ru_stime.tv_usec);
			rusage.ru.my_ru_maxrss = (local_rusage.ru_maxrss);
			rusage.ru.my_ru_ixrss = (local_rusage.ru_ixrss);
			rusage.ru.my_ru_idrss = (local_rusage.ru_idrss);
			rusage.ru.my_ru_isrss = (local_rusage.ru_isrss);
			rusage.ru.my_ru_minflt = (local_rusage.ru_minflt);
			rusage.ru.my_ru_majflt = (local_rusage.ru_majflt);
			rusage.ru.my_ru_nswap = (local_rusage.ru_nswap);
			rusage.ru.my_ru_inblock = (local_rusage.ru_inblock);
			rusage.ru.my_ru_oublock = (local_rusage.ru_oublock);
			rusage.ru.my_ru_msgsnd = (local_rusage.ru_msgsnd);
			rusage.ru.my_ru_msgrcv = (local_rusage.ru_msgrcv);
			rusage.ru.my_ru_nsignals = (local_rusage.ru_nsignals);
			rusage.ru.my_ru_nvcsw = (local_rusage.ru_nvcsw);
			rusage.ru.my_ru_nivcsw = (local_rusage.ru_nivcsw);	

			memory_write_array(R[5],&rusage.c,sizeof(struct rusage));
		}
		break;

		default:
			printf("Syscall %x called\n",R[2]);
			fatal("Syscall not handled");
	}
}
//myvpred.c
//Michael Black, 2006

#include <math.h>
#include <stdlib.h>

int value_predict=0;
int load_predict=0;
int context_stride_hybrid=0;

extern unsigned int instruction_counter;

unsigned int vp_correct_made=0;
unsigned int vp_correct_notmade=0;
unsigned int vp_wrong_made=0;
unsigned int vp_wrong_notmade=0;

//flags
int value_hash_table_size=4096;
int value_context_history_size=32;
int value_threshold=4;
int weight_cap=1023;
int past_value_number=32;
int confidence_threshold=100;
int alpha=1;
int value_cache_size=1024;
int tally_perceptron_weights=0;

//weight tallying
#define WEIGHT_MAG 1024
unsigned int weight_distribution[2*WEIGHT_MAG];
float weight_averages[2*WEIGHT_MAG];
unsigned int weight_averages_count[2*WEIGHT_MAG];
float weight_accuracies[2*WEIGHT_MAG];
unsigned int weight_accuracies_count[2*WEIGHT_MAG];

//STRIDE PREDICTOR

//one entry of the stride predictor table, selected by (PC>>3) modulo value_hash_table_size
typedef struct
{
	int stride_last;	//most recent actual value recorded by train_stride
	int stride_last2;	//the actual value that was recorded before stride_last
	int confidence;		//counter kept between 0 and value_threshold by train_stride_confidence
} stride_entry;

stride_entry *value_stride_table;

void initialize_stride()
{
	int i;
	value_stride_table=calloc(value_hash_table_size, sizeof(stride_entry));
	if (value_stride_table==0)
		fatal("Not enough memory for value predictor");
	for (i=0; i<value_hash_table_size; i++)
	{
		value_stride_table[i].stride_last=0;
		value_stride_table[i].stride_last2=0;
		value_stride_table[i].confidence=0;
	}
}

//get_stride returns the stride prediction for the instruction at PC:
//the last value plus the difference between the last two values
int get_stride(unsigned int PC)
{
	int slot=(PC>>3)%value_hash_table_size;
	int newest=value_stride_table[slot].stride_last;
	int older=value_stride_table[slot].stride_last2;

	//same evaluation order as (last - last2) + last
	return newest-older+newest;
}

//get_stride_confidence reports whether the stride prediction for PC should be used
//confidence gating is currently switched off: the function always returns 1
int get_stride_confidence(unsigned int PC)
{
	int slot;

	return 1;

	//never reached while the return above is in place:
	//report 1 only when the entry's confidence counter has reached value_threshold
	slot=(PC>>3)%value_hash_table_size;

	return (value_stride_table[slot].confidence>=value_threshold) ? 1 : 0;
}

void train_stride(unsigned int PC, int actual)
{
	int entry;

	entry=(PC>>3)%value_hash_table_size;

	value_stride_table[entry].stride_last2=value_stride_table[entry].stride_last;
	value_stride_table[entry].stride_last=actual;
}

void train_stride_confidence(unsigned int PC, int actual, int predicted)
{
	int entry;
	return;
	entry=(PC>>3)%value_hash_table_size;

	if (actual==predicted)
		value_stride_table[entry].confidence++;
	else
		value_stride_table[entry].confidence--;

	if (value_stride_table[entry].confidence>value_threshold)
		value_stride_table[entry].confidence=value_threshold;
	if (value_stride_table[entry].confidence<0)
		value_stride_table[entry].confidence=0;
}

//CONTEXT PREDICTOR - Wang and Franklin

//one entry of the value history table, selected by (PC>>3) modulo value_hash_table_size
typedef struct
{
	int tag;			//(PC>>3)/value_hash_table_size of the instruction last trained here; -1 until first trained
	int* past_value;		//value_context_history_size remembered result values
	int* LRU;			//one age counter per past_value slot; train_context sets the slot just used to 0
	char* value_history_pattern;	//value_context_history_size pattern elements, combined to select a pattern history table entry

	int* input_value_history;	//NOTE(review): not touched by the code in view - purpose to be confirmed
	int* input_pc_history;		//NOTE(review): not touched by the code in view - purpose to be confirmed

	int** weight;			//two-dimensional array of weights, read as weight[i][j] by tally_weights

	int** correct;			//one counter per weight, same indexing; read and reset to 0 by tally_weights
	int total_correct;		//divisor applied to the correct counters by tally_weights; reset to 0 there

	int stride_last;		//last value, used by get_context for the stride half of the hybrid predictor
	int stride_last2;		//value before stride_last
	int stride_confidence;		//compared with value_threshold by get_context_confidence when context_stride_hybrid is 1

	int* entire_value_history;	//log of up to 100 values, filled by save_value_history
	int entire_value_history_entries;	//number of values currently in the log

	int confidence;			//NOTE(review): not touched by the code in view - purpose to be confirmed
} vht_entry;

vht_entry *value_history_table;

//one entry of the pattern history table, selected by the value history pattern of a vht_entry
typedef struct
{
	char* confidence;	//value_context_history_size counters, one per past_value slot, kept between 0 and value_threshold by train_context

	int** weight;		//NOTE(review): not touched by the code in view - purpose to be confirmed
} pht_entry;

pht_entry *pattern_history_table;

//weight_tally_bucket maps a weight onto an index into the 2*WEIGHT_MAG tally arrays
//weights at or beyond either end of the range share the first or the last bucket
static int weight_tally_bucket(int w)
{
	if (w>=WEIGHT_MAG-1)
		return 2*WEIGHT_MAG-1;
	if (w<=-WEIGHT_MAG)
		return 0;
	return w+WEIGHT_MAG;
}

//tally_weights adds the weights of one value history table entry to the global weight statistics
//entry is the index of the value history table entry
//wxsize and wysize are the dimensions of that entry's weight and correct arrays
//nothing happens unless tally_perceptron_weights is set
//entries whose tag is -1 or lower contribute nothing to the statistics
//in every case the entry's correct counters and total_correct are reset to 0 afterwards
void tally_weights(int entry, int wxsize, int wysize)
{
	//number of this entry's weights that fall into each bucket
	int wd[2*WEIGHT_MAG];
	int i,j,bucket;
	int cells=wxsize*wysize;

	if (tally_perceptron_weights==0)
		return;

	for (i=0; i<2*WEIGHT_MAG; i++)
		wd[i]=0;

	//if tag is not -1, tally the weights
	if (value_history_table[entry].tag>-1)
	{
		//first tally the total distribution
		for (i=0; i<wxsize; i++)
		{
			for (j=0; j<wysize; j++)
			{
				bucket=weight_tally_bucket(value_history_table[entry].weight[i][j]);
				weight_distribution[bucket]++;
				wd[bucket]++;
			}
		}

		//next tally the percentage of weights at each value for this entry
		//an entry without any weights is skipped: dividing by zero would leave NaN in the running sums for good
		if (cells>0)
		{
			for (i=0; i<2*WEIGHT_MAG; i++)
			{
				weight_averages[i] += (float)wd[i]/(float)cells;
				weight_averages_count[i]++;
			}
		}

		//finally tally the accuracies of the weights at each magnitude
		//skipped when nothing has been counted since the last reset, again to avoid dividing by zero
		if (value_history_table[entry].total_correct!=0)
		{
			for (i=0; i<wxsize; i++)
			{
				for (j=0; j<wysize; j++)
				{
					bucket=weight_tally_bucket(value_history_table[entry].weight[i][j]);
					weight_accuracies[bucket]+=(float)(value_history_table[entry].correct[i][j])/(float)(value_history_table[entry].total_correct);
					weight_accuracies_count[bucket]++;
				}
			}
		}
	}

	//reset to 0
	for (i=0; i<wxsize; i++)
		for (j=0; j<wysize; j++)
			value_history_table[entry].correct[i][j]=0;
	value_history_table[entry].total_correct=0;
}

//initialize_weight_tally clears all of the weight statistics arrays
//nothing happens unless tally_perceptron_weights is set
void initialize_weight_tally()
{
	int bucket;

	if (tally_perceptron_weights!=0)
	{
		for (bucket=WEIGHT_MAG*2-1; bucket>=0; bucket--)
		{
			weight_distribution[bucket]=0;
			weight_averages[bucket]=0;
			weight_averages_count[bucket]=0;
			weight_accuracies[bucket]=0;
			weight_accuracies_count[bucket]=0;
		}
	}
}

//dump_weight_tally does a final tally of every value history table entry in use
//and then prints the three weight statistics tables, one line per weight magnitude
//nothing happens unless tally_perceptron_weights is set
void dump_weight_tally()
{
	int i;

	if (tally_perceptron_weights==0)
		return;

	for (i=0; i<value_hash_table_size; i++)
	{
		if (value_history_table[i].tag>-1)
		{
			tally_weights(i,log2(past_value_number),value_context_history_size);		//will work only for -vp7
		}
	}

	printf("\n");
	printf("Perceptron weight statistics:\n");
	printf("Weight distribution:  total quantity of weights at each magnitude\n");
	for (i=0; i<WEIGHT_MAG*2; i++)
	{
		printf("%i\t%u\n",i-WEIGHT_MAG,weight_distribution[i]);
	}
	printf("\nWeight averages:  average percentage of weights at each magnitude over every entry\n");
	for (i=0; i<WEIGHT_MAG*2; i++)
	{
		//if nothing was ever tallied, print the raw sum instead of dividing by zero,
		//the same way the accuracies are handled below
		if (weight_averages_count[i]==0)
			printf("%i\t%f\n",i-WEIGHT_MAG,weight_averages[i]);
		else
			printf("%i\t%f\n",i-WEIGHT_MAG,weight_averages[i]/(float)weight_averages_count[i]);
	}
	printf("\nWeight accuracies:  average accuracy of input for weight at each magnitude over every entry\n");
	for (i=0; i<WEIGHT_MAG*2; i++)
	{
		if (weight_accuracies_count[i]==0)
			printf("%i\t%f\n",i-WEIGHT_MAG,weight_accuracies[i]);
		else
			printf("%i\t%f\n",i-WEIGHT_MAG,weight_accuracies[i]/(float)weight_accuracies_count[i]);
	}
}

//set_weight_correctness adds 1 to *correct when the sign of *weight fits what happened:
//a positive weight when input and actual agree on being 1,
//or a negative weight when they disagree
//a weight of 0 never counts, and predicted is not used
//nothing happens unless tally_perceptron_weights is set
void set_weight_correctness(char input, int* weight, int* correct, int predicted, int actual)
{
	int agree;

	if (tally_perceptron_weights==0)
		return;

	agree=((input==1)==(actual==1));

	if ((agree && *weight>0) || (!agree && *weight<0))
		(*correct)++;
}

void dump_value_history(int entry)
{
	int i;

	if (value_history_table[entry].tag==-1)
		return;

	printf("Values for entry %x:\n",entry);
	for (i=0; i<value_history_table[entry].entire_value_history_entries; i++)
		printf("%i\n",value_history_table[entry].entire_value_history[i]);

	value_history_table[entry].entire_value_history_entries=0;
}
//initialize_value_history gives a value history table entry an empty log of observed values
//the log has room for 100 values, the same limit save_value_history enforces
//a failed allocation ends the run through fatal
void initialize_value_history(int entry)
{
	value_history_table[entry].entire_value_history=calloc(100,sizeof(int));
	//save_value_history writes through this pointer, so a failed allocation must not go unnoticed
	if (value_history_table[entry].entire_value_history==0)
		fatal("Not enough memory for value predictor");
	value_history_table[entry].entire_value_history_entries=0;
}
void save_value_history(int entry, int value)
{
	if (value_history_table[entry].entire_value_history_entries<100)
		value_history_table[entry].entire_value_history[value_history_table[entry].entire_value_history_entries++]=value;
}
//dump_all_value_history prints and empties the value log of every
//value history table entry whose tag is not -1
void dump_all_value_history()
{
	int slot=0;

	while (slot<value_hash_table_size)
	{
		if (value_history_table[slot].tag!=-1)
			dump_value_history(slot);
		slot++;
	}
}

void initialize_context()
{
	int i,j;

	value_history_table=(vht_entry*)calloc(value_hash_table_size, sizeof(vht_entry));
	pattern_history_table=(pht_entry*)calloc((int)pow(2,value_context_history_size*log2(value_context_history_size)), sizeof(pht_entry));
	if (value_history_table==0 || pattern_history_table==0)
		fatal("Not enough memory for value predictor");
	for (i=0; i<value_hash_table_size; i++)
	{
		value_history_table[i].tag=-1;
		value_history_table[i].past_value=(int*)calloc(value_context_history_size, 4);
		value_history_table[i].LRU=(int*)calloc(value_context_history_size, 4);
		value_history_table[i].value_history_pattern=(char*)calloc(value_context_history_size, 1);

		if (value_history_table[i].past_value==0 || value_history_table[i].LRU==0 || value_history_table[i].value_history_pattern==0)
			fatal("Not enough memory for value predictor");

		for (j=0; j<value_context_history_size; j++)
		{
			value_history_table[i].past_value[j]=0;
			value_history_table[i].LRU[j]=0;
			value_history_table[i].value_history_pattern[j]=0;
		}
		value_history_table[i].stride_last=0;
		value_history_table[i].stride_last2=0;
		value_history_table[i].stride_confidence=0;
	}
	for (i=0; i<(int)pow(2,value_context_history_size*log2(value_context_history_size)); i++)
	{
		pattern_history_table[i].confidence=(char*)calloc(value_context_history_size, 1);

		if (pattern_history_table[i].confidence==0)
			fatal("Not enough memory for value predictor");

		for (j=0; j<value_context_history_size; j++)
			pattern_history_table[i].confidence[j]=0;
	}
}

//get_context returns the context predictor's value prediction for the instruction at PC
//the entry's value history pattern selects a pattern history table entry, and the past value
//with the highest confidence there is the prediction (the lowest slot wins ties)
//when context_stride_hybrid is set and that confidence is below value_threshold,
//the stride prediction (last value plus the last difference) is returned instead
int get_context(unsigned int PC)
{
	int entry;
	int pattern_index;
	int best,k;
	double radix;

	entry=(PC>>3)%value_hash_table_size;

	//get pattern history entry by concatenating the history
	radix=log2(value_context_history_size);
	pattern_index=0;
	for (k=0; k<value_context_history_size; k++)
	{
		pattern_index=pattern_index*radix;
		pattern_index+=value_history_table[entry].value_history_pattern[k];
	}

	//use pht to choose a past value
	best=0;
	for (k=1; k<value_context_history_size; k++)
	{
		if (pattern_history_table[pattern_index].confidence[k]>pattern_history_table[pattern_index].confidence[best])
			best=k;
	}

	//if a hybrid and confidence is low, return the stride
	if (context_stride_hybrid!=0 && pattern_history_table[pattern_index].confidence[best]<value_threshold)
		return value_history_table[entry].stride_last-value_history_table[entry].stride_last2+value_history_table[entry].stride_last;

	return value_history_table[entry].past_value[best];
}

//get_context_confidence reports whether the context prediction for PC should be used
//confidence gating is currently switched off: the function always returns 1
int get_context_confidence(unsigned int PC)
{
	int entry,k,best,pattern_index;
	double radix;

	entry=(PC>>3)%value_hash_table_size;

	return 1;

	//nothing below is reached while the return above is in place

	//an entry trained by a different instruction gives no confidence
	if (value_history_table[entry].tag!=(PC>>3)/value_hash_table_size)
		return 0;

	//get pattern history entry by concatenating the history
	radix=log2(value_context_history_size);
	pattern_index=0;
	for (k=0; k<value_context_history_size; k++)
	{
		pattern_index=pattern_index*radix;
		pattern_index+=value_history_table[entry].value_history_pattern[k];
	}

	//use pht to choose a past value
	best=0;
	for (k=1; k<value_context_history_size; k++)
	{
		if (pattern_history_table[pattern_index].confidence[k]>pattern_history_table[pattern_index].confidence[best])
			best=k;
	}

	//take if confidence exceeds a threshold
	if (pattern_history_table[pattern_index].confidence[best]>=value_threshold)
		return 1;

	//in hybrid mode a confident stride is good enough too
	if (context_stride_hybrid==1 && value_history_table[entry].stride_confidence>=value_threshold)
		return 1;

	return 0;
}

//train_context updates the context predictor with the value an instruction actually produced
//PC        - address of the instruction; selects the value history table entry
//actual    - the value the instruction really produced
//predicted - not referenced by this routine
//Steps, in order: claim the entry's tag, adjust the pattern history confidence counters,
//insert the value into the pool of past values if it is new, refresh the LRU ages, push the
//value's slot number onto the history pattern, and (hybrid mode only) update the stride state
void train_context(unsigned int PC, int actual, int predicted)
{
	int entry,i,j,pht_entry;

	entry=(PC>>3)%value_hash_table_size;

	//set tag to current
	if (value_history_table[entry].tag!=(PC>>3)/value_hash_table_size)
	{
		value_history_table[entry].tag=(PC>>3)/value_hash_table_size;
	}

	//get the appropriate pattern history table entry
	//(same folding of the history pattern that get_context uses)
	pht_entry=0;
	for (i=0; i<value_context_history_size; i++)
	{
		pht_entry=pht_entry*log2(value_context_history_size);
		pht_entry+=value_history_table[entry].value_history_pattern[i];
	}

	//find the pht confidence entry that matches the actual value and change it appropriately
	//matching slots count up (capped at value_threshold), all other slots count down (floored at 0)
	//this runs before the value pool is modified, so it judges the pool as it was at prediction time
	for (i=0; i<value_context_history_size; i++)
	{
		if(value_history_table[entry].past_value[i]==actual)
		{
			pattern_history_table[pht_entry].confidence[i]++;
			if (pattern_history_table[pht_entry].confidence[i]>value_threshold)
				pattern_history_table[pht_entry].confidence[i]=value_threshold;
		}
		else
		{
			pattern_history_table[pht_entry].confidence[i]--;
			if (pattern_history_table[pht_entry].confidence[i]<0)
				pattern_history_table[pht_entry].confidence[i]=0;
		}
	}

	//have we seen this value before?  if not, swap it in
	//after this section i is the slot that holds 'actual'
	for (i=0; i<value_context_history_size; i++)
	{
		if (value_history_table[entry].past_value[i]==actual)
			break;
	}
	if (i==value_context_history_size)
	{
		//not found: the victim is the slot with the largest LRU age (lowest slot on ties)
		i=0;
		for (j=1; j<value_context_history_size; j++)
		{
			if (value_history_table[entry].LRU[j]>value_history_table[entry].LRU[i])
				i=j;
		} 
		value_history_table[entry].past_value[i]=actual;
	}
	//update LRU info
	//slots no older than slot i age by one, then slot i becomes the youngest (age 0)
	for (j=0; j<value_context_history_size; j++)
	{
		if (i==j)
			continue;
		if (value_history_table[entry].LRU[j]<=value_history_table[entry].LRU[i])
			value_history_table[entry].LRU[j]++;
	}
	value_history_table[entry].LRU[i]=0;

	//update pattern history - shift everyone over
	//position 0 is the most recent; it records the slot number i, not the value itself
	for (j=value_context_history_size-1; j>0; j--)
		value_history_table[entry].value_history_pattern[j]=value_history_table[entry].value_history_pattern[j-1];
	value_history_table[entry].value_history_pattern[0]=i;

	//if hybrid, update the stride and its confidence
	if (context_stride_hybrid==0)
		return;

	//would the stride have been right (regardless of whether it was used)?
	//the stride guess is the last value plus the difference of the last two values
	if  (actual==value_history_table[entry].stride_last-value_history_table[entry].stride_last2+value_history_table[entry].stride_last)
	{
		value_history_table[entry].stride_confidence++;
		if (value_history_table[entry].stride_confidence>value_threshold)
			value_history_table[entry].stride_confidence=value_threshold;
	}
	else
	{
		value_history_table[entry].stride_confidence--;
		if (value_history_table[entry].stride_confidence<0)
			value_history_table[entry].stride_confidence=0;
	}
	//update the stride
	value_history_table[entry].stride_last2=value_history_table[entry].stride_last;
	value_history_table[entry].stride_last=actual;
}

//PERCEPTRON APPROACHES:

//POOL OF PAST VALUES (CONTEXT)

//BIGGEST SUM METHOD
//I'm modeling this off the Wang Franklin context predictor
//Value history table - n entries, each entry has a perceptron associated with it
//The past value with the highest perceptron sum is chosen
//Each perceptron has an input for each past value for each history position (n^2 storage)
//Training 1 = that value was seen in that position; 0 = that value was not seen in that position

void initialize_perceptron_context1()
{
	int i,j,k;

	value_history_table=calloc(value_hash_table_size, sizeof(vht_entry));
	if (value_history_table==0)
		fatal("Not enough memory for value predictor");

	for (i=0; i<value_hash_table_size; i++)
	{
		value_history_table[i].tag=-1;
		value_history_table[i].past_value=calloc(past_value_number, 4);
		value_history_table[i].LRU=calloc(past_value_number, 1);
		value_history_table[i].value_history_pattern=calloc(value_context_history_size, 1);

		value_history_table[i].weight=(int**)calloc(past_value_number, sizeof(int*));
		for (j=0; j<past_value_number; j++)
			value_history_table[i].weight[j]=(int*)calloc(past_value_number*value_context_history_size+1, 4);

		if (value_history_table[i].past_value==0 || value_history_table[i].LRU==0 || value_history_table[i].value_history_pattern==0)
			fatal("Not enough memory for value predictor");

		for (j=0; j<past_value_number; j++)
		{
			value_history_table[i].past_value[j]=0;
			value_history_table[i].LRU[j]=0;
		}
		for (j=0; j<value_context_history_size; j++)
		{
			value_history_table[i].value_history_pattern[j]=0;
		}

		for (j=0; j<past_value_number; j++)
			for (k=0; k<past_value_number*value_context_history_size+1; k++)
				value_history_table[i].weight[j][k]=0;
	}
}

//get_perceptron_context1 returns the stored past value whose perceptron produces the largest sum
//PC selects the value history table entry; the entry's tag is not checked here
//For perceptron k, weight[k][0] is the bias and weight[k][i*past_value_number+j+1] is the
//weight for "past value j at history position i"; its input is +1 when the history pattern
//holds j at position i and -1 otherwise.  Ties keep the lowest-numbered past value.
int get_perceptron_context1(unsigned int PC)
{
	int entry;
	int i,j,k;
	int sum;
	int maxsum=0, bestvalue=0;

	entry=(PC>>3)%value_hash_table_size;

	//feed history into perceptrons
	for (k=0; k<past_value_number; k++)
	{
		//each perceptron is scored on its own, so the sum restarts for every candidate
		sum=0;
		for (i=0; i<value_context_history_size; i++)
		{
			for (j=0; j<past_value_number; j++)
			{
				if (value_history_table[entry].value_history_pattern[i]==j)
					sum+=value_history_table[entry].weight[k][i*past_value_number+j+1];
				else
					sum-=value_history_table[entry].weight[k][i*past_value_number+j+1];
			}
		}
		sum+=value_history_table[entry].weight[k][0];

		//the first perceptron seeds the maximum: a sum over many weights can fall below any
		//fixed starting value derived from a single weight's cap
		if (k==0 || sum>maxsum)
		{
			maxsum=sum;
			bestvalue=k;
		}
	}

	return value_history_table[entry].past_value[bestvalue];
}

//get_perceptron_context_confidence1 reports whether the biggest-sum prediction for PC is trusted (1) or not (0)
//NOTE: the unconditional "return 1" below ends the function early, so every prediction is
//currently reported as confident and the tag / perceptron-sum logic after it never runs
int get_perceptron_context_confidence1(unsigned int PC)
{
	int entry,i,j,k,sum,maxsum;

	entry=(PC>>3)%value_hash_table_size;

	//confidence filtering is bypassed; everything below this line is unreachable
	return 1;

	//(unreachable) an entry tagged for a different PC gives no confidence
	if (value_history_table[entry].tag!=(PC>>3)/value_hash_table_size)
		return 0;

	//feed history into perceptrons
	//NOTE(review): sum and maxsum are never given a starting value in this function, so this
	//loop would read them uninitialized if the early return above were removed
	for (k=0; k<past_value_number; k++)
	{
		for (i=0; i<value_context_history_size; i++)
		{
			for (j=0; j<past_value_number; j++)
			{
				if (value_history_table[entry].value_history_pattern[i]==j)
					sum+=value_history_table[entry].weight[k][i*past_value_number+j+1] * 1;
				else
					sum+=value_history_table[entry].weight[k][i*past_value_number+j+1] * -1;
			}
		}
		sum+=value_history_table[entry].weight[k][0];

		if (sum>maxsum)
			maxsum=sum;
	}

	//confident only when the largest sum clears confidence_threshold
	if (maxsum>confidence_threshold)
		return 1;
	else
		return 0;
}

//train_perceptron_context1 trains the biggest-sum predictor with the value actually produced
//PC        - address of the instruction; selects the value history table entry
//actual    - the value the instruction really produced
//predicted - not referenced by this routine
//The perceptron of the slot holding 'actual' is pushed toward the current history (step 3),
//every other perceptron is pushed away from it (step 1); all weights stay within +/-weight_cap
void train_perceptron_context1(unsigned int PC, int actual, int predicted)
{
	int entry,i,j,k,l;

	entry=(PC>>3)%value_hash_table_size;

	//set tag to current
	if (value_history_table[entry].tag!=(PC>>3)/value_hash_table_size)
		value_history_table[entry].tag=(PC>>3)/value_hash_table_size;

	//have we seen this value before?  if not, swap it in
	//after this section i is the slot that holds 'actual'
	for (i=0; i<past_value_number; i++)
	{
		if (value_history_table[entry].past_value[i]==actual)
			break;
	}
	if (i==past_value_number)
	{
		//not found: the victim is the slot with the largest LRU age (lowest slot on ties)
		i=0;
		for (j=1; j<past_value_number; j++)
		{
			if (value_history_table[entry].LRU[j]>value_history_table[entry].LRU[i])
				i=j;
		} 
		value_history_table[entry].past_value[i]=actual;
	}
	//update LRU info
	//slots no older than slot i age by one, then slot i becomes the youngest (age 0)
	for (j=0; j<past_value_number; j++)
	{
		if (i==j)
			continue;
		if (value_history_table[entry].LRU[j]<=value_history_table[entry].LRU[i])
			value_history_table[entry].LRU[j]++;
	}
	value_history_table[entry].LRU[i]=0;

	//train perceptron
	//train perceptron i in the positive
	//weights whose (position k, value j) input was active gain 3, inactive ones lose 3
	for (k=0; k<value_context_history_size; k++)
	{
		for (j=0; j<past_value_number; j++)
		{
			if (value_history_table[entry].value_history_pattern[k]==j)
			{
				value_history_table[entry].weight[i][k*past_value_number+j+1]+=3;
				if (value_history_table[entry].weight[i][k*past_value_number+j+1]>weight_cap)
					value_history_table[entry].weight[i][k*past_value_number+j+1]=weight_cap;
			}
			else
			{
				value_history_table[entry].weight[i][k*past_value_number+j+1]-=3;
				if (value_history_table[entry].weight[i][k*past_value_number+j+1]<-weight_cap)
					value_history_table[entry].weight[i][k*past_value_number+j+1]=-weight_cap;
			}
		}
		//the bias update sits inside the history-position loop, so it is applied
		//value_context_history_size times per call
		value_history_table[entry].weight[i][0]+=3;
		if (value_history_table[entry].weight[i][0]>weight_cap)
			value_history_table[entry].weight[i][0]=weight_cap;
	}
	
	//train everyone else in the negative
	//same walk with the signs reversed and a step of 1 instead of 3
	for (l=0; l<past_value_number; l++)
	{
		if (l==i)
			continue;

		for (k=0; k<value_context_history_size; k++)
		{
			for (j=0; j<past_value_number; j++)
			{
				if (value_history_table[entry].value_history_pattern[k]==j)
				{
					value_history_table[entry].weight[l][k*past_value_number+j+1]-=1;
					if (value_history_table[entry].weight[l][k*past_value_number+j+1]<-weight_cap)
						value_history_table[entry].weight[l][k*past_value_number+j+1]=-weight_cap;
				}
				else
				{
					value_history_table[entry].weight[l][k*past_value_number+j+1]+=1;
					if (value_history_table[entry].weight[l][k*past_value_number+j+1]>weight_cap)
						value_history_table[entry].weight[l][k*past_value_number+j+1]=weight_cap;
				}
			}
			//as above, the bias is stepped once per history position
			value_history_table[entry].weight[l][0]-=1;
			if (value_history_table[entry].weight[l][0]<-weight_cap)
				value_history_table[entry].weight[l][0]=-weight_cap;
		}
	}

	//update pattern history - shift everyone over
	//position 0 is the most recent; it records the slot number i, not the value itself
	for (j=value_context_history_size-1; j>0; j--)
		value_history_table[entry].value_history_pattern[j]=value_history_table[entry].value_history_pattern[j-1];
	value_history_table[entry].value_history_pattern[0]=i;
}


//HYPERPERCEPTRON METHOD
//Perceptron for each bit of past value index
//Combination of perceptron results gives predicted value index

//initialize_perceptron_context2 builds the value history table for the hyperperceptron predictor
//Every entry gets a pool of past values, LRU ages, a history pattern of
//value_context_history_size slot numbers, and one perceptron per bit of a past-value index.
//Each perceptron has a bias (index 0) plus one weight per history position.
//Calls fatal() if any allocation fails.
void initialize_perceptron_context2()
{
	int i,j;
	int bits,slots;

	//number of index bits (= perceptrons per entry): smallest bits with 2^bits >= past_value_number
	//computed with integers so the allocation below and the row loop can never disagree,
	//which a truncated log2() did for sizes that are not a power of two
	bits=0;
	while (bits<30 && (1<<bits)<past_value_number)
		bits++;
	//the perceptron outputs can assemble any index below 2^bits, so the value pool is sized
	//to that many slots; slots past past_value_number simply stay 0
	slots=1<<bits;

	value_history_table=(vht_entry*)calloc(value_hash_table_size, sizeof(vht_entry));
	if (value_history_table==0)
		fatal("Not enough memory for value predictor");

	for (i=0; i<value_hash_table_size; i++)
	{
		//-1 so that no PC matches the entry before it has been trained
		value_history_table[i].tag=-1;

		//calloc zero-fills, so values, LRU ages, pattern and weights all start at 0
		value_history_table[i].past_value=(int*)calloc(slots, sizeof(int));
		value_history_table[i].LRU=(int*)calloc(past_value_number, sizeof(int));
		value_history_table[i].value_history_pattern=(char*)calloc(value_context_history_size, sizeof(char));
		//ask for at least one pointer so a zero-bit configuration is not mistaken for a failure
		value_history_table[i].weight=(int**)calloc(bits>0? bits:1, sizeof(int*));

		if (value_history_table[i].past_value==0 || value_history_table[i].LRU==0 || value_history_table[i].value_history_pattern==0 || value_history_table[i].weight==0)
			fatal("Not enough memory for value predictor");

		for (j=0; j<bits; j++)
		{
			value_history_table[i].weight[j]=(int*)calloc(value_context_history_size+1, sizeof(int));
			if (value_history_table[i].weight[j]==0)
				fatal("Not enough memory for value predictor");
		}
	}
}

//get_perceptron_context2 returns the hyperperceptron value prediction for the instruction at PC
//One perceptron per bit of the past-value index votes on that bit: its inputs are the same
//bit of each slot number in the history pattern (+1 when set, -1 when clear) plus a bias.
//A positive sum sets the bit.  The assembled index selects the past value that is returned.
//The entry's tag is not checked here.
int get_perceptron_context2(unsigned int PC)
{
	int slot;
	int chosen;
	int bit,pos,vote;
	double nbits;

	slot=(PC>>3)%value_hash_table_size;
	nbits=log2(past_value_number);

	chosen=0;
	for (bit=0; bit<nbits; bit++)
	{
		vote=0;
		for (pos=0; pos<value_context_history_size; pos++)
		{
			//weight 0 is the bias, so history position pos uses weight pos+1
			if ((value_history_table[slot].value_history_pattern[pos]>>bit)%2==1)
				vote+=value_history_table[slot].weight[bit][pos+1];
			else
				vote-=value_history_table[slot].weight[bit][pos+1];
		}
		vote+=value_history_table[slot].weight[bit][0];

		//a positive vote turns this index bit on
		if (vote>0)
			chosen+=(1<<bit);
	}

	return value_history_table[slot].past_value[chosen];
}

//get_perceptron_context_confidence2 reports whether the hyperperceptron prediction for PC is trusted
//This routine does not consult any confidence state: it reports every prediction as confident (1)
int get_perceptron_context_confidence2(unsigned int PC)
{
	//PC is kept in the signature like the other confidence routines but is not needed here
	(void)PC;

	return 1;
}

//train_perceptron_context2 trains the hyperperceptron predictor with the value actually produced
//PC        - address of the instruction; selects the value history table entry
//actual    - the value the instruction really produced
//predicted - not referenced by this routine
//The perceptrons are trained only when 'actual' is already in the pool of past values.
//Afterwards the history pattern is shifted and the pool itself is shifted like a FIFO with
//'actual' placed in slot 0 (the LRU array is not used by this routine).
void train_perceptron_context2(unsigned int PC, int actual, int predicted)
{
	int entry,i,j,k,l;
	int e,sum;

	entry=(PC>>3)%value_hash_table_size;

	//set tag to current
	if (value_history_table[entry].tag!=(PC>>3)/value_hash_table_size)
		value_history_table[entry].tag=(PC>>3)/value_hash_table_size;

	//have we seen this value before?  if not, swap it in
	//i becomes the slot holding 'actual', or -1 when it is not in the pool
	for (i=0; i<past_value_number; i++)
	{
		if (value_history_table[entry].past_value[i]==actual)
			break;
	}
	if (i==past_value_number)
		i=-1;

	//train perceptron

	//if value wasn't seen before, don't train

	if (i>=0)
	{
	//one perceptron per bit k of the slot number; its target is bit k of i
	for (k=0; k<log2(past_value_number); k++)
	{
		sum=0;
		//first get predicted value for this bit
		for (j=0; j<value_context_history_size; j++)
		{
			if ((value_history_table[entry].value_history_pattern[j]>>k)%2==1)
				sum+=value_history_table[entry].weight[k][j+1] * 1;
			else
				sum+=value_history_table[entry].weight[k][j+1] * -1;
		}
		sum+=value_history_table[entry].weight[k][0];

		//compute error
		//NOTE: e is assigned here but not read again in this function; the calls below are
		//given the predicted and actual bit directly
		e=0;
		//actual==1, predicted==0
		if (sum<=0 && (i>>k)%2==1)
			e=1;
		//actual==0, predicted==1
		else if (sum>0 && (i>>k)%2==0)
			e=-1;

		//train bit
		//arguments: input (+1/-1), address of the weight, predicted bit, actual bit
		//train_perceptron_weight is defined elsewhere; presumably it moves the weight toward the actual bit
		for (j=0; j<value_context_history_size; j++)
		{
			train_perceptron_weight(((value_history_table[entry].value_history_pattern[j]>>k)%2==1)? 1:-1,&value_history_table[entry].weight[k][j+1], (sum>0)? 1:0, (i>>k)%2);
		}

		//train bias
		//the bias input is always 1
		train_perceptron_weight(1,&value_history_table[entry].weight[k][0], (sum>0)? 1:0, (i>>k)%2);
	}
	}

	//an unseen value is recorded in the history as slot 0
	if (i<0)
		i=0;

	//update pattern history - shift everyone over
	for (j=value_context_history_size-1; j>0; j--)
		value_history_table[entry].value_history_pattern[j]=value_history_table[entry].value_history_pattern[j-1];
	value_history_table[entry].value_history_pattern[0]=i;

	//shift values over
	//the oldest value falls off the end and 'actual' always lands in slot 0, even if it was already present
	for (j=past_value_number-1; j>0; j--)
		value_history_table[entry].past_value[j]=value_history_table[entry].past_value[j-1];
	value_history_table[entry].past_value[0]=actual;
}

//THOMAS AND KAELI, WITH HYPERPERCEPTRONS

void initialize_perceptron_context3()
{
	int i,j;

	value_history_table=(vht_entry*)calloc(value_hash_table_size, sizeof(vht_entry));
	pattern_history_table=(pht_entry*)calloc((int)pow(past_value_number,past_value_number), sizeof(pht_entry));
	if (value_history_table==0 || pattern_history_table==0)
		fatal("Not enough memory for value predictor");

	for (i=0; i<value_hash_table_size; i++)
	{
		value_history_table[i].tag=-1;
		value_history_table[i].past_value=(int*)calloc(past_value_number, sizeof(int));
		value_history_table[i].LRU=(int*)calloc(past_value_number, sizeof(int));
		value_history_table[i].value_history_pattern=(char*)calloc(value_context_history_size, sizeof(char));

		if (value_history_table[i].past_value==0 || value_history_table[i].LRU==0 || value_history_table[i].value_history_pattern==0)
			fatal("Not enough memory for value predictor");

		for (j=0; j<past_value_number; j++)
		{
			value_history_table[i].past_value[j]=0;
			value_history_table[i].LRU[j]=0;
			value_history_table[i].value_history_pattern[j]=0;
		}
		value_history_table[i].stride_last=0;
		value_history_table[i].stride_last2=0;
		value_history_table[i].stride_confidence=0;
	}

	for (i=0; i<(int)pow(past_value_number,past_value_number); i++)
	{
		pattern_history_table[i].weight=(int**)calloc(log2(past_value_number), sizeof(int*));
		if (pattern_history_table[i].weight==0)
			fatal("Not enough memory for value predictor");

		for (j=0; j<past_value_number; j++)
		{
			pattern_history_table[i].weight[j]=(int*)calloc(value_context_history_size+1, sizeof(int));
			if (pattern_history_table[i].weight[j]==0)
				fatal("Not enough memory for value predictor");			
		}
	}

	confidence_threshold=1.93*value_context_history_size+14;
}

//get_perceptron_context3 returns the pattern-indexed hyperperceptron prediction for PC
//The entry's history pattern is folded (base past_value_number) into a pattern history table
//index; the perceptrons stored there each vote on one bit of the past-value index, using
//the same bit of every slot number in the history as +1/-1 inputs plus a bias.
//The entry's tag is not checked here.
int get_perceptron_context3(unsigned int PC)
{
	int slot;
	int pattern;
	int chosen;
	int bit,pos,vote;
	double nbits;

	slot=(PC>>3)%value_hash_table_size;

	//fold the history into the pattern table index
	pattern=0;
	for (pos=0; pos<past_value_number; pos++)
		pattern=pattern*past_value_number+value_history_table[slot].value_history_pattern[pos];

	//let each perceptron decide its bit of the index
	nbits=log2(past_value_number);
	chosen=0;
	for (bit=0; bit<nbits; bit++)
	{
		vote=0;
		for (pos=0; pos<value_context_history_size; pos++)
		{
			//weight 0 is the bias, so history position pos uses weight pos+1
			if ((value_history_table[slot].value_history_pattern[pos]>>bit)%2==1)
				vote+=pattern_history_table[pattern].weight[bit][pos+1];
			else
				vote-=pattern_history_table[pattern].weight[bit][pos+1];
		}
		vote+=pattern_history_table[pattern].weight[bit][0];

		if (vote>0)
			chosen+=(1<<bit);
	}

	return value_history_table[slot].past_value[chosen];
}

//get_perceptron_context_confidence3 reports whether the pattern-indexed hyperperceptron
//prediction for PC is trusted (1) or not (0)
//NOTE: the unconditional "return 1" below ends the function early, so every prediction is
//currently reported as confident and the tag / perceptron-sum logic after it never runs
int get_perceptron_context_confidence3(unsigned int PC)
{
	int entry,i,j,pht_entry,sum,k;

	entry=(PC>>3)%value_hash_table_size;

	//confidence filtering is bypassed; everything below this line is unreachable
	return 1;

	//(unreachable) an entry tagged for a different PC gives no confidence
	if (value_history_table[entry].tag!=(PC>>3)/value_hash_table_size)
		return 0;

	//get pattern history entry by concatenating the history
	pht_entry=0;
	for (i=0; i<past_value_number; i++)
	{
		pht_entry=pht_entry*past_value_number;
		pht_entry+=value_history_table[entry].value_history_pattern[i];
	}

	//feed history into perceptrons
	for (k=0; k<log2(past_value_number); k++)
	{
		sum=0;
		for (i=0; i<value_context_history_size; i++)
		{
			if ((value_history_table[entry].value_history_pattern[i]>>k)%2==1)
				sum+=pattern_history_table[pht_entry].weight[k][i+1] * 1;
			else
				sum+=pattern_history_table[pht_entry].weight[k][i+1] * -1;
		}
		sum+=pattern_history_table[pht_entry].weight[k][0];

		//any bit whose sum lies strictly inside (-confidence_threshold, confidence_threshold)
		//makes the whole prediction unconfident
		if (sum>=0 && sum<confidence_threshold)
			return 0;
		if (sum<0 && sum>-confidence_threshold)
			return 0;
	}


	return 1;
}

//train_perceptron_context3 trains the pattern-indexed hyperperceptron predictor
//PC        - address of the instruction; selects the value history table entry
//actual    - the value the instruction really produced
//predicted - not referenced by this routine
//Steps, in order: claim the entry's tag, fold the history into a pattern table index, make
//sure 'actual' is in the pool of past values (LRU replacement), train the perceptrons of that
//pattern toward the bits of the value's slot number, push the slot number onto the history,
//and (hybrid mode only) update the stride state
void train_perceptron_context3(unsigned int PC, int actual, int predicted)
{
	int entry,i,j,pht_entry;
	int sum,k,e;

	entry=(PC>>3)%value_hash_table_size;

	//set tag to current
	if (value_history_table[entry].tag!=(PC>>3)/value_hash_table_size)
	{
		value_history_table[entry].tag=(PC>>3)/value_hash_table_size;
	}

	//get the appropriate pattern history table entry
	//computed before the history is shifted, i.e. from the pattern the prediction saw
	pht_entry=0;
	for (i=0; i<past_value_number; i++)
	{
		pht_entry=pht_entry*past_value_number;
		pht_entry+=value_history_table[entry].value_history_pattern[i];
	}

	//have we seen this value before?  if not, swap it in
	//after this section i is the slot that holds 'actual'
	for (i=0; i<past_value_number; i++)
	{
		if (value_history_table[entry].past_value[i]==actual)
			break;
	}
	if (i==past_value_number)
	{
		//not found: the victim is the slot with the largest LRU age (lowest slot on ties)
		i=0;
		for (j=1; j<past_value_number; j++)
		{
			if (value_history_table[entry].LRU[j]>value_history_table[entry].LRU[i])
				i=j;
		} 
		value_history_table[entry].past_value[i]=actual;

	}
	//update LRU info
	//slots no older than slot i age by one, then slot i becomes the youngest (age 0)
	for (j=0; j<past_value_number; j++)
	{
		if (i==j)
			continue;
		if (value_history_table[entry].LRU[j]<=value_history_table[entry].LRU[i])
			value_history_table[entry].LRU[j]++;
	}
	value_history_table[entry].LRU[i]=0;

	//one perceptron per bit k of the slot number; its target is bit k of i
	for (k=0; k<log2(past_value_number); k++)
	{
		sum=0;
		//first get predicted value for this bit
		for (j=0; j<value_context_history_size; j++)
		{
			if ((value_history_table[entry].value_history_pattern[j]>>k)%2==1)
				sum+=pattern_history_table[pht_entry].weight[k][j+1] * 1;
			else
				sum+=pattern_history_table[pht_entry].weight[k][j+1] * -1;
		}
		sum+=pattern_history_table[pht_entry].weight[k][0];

		//compute error
		//NOTE: e is assigned here but not read again in this function; the calls below are
		//given the predicted and actual bit directly
		e=0;
		//actual==1, predicted==0
		if (sum<=0 && (i>>k)%2==1)
			e=1;
		//actual==0, predicted==1
		else if (sum>0 && (i>>k)%2==0)
			e=-1;

		//train bit
		//arguments: input (+1/-1), address of the weight, predicted bit, actual bit
		//train_perceptron_weight is defined elsewhere; presumably it moves the weight toward the actual bit
		for (j=0; j<value_context_history_size; j++)
		{
			train_perceptron_weight(((value_history_table[entry].value_history_pattern[j]>>k)%2==1)? 1:-1,&pattern_history_table[pht_entry].weight[k][j+1], (sum>0)? 1:0, (i>>k)%2);
		}

		//train bias
		//the bias input is always 1
		train_perceptron_weight(1,&pattern_history_table[pht_entry].weight[k][0], (sum>0)? 1:0, (i>>k)%2);
	}

	//update pattern history - shift everyone over
	//position 0 is the most recent; it records the slot number i, not the value itself
	for (j=value_context_history_size-1; j>0; j--)
		value_history_table[entry].value_history_pattern[j]=value_history_table[entry].value_history_pattern[j-1];
	value_history_table[entry].value_history_pattern[0]=i;

	//if hybrid, update the stride and its confidence
	if (context_stride_hybrid==0)
		return;

	//would the stride have been right (regardless of whether it was used)?
	//the stride guess is the last value plus the difference of the last two values
	if  (actual==value_history_table[entry].stride_last-value_history_table[entry].stride_last2+value_history_table[entry].stride_last)
	{
		value_history_table[entry].stride_confidence++;
		if (value_history_table[entry].stride_confidence>value_threshold)
			value_history_table[entry].stride_confidence=value_threshold;
	}
	else
	{
		value_history_table[entry].stride_confidence--;
		if (value_history_table[entry].stride_confidence<0)
			value_history_table[entry].stride_confidence=0;
	}
	//update the stride
	value_history_table[entry].stride_last2=value_history_table[entry].stride_last;
	value_history_table[entry].stride_last=actual;
}

//PERCEPTRON GLOBALLY-GUIDED LOCAL CONTEXT

//global history of past-value slot numbers shared by all instructions; -1 marks an empty position
int *global_value_index_history;
//companion history pointer; not referenced by the routines in this part of the file
int *global_value_pc_index_history;

//selects how the global history is updated (defined in another file)
extern int aliasing_reduction;

//update_global_value_history_table records a past-value slot number in the global history
//PC    - address of the instruction that produced the value
//entry - the slot number to record
//With aliasing_reduction==0 the history is a shift register: everything moves one position
//down and the new slot number goes to position 0.  Otherwise the PC picks the one position
//that is overwritten and nothing is shifted.
void update_global_value_history_table(unsigned int PC, int entry)
{
	int pos;

	if (aliasing_reduction!=0)
	{
		//PC-indexed mode: each instruction overwrites its own position
		pos=(PC>>3)%value_context_history_size;
		global_value_index_history[pos]=entry;
		return;
	}

	//shift-register mode: age every position by one, newest goes in front
	for (pos=value_context_history_size-1; pos>=1; pos--)
		global_value_index_history[pos]=global_value_index_history[pos-1];
	global_value_index_history[0]=entry;
}

//initialize_perceptron_context4 builds the tables for the globally-guided local context predictor
//Every value history table entry gets a pool of past values, LRU ages, a private copy of
//the global history (input_value_history) and one perceptron per bit of a past-value index,
//each with a bias (index 0) plus one weight per history position, and a matching 'correct'
//array of the same shape.  The global history is created with every position empty (-1).
//Finishes by calling initialize_weight_tally().  Calls fatal() if an allocation fails.
void initialize_perceptron_context4()
{
	int i,j;
	int bits,slots;

	//number of index bits (= perceptrons per entry): smallest bits with 2^bits >= past_value_number
	//computed with integers so the allocations below and the row loop can never disagree,
	//which a truncated log2() did for sizes that are not a power of two
	bits=0;
	while (bits<30 && (1<<bits)<past_value_number)
		bits++;
	//the perceptron outputs can assemble any index below 2^bits, so the value pool is sized
	//to that many slots; slots past past_value_number simply stay 0
	slots=1<<bits;

	value_history_table=(vht_entry*)calloc(value_hash_table_size, sizeof(vht_entry));
	global_value_index_history=(int*)calloc(value_context_history_size, sizeof(int));

	if (value_history_table==0 || global_value_index_history==0)
		fatal("Not enough memory for value predictor");

	for (i=0; i<value_hash_table_size; i++)
	{
		//-1 so that no PC matches the entry before it has been trained
		value_history_table[i].tag=-1;

		//calloc zero-fills, so values, LRU ages, weights and correctness counts all start at 0
		value_history_table[i].past_value=(int*)calloc(slots, sizeof(int));
		if (value_history_table[i].past_value==0)
			fatal("Not enough memory for value predictor");
		value_history_table[i].LRU=(int*)calloc(past_value_number, sizeof(int));
		if (value_history_table[i].LRU==0)
			fatal("Not enough memory for value predictor");
		//ask for at least one pointer so a zero-bit configuration is not mistaken for a failure
		value_history_table[i].weight=(int**)calloc(bits>0? bits:1, sizeof(int*));
		if (value_history_table[i].weight==0)
			fatal("Not enough memory for value predictor");
		value_history_table[i].correct=(int**)calloc(bits>0? bits:1, sizeof(int*));
		if (value_history_table[i].correct==0)
			fatal("Not enough memory for value predictor");

		value_history_table[i].input_value_history=(int*)calloc(value_context_history_size, sizeof(int));
		if (value_history_table[i].input_value_history==0)
			fatal("Not enough memory for value predictor");

		//-1 marks a history position that holds no slot number yet
		for (j=0; j<value_context_history_size; j++)
			value_history_table[i].input_value_history[j]=-1;

		for (j=0; j<bits; j++)
		{
			value_history_table[i].weight[j]=(int*)calloc(value_context_history_size+1, sizeof(int));
			if (value_history_table[i].weight[j]==0)
				fatal("Not enough memory for value predictor");

			value_history_table[i].correct[j]=(int*)calloc(value_context_history_size+1, sizeof(int));
			if (value_history_table[i].correct[j]==0)
				fatal("Not enough memory for value predictor");
		}
	}

	//the global history starts out empty as well
	for (i=0; i<value_context_history_size; i++)
		global_value_index_history[i]=-1;

	initialize_weight_tally();
}

//get_perceptron_context4 returns the globally-guided local context prediction for PC
//The current global history is first copied into the entry's input_value_history (the
//training routine reads that copy).  If the entry's tag belongs to another PC the
//prediction is 0.  Otherwise one perceptron per index bit votes, using that bit of every
//non-empty global history position as a +1/-1 input plus a bias, and the assembled index
//selects the past value that is returned.
int get_perceptron_context4(unsigned int PC)
{
	int slot;
	int chosen;
	int bit,pos,vote;
	double nbits;

	slot=(PC>>3)%value_hash_table_size;

	//remember the history this prediction was made from
	for (pos=0; pos<value_context_history_size; pos++)
		value_history_table[slot].input_value_history[pos]=global_value_index_history[pos];

	//nothing is known about this PC yet
	if (value_history_table[slot].tag!=(PC>>3)/value_hash_table_size)
		return 0;

	nbits=log2(past_value_number);
	chosen=0;
	for (bit=0; bit<nbits; bit++)
	{
		vote=0;
		for (pos=0; pos<value_context_history_size; pos++)
		{
			//empty history positions (-1) contribute nothing
			if (global_value_index_history[pos]!=-1)
			{
				//weight 0 is the bias, so history position pos uses weight pos+1
				if ((global_value_index_history[pos]>>bit)%2==1)
					vote+=value_history_table[slot].weight[bit][pos+1];
				else
					vote-=value_history_table[slot].weight[bit][pos+1];
			}
		}
		vote+=value_history_table[slot].weight[bit][0];

		//a positive vote turns this index bit on
		if (vote>0)
			chosen+=(1<<bit);
	}

	return value_history_table[slot].past_value[chosen];
}

//get_perceptron_context_confidence4 reports whether the prediction for PC is trusted (1) or not (0)
//Confidence filtering is switched off: the function answers 1 before looking at any state
int get_perceptron_context_confidence4(unsigned int PC)
{
	int slot;

	return 1;

	//not reached: trust the entry once its confidence counter has climbed to value_threshold
	slot=(PC>>3)%value_hash_table_size;
	return (value_history_table[slot].confidence>=value_threshold)? 1:0;
}

//train_perceptron_context4 trains the globally-guided local context predictor
//PC        - address of the instruction; selects the value history table entry
//actual    - the value the instruction really produced
//predicted - the value that had been predicted; only compared with 'actual' for the confidence counter
//Steps, in order: claim the entry's tag (tallying its weights when the owner changes), make
//sure 'actual' is in the pool of past values (LRU replacement), train each bit perceptron
//against the history saved in input_value_history, push the value's slot number onto the
//global history, and update the entry's saturating confidence counter
void train_perceptron_context4(unsigned int PC, int actual, int predicted)
{
	int entry,i,j,k,l,sum,e;

	entry=(PC>>3)%value_hash_table_size;

	//set tag to current
	if (value_history_table[entry].tag!=(PC>>3)/value_hash_table_size)
	{
		value_history_table[entry].tag=(PC>>3)/value_hash_table_size;
		//tally_weights is defined elsewhere; it is given the entry, the perceptron count and the weights per perceptron
		tally_weights(entry,log2(past_value_number),value_context_history_size+1);
	}

	//have we seen this value before?  if not, swap it in
	//after this section i is the slot that holds 'actual'
	for (i=0; i<past_value_number; i++)
	{
		if (value_history_table[entry].past_value[i]==actual)
			break;
	}
	if (i==past_value_number)
	{
		//not found: the victim is the slot with the largest LRU age (lowest slot on ties)
		i=0;
		for (j=1; j<past_value_number; j++)
		{
			if (value_history_table[entry].LRU[j]>value_history_table[entry].LRU[i])
				i=j;
		} 
		value_history_table[entry].past_value[i]=actual;
	}
	//update LRU info
	//slots no older than slot i age by one, then slot i becomes the youngest (age 0)
	for (j=0; j<past_value_number; j++)
	{
		if (i==j)
			continue;
		if (value_history_table[entry].LRU[j]<=value_history_table[entry].LRU[i])
			value_history_table[entry].LRU[j]++;
	}
	value_history_table[entry].LRU[i]=0;
	//one perceptron per bit k of the slot number; its target is bit k of i
	for (k=0; k<log2(past_value_number); k++)
	{
		sum=0;
		//first get predicted value for this bit
		//uses the history saved in this entry rather than the live global history
		for (j=0; j<value_context_history_size; j++)
		{
			//empty history positions (-1) contribute nothing
			if (value_history_table[entry].input_value_history[j]==-1)
				continue;

			if ((value_history_table[entry].input_value_history[j]>>k)%2==1)
				sum+=value_history_table[entry].weight[k][j+1] * 1;
			else
				sum+=value_history_table[entry].weight[k][j+1] * -1;
		}
		sum+=value_history_table[entry].weight[k][0];

		//train bit
		//arguments: input (+1/-1), address of the weight, predicted bit, actual bit
		//set_weight_correctness additionally receives the matching 'correct' counter
		//both helpers are defined elsewhere
		for (j=0; j<value_context_history_size; j++)
		{
			if (value_history_table[entry].input_value_history[j]==-1)
				continue;


			train_perceptron_weight(((value_history_table[entry].input_value_history[j]>>k)%2==1)? 1:-1,&value_history_table[entry].weight[k][j+1], (sum>0)? 1:0, (i>>k)%2);
			set_weight_correctness(((value_history_table[entry].input_value_history[j]>>k)%2==1)? 1:-1,&value_history_table[entry].weight[k][j+1], &value_history_table[entry].correct[k][j+1], (sum>0)? 1:0, (i>>k)%2);

		}

		//train bias
		//the bias input is always 1
		train_perceptron_weight(1,&value_history_table[entry].weight[k][0],(sum>0)? 1:0, (i>>k)%2);
		set_weight_correctness(1,&value_history_table[entry].weight[k][0],&value_history_table[entry].correct[k][0],(sum>0)? 1:0, (i>>k)%2);
	}

	//update pattern history
	update_global_value_history_table(PC,i);

	//saturating confidence counter kept between 0 and value_threshold
	if (predicted==actual)
		value_history_table[entry].confidence++;
	else
		value_history_table[entry].confidence--;
	if (value_history_table[entry].confidence>value_threshold)
		value_history_table[entry].confidence=value_threshold;
	if (value_history_table[entry].confidence<0)
		value_history_table[entry].confidence=0;

	//NOTE(review): incremented on every call, whether or not the prediction was right - confirm intent
	value_history_table[entry].total_correct++;
}

//PERCEPTRON GLOBALLY-GUIDED LOCAL CONTEXT - HISTORY IS UPDATED WITH SPECULATIVE VALUES

//initialize_perceptron_context_spechist builds the tables for the globally-guided local
//context predictor whose global history is updated with speculative values
//Every value history table entry gets a pool of past values, LRU ages, a private copy of
//the global history (input_value_history) and one perceptron per bit of a past-value index,
//each with a bias (index 0) plus one weight per history position.  The global history is
//created with every position empty (-1).  Calls fatal() if an allocation fails.
void initialize_perceptron_context_spechist()
{
	int i,j;
	int bits,slots;

	//number of index bits (= perceptrons per entry): smallest bits with 2^bits >= past_value_number
	//computed with integers so the allocation below and the row loop can never disagree,
	//which a truncated log2() did for sizes that are not a power of two
	bits=0;
	while (bits<30 && (1<<bits)<past_value_number)
		bits++;
	//the perceptron outputs can assemble any index below 2^bits, so the value pool is sized
	//to that many slots; slots past past_value_number simply stay 0
	slots=1<<bits;

	value_history_table=(vht_entry*)calloc(value_hash_table_size, sizeof(vht_entry));
	global_value_index_history=(int*)calloc(value_context_history_size, sizeof(int));

	if (value_history_table==0 || global_value_index_history==0)
		fatal("Not enough memory for value predictor");

	for (i=0; i<value_hash_table_size; i++)
	{
		//-1 so that no PC matches the entry before it has been trained
		value_history_table[i].tag=-1;

		//calloc zero-fills, so values, LRU ages and weights all start at 0
		value_history_table[i].past_value=(int*)calloc(slots, sizeof(int));
		if (value_history_table[i].past_value==0)
			fatal("Not enough memory for value predictor");
		value_history_table[i].LRU=(int*)calloc(past_value_number, sizeof(int));
		if (value_history_table[i].LRU==0)
			fatal("Not enough memory for value predictor");
		//ask for at least one pointer so a zero-bit configuration is not mistaken for a failure
		value_history_table[i].weight=(int**)calloc(bits>0? bits:1, sizeof(int*));
		if (value_history_table[i].weight==0)
			fatal("Not enough memory for value predictor");

		value_history_table[i].input_value_history=(int*)calloc(value_context_history_size, sizeof(int));
		if (value_history_table[i].input_value_history==0)
			fatal("Not enough memory for value predictor");

		//-1 marks a history position that holds no slot number yet
		for (j=0; j<value_context_history_size; j++)
			value_history_table[i].input_value_history[j]=-1;

		for (j=0; j<bits; j++)
		{
			value_history_table[i].weight[j]=(int*)calloc(value_context_history_size+1, sizeof(int));
			if (value_history_table[i].weight[j]==0)
				fatal("Not enough memory for value predictor");
		}
	}

	//the global history starts out empty as well
	for (i=0; i<value_context_history_size; i++)
		global_value_index_history[i]=-1;
}

//get_perceptron_context_spechist returns the globally-guided prediction for PC and pushes
//the predicted slot number onto the global history right away (speculative history update)
//The current global history is first copied into the entry's input_value_history.  If the
//entry's tag belongs to another PC the prediction is 0 and the global history is left
//untouched.  Otherwise one perceptron per index bit votes, using that bit of every
//non-empty global history position as a +1/-1 input plus a bias.
int get_perceptron_context_spechist(unsigned int PC)
{
	int slot;
	int chosen;
	int bit,pos,vote;
	double nbits;

	slot=(PC>>3)%value_hash_table_size;

	//remember the history this prediction was made from
	for (pos=0; pos<value_context_history_size; pos++)
		value_history_table[slot].input_value_history[pos]=global_value_index_history[pos];

	//nothing is known about this PC yet
	if (value_history_table[slot].tag!=(PC>>3)/value_hash_table_size)
		return 0;

	nbits=log2(past_value_number);
	chosen=0;
	for (bit=0; bit<nbits; bit++)
	{
		vote=0;
		for (pos=0; pos<value_context_history_size; pos++)
		{
			//empty history positions (-1) contribute nothing
			if (global_value_index_history[pos]!=-1)
			{
				//weight 0 is the bias, so history position pos uses weight pos+1
				if ((global_value_index_history[pos]>>bit)%2==1)
					vote+=value_history_table[slot].weight[bit][pos+1];
				else
					vote-=value_history_table[slot].weight[bit][pos+1];
			}
		}
		vote+=value_history_table[slot].weight[bit][0];

		//a positive vote turns this index bit on
		if (vote>0)
			chosen+=(1<<bit);
	}

	//record the predicted slot in the global history before the real value is known
	update_global_value_history_table(PC,chosen);
	return value_history_table[slot].past_value[chosen];
}

//get_perceptron_context_confidence_spechist reports whether the speculative-history
//prediction for PC is trusted (1) or not (0)
//Confidence filtering is switched off: the function answers 1 before looking at any state
int get_perceptron_context_confidence_spechist(unsigned int PC)
{
	int slot;

	return 1;

	//not reached: trust the entry once its confidence counter has climbed to value_threshold
	slot=(PC>>3)%value_hash_table_size;
	return (value_history_table[slot].confidence>=value_threshold)? 1:0;
}

//train_perceptron_context_spechist updates the table entry for PC once the
//actual value is known.
//
//PC: address of the instruction
//actual: the value the instruction produced
//predicted: the value that had been predicted for it
//
//The entry's tag is set for this PC, the actual value is located in (or
//swapped into) the entry's past-value list, the LRU counters are aged, each
//index-bit perceptron is trained toward the matching bit of the slot that
//holds the actual value, and the confidence counter is moved up or down and
//clamped to [0, value_threshold].
void train_perceptron_context_spechist(unsigned int PC, int actual, int predicted)
{
	int entry,slot,j,k,sum,numbits,out,target;
	int *hist,*past,*lru,*w;

	entry=(PC>>3)%value_hash_table_size;

	//one perceptron per bit of the past-value index; truncate once so the
	//bound is an integer and log2 is not re-evaluated on every iteration
	numbits=(int)log2(past_value_number);

	hist=value_history_table[entry].input_value_history;
	past=value_history_table[entry].past_value;
	lru=value_history_table[entry].LRU;

	//set tag to current
	if (value_history_table[entry].tag!=(PC>>3)/value_hash_table_size)
		value_history_table[entry].tag=(PC>>3)/value_hash_table_size;

	//have we seen this value before?  if not, swap it in
	for (slot=0; slot<past_value_number; slot++)
	{
		if (past[slot]==actual)
			break;
	}
	if (slot==past_value_number)
	{
		//victim is the slot with the largest LRU counter (first one on ties)
		slot=0;
		for (j=1; j<past_value_number; j++)
		{
			if (lru[j]>lru[slot])
				slot=j;
		}
		past[slot]=actual;
	}

	//update LRU info: age every slot that was at least as recent as the one
	//just used, then mark the used slot as most recent
	for (j=0; j<past_value_number; j++)
	{
		if (j!=slot && lru[j]<=lru[slot])
			lru[j]++;
	}
	lru[slot]=0;

	for (k=0; k<numbits; k++)
	{
		w=value_history_table[entry].weight[k];

		//first get predicted value for this bit (slot 0 is the bias)
		sum=w[0];
		for (j=0; j<value_context_history_size; j++)
		{
			if (hist[j]==-1)
				continue;

			if ((hist[j]>>k)%2==1)
				sum+=w[j+1];
			else
				sum-=w[j+1];
		}
		out=(sum>0)? 1:0;
		target=(slot>>k)%2;

		//train bit
		for (j=0; j<value_context_history_size; j++)
		{
			if (hist[j]==-1)
				continue;

			train_perceptron_weight(((hist[j]>>k)%2==1)? 1:-1,&w[j+1],out,target);
		}

		//train bias
		train_perceptron_weight(1,&w[0],out,target);
	}

	//saturating confidence counter
	if (predicted==actual)
		value_history_table[entry].confidence++;
	else
		value_history_table[entry].confidence--;
	if (value_history_table[entry].confidence>value_threshold)
		value_history_table[entry].confidence=value_threshold;
	if (value_history_table[entry].confidence<0)
		value_history_table[entry].confidence=0;
}

//PERCEPTRON GLOBALLY-GUIDED LOCAL CONTEXT - PIECEWISE LINEAR APPROACH

void initialize_perceptron_context_piecewise()
{
	int i,j,k,l;

	value_history_table=(vht_entry*)calloc(value_hash_table_size, sizeof(vht_entry));
	global_value_index_history=(int*)calloc(value_context_history_size, sizeof(int));
	global_value_pc_index_history=(int*)calloc(value_context_history_size, sizeof(int));

	if (value_history_table==0 || global_value_index_history==0)
		fatal("Not enough memory for value predictor");

	for (i=0; i<value_hash_table_size; i++)
	{
		value_history_table[i].tag=-1;
		value_history_table[i].past_value=(int*)calloc(past_value_number, sizeof(int));
		if (value_history_table[i].past_value==0)
			fatal("Not enough memory for value predictor");
		value_history_table[i].LRU=(int*)calloc(past_value_number, sizeof(int));
		if (value_history_table[i].LRU==0)
			fatal("Not enough memory for value predictor");
		value_history_table[i].weight=(int**)calloc(log2(past_value_number), sizeof(int*));
		if (value_history_table[i].weight==0)
			fatal("Not enough memory for value predictor");

		value_history_table[i].input_value_history=(int*)calloc(value_context_history_size, sizeof(int));
		if (value_history_table[i].input_value_history==0)
			fatal("Not enough memory for value predictor");
		value_history_table[i].input_pc_history=(int*)calloc(value_context_history_size, sizeof(int));
		if (value_history_table[i].input_pc_history==0)
			fatal("Not enough memory for value predictor");

		for (j=0; j<value_context_history_size; j++)
			value_history_table[i].input_value_history[j]=-1;

		for (j=0; j<log2(past_value_number); j++)
		{
			value_history_table[i].weight[j]=(int*)calloc(value_context_history_size*value_context_history_size+1, sizeof(int));
			if (value_history_table[i].weight[j]==0)
				fatal("Not enough memory for value predictor");
		}
		for (j=0; j<past_value_number; j++)
		{
			value_history_table[i].past_value[j]=0;
			value_history_table[i].LRU[j]=0;
		}
		for (j=0; j<log2(past_value_number); j++)
			for (k=0; k<value_context_history_size*value_context_history_size+1; k++)
				value_history_table[i].weight[j][k]=0;
	}
	for (i=0; i<value_context_history_size; i++)
	{
		global_value_index_history[i]=-1;
	}
}

//get_perceptron_context_piecewise predicts a value for the instruction at PC.
//Each history position selects its weight by both its position and the PC
//index recorded alongside it in global_value_pc_index_history.
//
//PC: address of the instruction being predicted
//returns: the stored past value selected by the perceptron outputs, or 0
//when the table entry's tag does not match this PC
//
//side effect: the current global value and PC index histories are copied
//into the entry (even on a tag miss)
int get_perceptron_context_piecewise(unsigned int PC)
{
	int entry,bit,h,index,sum,numbits;
	int *w;

	entry=(PC>>3)%value_hash_table_size;

	//one perceptron per bit of the past-value index; truncate once so the
	//bound is an integer and log2 is not re-evaluated on every iteration
	numbits=(int)log2(past_value_number);

	//save the history
	for (h=0; h<value_context_history_size; h++)
	{
		value_history_table[entry].input_value_history[h]=global_value_index_history[h];
		value_history_table[entry].input_pc_history[h]=global_value_pc_index_history[h];
	}

	if (value_history_table[entry].tag!=(PC>>3)/value_hash_table_size)
		return 0;

	index=0;
	for (bit=0; bit<numbits; bit++)
	{
		w=value_history_table[entry].weight[bit];

		//slot 0 of each weight row is the bias
		sum=w[0];
		for (h=0; h<value_context_history_size; h++)
		{
			//history slots holding -1 contribute nothing
			if (global_value_index_history[h]==-1)
				continue;

			//input is +1 when this bit of the history index is set, -1 otherwise
			if ((global_value_index_history[h]>>bit)%2==1)
				sum+=w[h*value_context_history_size+global_value_pc_index_history[h]+1];
			else
				sum-=w[h*value_context_history_size+global_value_pc_index_history[h]+1];
		}
		if (sum>0)
			index|=1<<bit;
	}
	return value_history_table[entry].past_value[index];
}

//get_perceptron_context_confidence_piecewise: confidence query for the
//piecewise predictor.  PC is ignored; the answer is always 1.
int get_perceptron_context_confidence_piecewise(unsigned int PC)
{
	(void)PC;
	return 1;
}

//train_perceptron_context_piecewise updates the table entry for PC once the
//actual value is known, then pushes the outcome onto the global histories.
//
//PC: address of the instruction
//actual: the value the instruction produced
//predicted: the value that had been predicted for it (not used here)
//
//The entry's tag is set for this PC, the actual value is located in (or
//swapped into) the entry's past-value list, the LRU counters are aged, and
//each index-bit perceptron is trained toward the matching bit of the slot
//that holds the actual value.  Finally the slot index and a PC-derived index
//are shifted into global_value_index_history / global_value_pc_index_history.
void train_perceptron_context_piecewise(unsigned int PC, int actual, int predicted)
{
	int entry,slot,j,k,sum,numbits,out,target;
	int *hist,*pchist,*past,*lru,*w;

	entry=(PC>>3)%value_hash_table_size;

	//one perceptron per bit of the past-value index; truncate once so the
	//bound is an integer and log2 is not re-evaluated on every iteration
	numbits=(int)log2(past_value_number);

	hist=value_history_table[entry].input_value_history;
	pchist=value_history_table[entry].input_pc_history;
	past=value_history_table[entry].past_value;
	lru=value_history_table[entry].LRU;

	//set tag to current
	if (value_history_table[entry].tag!=(PC>>3)/value_hash_table_size)
		value_history_table[entry].tag=(PC>>3)/value_hash_table_size;

	//have we seen this value before?  if not, swap it in
	for (slot=0; slot<past_value_number; slot++)
	{
		if (past[slot]==actual)
			break;
	}
	if (slot==past_value_number)
	{
		//victim is the slot with the largest LRU counter (first one on ties)
		slot=0;
		for (j=1; j<past_value_number; j++)
		{
			if (lru[j]>lru[slot])
				slot=j;
		}
		past[slot]=actual;
	}

	//update LRU info
	for (j=0; j<past_value_number; j++)
	{
		if (j!=slot && lru[j]<=lru[slot])
			lru[j]++;
	}
	lru[slot]=0;

	for (k=0; k<numbits; k++)
	{
		w=value_history_table[entry].weight[k];

		//first get predicted value for this bit (slot 0 is the bias)
		sum=w[0];
		for (j=0; j<value_context_history_size; j++)
		{
			if (hist[j]==-1)
				continue;

			if ((hist[j]>>k)%2==1)
				sum+=w[j*value_context_history_size+pchist[j]+1];
			else
				sum-=w[j*value_context_history_size+pchist[j]+1];
		}
		out=(sum>0)? 1:0;
		target=(slot>>k)%2;

		//train bit
		for (j=0; j<value_context_history_size; j++)
		{
			if (hist[j]==-1)
				continue;

			train_perceptron_weight(((hist[j]>>k)%2==1)? 1:-1,&w[j*value_context_history_size+pchist[j]+1],out,target);
		}

		//train bias
		train_perceptron_weight(1,&w[0],out,target);
	}

	//update pattern history - shift everyone over
	for (j=value_context_history_size-1; j>0; j--)
	{
		global_value_index_history[j]=global_value_index_history[j-1];
		global_value_pc_index_history[j]=global_value_pc_index_history[j-1];
	}
	global_value_index_history[0]=slot;

	//a history of length 1 would make the modulus zero; use index 0 then
	//NOTE(review): the modulus is size-1, so the largest PC index is size-2 -
	//confirm that size-1 (rather than size) is intended
	if (value_context_history_size>1)
		global_value_pc_index_history[0]=PC%(value_context_history_size-1);
	else
		global_value_pc_index_history[0]=0;
}

//PERCEPTRON GLOBALLY-GUIDED LOCAL CONTEXT - INTERWEIGHT CORRELATION

void initialize_perceptron_context5()
{
	int i,j,k,l,numd;

	value_history_table=(vht_entry*)calloc(value_hash_table_size, sizeof(vht_entry));
	global_value_index_history=(int*)calloc(value_context_history_size, sizeof(int));

	if (value_history_table==0 || global_value_index_history==0)
		fatal("Not enough memory for value predictor");

	numd=(int)log2(past_value_number);

	for (i=0; i<value_hash_table_size; i++)
	{
		value_history_table[i].tag=-1;
		value_history_table[i].past_value=(int*)calloc(past_value_number, sizeof(int));
		if (value_history_table[i].past_value==0)
			fatal("Not enough memory for value predictor");
		value_history_table[i].LRU=(int*)calloc(past_value_number, sizeof(int));
		if (value_history_table[i].LRU==0)
			fatal("Not enough memory for value predictor");
		value_history_table[i].weight=(int**)calloc(log2(past_value_number), sizeof(int*));
		if (value_history_table[i].weight==0)
			fatal("Not enough memory for value predictor");
		value_history_table[i].correct=(int**)calloc(log2(past_value_number), sizeof(int*));
		if (value_history_table[i].correct==0)
			fatal("Not enough memory for value predictor");

		value_history_table[i].input_value_history=(int*)calloc(value_context_history_size, sizeof(int));
		if (value_history_table[i].input_value_history==0)
			fatal("Not enough memory for value predictor");

		for (j=0; j<value_context_history_size; j++)
			value_history_table[i].input_value_history[j]=-1;

		for (j=0; j<numd; j++)
		{
			value_history_table[i].weight[j]=(int*)calloc(value_context_history_size*numd+1, sizeof(int));
			if (value_history_table[i].weight[j]==0)
				fatal("Not enough memory for value predictor");
			value_history_table[i].correct[j]=(int*)calloc(value_context_history_size*numd+1, sizeof(int));
			if (value_history_table[i].correct[j]==0)
				fatal("Not enough memory for value predictor");
		}
		for (j=0; j<past_value_number; j++)
		{
			value_history_table[i].past_value[j]=0;
			value_history_table[i].LRU[j]=0;
		}
		for (j=0; j<numd; j++)
			for (k=0; k<value_context_history_size*numd+1; k++)
				value_history_table[i].weight[j][k]=0;
	}
	for (i=0; i<value_context_history_size; i++)
	{
		global_value_index_history[i]=-1;
	}

	initialize_weight_tally();
}

//get_perceptron_context5 predicts a value for the instruction at PC with the
//interweight-correlation perceptrons.
//
//PC: address of the instruction being predicted
//returns: the stored past value selected by the perceptron outputs, or 0
//when the table entry's tag does not match this PC
//
//side effect: the current global history is copied into the entry (even on
//a tag miss)
int get_perceptron_context5(unsigned int PC)
{
	int entry,bit,h,w,index,sum,sign,numd;
	int *row;

	numd=(int)log2(past_value_number);
	entry=(PC>>3)%value_hash_table_size;

	//save the history
	for (h=0; h<value_context_history_size; h++)
		value_history_table[entry].input_value_history[h]=global_value_index_history[h];

	if (value_history_table[entry].tag!=(PC>>3)/value_hash_table_size)
		return 0;

	index=0;
	for (bit=0; bit<numd; bit++)
	{
		row=value_history_table[entry].weight[bit];
		sum=0;
		for (h=0; h<value_context_history_size; h++)
		{
			if (global_value_index_history[h]==-1)
				continue;

			//the same input (bit `bit` of the history index) multiplies all
			//numd weights belonging to this history position
			//NOTE(review): train_perceptron_context5 feeds bit m of the history
			//value to weight m when updating - confirm which is intended
			sign=((global_value_index_history[h]>>bit)%2==1)? 1:-1;
			for (w=0; w<numd; w++)
				sum+=row[h*numd+w+1] * sign;
		}
		sum+=row[0];
		if (sum>0)
			index+=(int)pow(2,bit);
	}
	return value_history_table[entry].past_value[index];
}

//get_perceptron_context_confidence5: confidence query for the context5
//predictor.  PC is ignored; the answer is always 1.
int get_perceptron_context_confidence5(unsigned int PC)
{
	(void)PC;
	return 1;
}

//train_perceptron_context5 updates the table entry for PC once the actual
//value is known.
//
//PC: address of the instruction
//actual: the value the instruction produced
//predicted: the value that had been predicted for it (not used here)
//
//When the entry changes owner, tally_weights() is called for it.  The actual
//value is located in (or swapped into) the entry's past-value list, the LRU
//counters are aged, and every weight of every index-bit perceptron is passed
//to train_perceptron_weight() and set_weight_correctness().  Finally the slot
//index is handed to update_global_value_history_table() and the entry's
//total_correct counter is incremented.
void train_perceptron_context5(unsigned int PC, int actual, int predicted)
{
	int entry,slot,j,k,m,sum,numd,in,out,target;
	int *hist,*past,*lru,*w,*ok;

	entry=(PC>>3)%value_hash_table_size;
	numd=(int)log2(past_value_number);

	hist=value_history_table[entry].input_value_history;
	past=value_history_table[entry].past_value;
	lru=value_history_table[entry].LRU;

	//set tag to current
	if (value_history_table[entry].tag!=(PC>>3)/value_hash_table_size)
	{
		value_history_table[entry].tag=(PC>>3)/value_hash_table_size;
		tally_weights(entry,log2(past_value_number),value_context_history_size+1);
	}

	//have we seen this value before?  if not, swap it in
	for (slot=0; slot<past_value_number; slot++)
	{
		if (past[slot]==actual)
			break;
	}
	if (slot==past_value_number)
	{
		//victim is the slot with the largest LRU counter (first one on ties)
		slot=0;
		for (j=1; j<past_value_number; j++)
		{
			if (lru[j]>lru[slot])
				slot=j;
		}
		past[slot]=actual;
	}

	//update LRU info
	for (j=0; j<past_value_number; j++)
	{
		if (j!=slot && lru[j]<=lru[slot])
			lru[j]++;
	}
	lru[slot]=0;

	for (k=0; k<numd; k++)
	{
		w=value_history_table[entry].weight[k];
		ok=value_history_table[entry].correct[k];

		//first get predicted value for this bit
		//NOTE(review): this sum uses bit k of the history value for all numd
		//weights of a position, while the update below uses bit m for
		//weight m - confirm which is intended
		sum=0;
		for (j=0; j<value_context_history_size; j++)
		{
			if (hist[j]==-1)
				continue;

			for (m=0; m<numd; m++)
			{
				if ((hist[j]>>k)%2==1)
					sum+=w[j*numd+m+1];
				else
					sum-=w[j*numd+m+1];
			}
		}
		sum+=w[0];

		out=(sum>0)? 1:0;
		target=(slot>>k)%2;

		//train bit
		for (j=0; j<value_context_history_size; j++)
		{
			if (hist[j]==-1)
				continue;

			for (m=0; m<numd; m++)
			{
				in=((hist[j]>>m)%2==1)? 1:-1;
				train_perceptron_weight(in,&w[j*numd+m+1],out,target);
				set_weight_correctness(in,&w[j*numd+m+1],&ok[j*numd+m+1],out,target);
			}
		}

		//train bias
		train_perceptron_weight(1,&w[0],out,target);
		set_weight_correctness(1,&w[0],&ok[0],out,target);
	}

	//update pattern history - shift everyone over
	update_global_value_history_table(PC,slot);

	value_history_table[entry].total_correct++;
}

//PERCEPTRON GLOBALLY-GUIDED LOCAL CONTEXT WITH WEIGHT FOR EACH VALUE, ONE OUTPUT PER DIGIT


void initialize_perceptron_context6()
{
	int i,j,k,l,numd;

	value_history_table=(vht_entry*)calloc(value_hash_table_size, sizeof(vht_entry));
	global_value_index_history=(int*)calloc(value_context_history_size, sizeof(int));

	if (value_history_table==0 || global_value_index_history==0)
		fatal("Not enough memory for value predictor");

	numd=(int)log2(past_value_number);

	for (i=0; i<value_hash_table_size; i++)
	{
		value_history_table[i].tag=-1;
		value_history_table[i].past_value=(int*)calloc(past_value_number, sizeof(int));
		if (value_history_table[i].past_value==0)
			fatal("Not enough memory for value predictor");
		value_history_table[i].LRU=(int*)calloc(past_value_number, sizeof(int));
		if (value_history_table[i].LRU==0)
			fatal("Not enough memory for value predictor");
		value_history_table[i].weight=(int**)calloc(log2(past_value_number), sizeof(int*));
		if (value_history_table[i].weight==0)
			fatal("Not enough memory for value predictor");

		value_history_table[i].input_value_history=(int*)calloc(value_context_history_size, sizeof(int));
		if (value_history_table[i].input_value_history==0)
			fatal("Not enough memory for value predictor");

		for (j=0; j<value_context_history_size; j++)
			value_history_table[i].input_value_history[j]=-1;

		for (j=0; j<numd; j++)
		{
			value_history_table[i].weight[j]=(int*)calloc(value_context_history_size*past_value_number+1, sizeof(int));
			if (value_history_table[i].weight[j]==0)
				fatal("Not enough memory for value predictor");
		}
		for (j=0; j<past_value_number; j++)
		{
			value_history_table[i].past_value[j]=0;
			value_history_table[i].LRU[j]=0;
		}
		for (j=0; j<numd; j++)
			for (k=0; k<value_context_history_size*past_value_number+1; k++)
				value_history_table[i].weight[j][k]=0;
	}
	for (i=0; i<value_context_history_size; i++)
	{
		global_value_index_history[i]=-1;
	}
}

//get_perceptron_context6 predicts a value for the instruction at PC.  Each
//non-empty history position contributes the weight selected by its position
//and by the index it holds; the bias in slot 0 is added on top.
//
//PC: address of the instruction being predicted
//returns: the stored past value selected by the perceptron outputs, or 0
//when the table entry's tag does not match this PC
//
//side effect: the current global history is copied into the entry (even on
//a tag miss)
int get_perceptron_context6(unsigned int PC)
{
	int entry,bit,h,index,sum,numd;
	int *row;

	numd=(int)log2(past_value_number);
	entry=(PC>>3)%value_hash_table_size;

	//save the history
	for (h=0; h<value_context_history_size; h++)
		value_history_table[entry].input_value_history[h]=global_value_index_history[h];

	if (value_history_table[entry].tag!=(PC>>3)/value_hash_table_size)
		return 0;

	index=0;
	for (bit=0; bit<numd; bit++)
	{
		row=value_history_table[entry].weight[bit];
		sum=0;
		for (h=0; h<value_context_history_size; h++)
		{
			//only filled history slots select a weight
			if (global_value_index_history[h]!=-1)
				sum+=row[h*past_value_number+global_value_index_history[h]+1];
		}
		sum+=row[0];
		if (sum>0)
			index+=(int)pow(2,bit);
	}
	return value_history_table[entry].past_value[index];
}

//get_perceptron_context_confidence6: confidence query for the context6
//predictor.  PC is ignored; the answer is always 1.
int get_perceptron_context_confidence6(unsigned int PC)
{
	(void)PC;
	return 1;
}

//train_perceptron_context6 updates the table entry for PC once the actual
//value is known.
//
//PC: address of the instruction
//actual: the value the instruction produced
//predicted: the value that had been predicted for it (not used here)
//
//The entry's tag is set for this PC, the actual value is located in (or
//swapped into) the past-value list, the LRU counters are aged, the weights
//selected by the saved history are trained for every index bit, and the slot
//index is handed to update_global_value_history_table().
void train_perceptron_context6(unsigned int PC, int actual, int predicted)
{
	int entry,slot,n,bit,sum,numd,out,want;
	int *past,*lru,*hist,*row;

	numd=(int)log2(past_value_number);
	entry=(PC>>3)%value_hash_table_size;

	past=value_history_table[entry].past_value;
	lru=value_history_table[entry].LRU;
	hist=value_history_table[entry].input_value_history;

	//set tag to current
	if (value_history_table[entry].tag!=(PC>>3)/value_hash_table_size)
		value_history_table[entry].tag=(PC>>3)/value_hash_table_size;

	//look for the actual value among the stored ones
	slot=0;
	while (slot<past_value_number && past[slot]!=actual)
		slot++;

	//not found: replace the slot with the largest LRU counter
	if (slot==past_value_number)
	{
		slot=0;
		for (n=1; n<past_value_number; n++)
		{
			if (lru[n]>lru[slot])
				slot=n;
		}
		past[slot]=actual;
	}

	//age the other slots that were at least as recent, then reset this one
	for (n=0; n<past_value_number; n++)
	{
		if (n!=slot && lru[n]<=lru[slot])
			lru[n]++;
	}
	lru[slot]=0;

	for (bit=0; bit<numd; bit++)
	{
		row=value_history_table[entry].weight[bit];

		//recompute this bit's output from the saved history
		sum=0;
		for (n=0; n<value_context_history_size; n++)
		{
			if (hist[n]!=-1)
				sum+=row[n*past_value_number+hist[n]+1];
		}
		sum+=row[0];

		out=(sum>0)? 1:0;
		want=(slot>>bit)%2;

		//train the weights that took part in the sum
		for (n=0; n<value_context_history_size; n++)
		{
			if (hist[n]!=-1)
				train_perceptron_weight(1,&row[n*past_value_number+hist[n]+1],out,want);
		}

		//train bias
		train_perceptron_weight(1,&row[0],out,want);
	}

	//update pattern history - shift everyone over
	update_global_value_history_table(PC,slot);
}

//PERCEPTRON GLOBAL CONTEXT (USING METHOD 4 SETUP)

//value cache used by the context7 predictor: value_cache[i] holds a value
//and value_cache_LRU[i] is the age counter used to choose a replacement
//victim; both are allocated with value_cache_size elements by
//initialize_perceptron_context7
int* value_cache;
int* value_cache_LRU;

void initialize_perceptron_context7()
{
	int i,j,k,l;

	value_history_table=(vht_entry*)calloc(value_hash_table_size, sizeof(vht_entry));
	global_value_index_history=(int*)calloc(value_context_history_size, sizeof(int));

	if (value_history_table==0 || global_value_index_history==0)
		fatal("Not enough memory for value predictor");

	for (i=0; i<value_hash_table_size; i++)
	{
		value_history_table[i].tag=-1;
		value_history_table[i].weight=(int**)calloc(log2(value_cache_size), sizeof(int*));
		if (value_history_table[i].weight==0)
			fatal("Not enough memory for value predictor");

		value_history_table[i].input_value_history=(int*)calloc(value_context_history_size, sizeof(int));
		if (value_history_table[i].input_value_history==0)
			fatal("Not enough memory for value predictor");

		for (j=0; j<value_context_history_size; j++)
			value_history_table[i].input_value_history[j]=-1;

		for (j=0; j<log2(value_cache_size); j++)
		{
			value_history_table[i].weight[j]=(int*)calloc(value_context_history_size+1, sizeof(int));
			if (value_history_table[i].weight[j]==0)
				fatal("Not enough memory for value predictor");
		}
		for (j=0; j<log2(value_cache_size); j++)
			for (k=0; k<value_context_history_size+1; k++)
				value_history_table[i].weight[j][k]=0;
	}
	for (i=0; i<value_context_history_size; i++)
	{
		global_value_index_history[i]=-1;
	}

	value_cache=(int*)calloc(value_cache_size, sizeof(int));
	if (value_cache==0)
		fatal("Not enough memory for value predictor");
	value_cache_LRU=(int*)calloc(value_cache_size, sizeof(int));
	if (value_cache_LRU==0)
		fatal("Not enough memory for value predictor");

	for (i=0; i<value_cache_size; i++)
	{
		value_cache[i]=0;
		value_cache_LRU[i]=0;
	}
}

//get_perceptron_context7 predicts a value for the instruction at PC: the
//perceptron outputs form an index into the value cache.
//
//PC: address of the instruction being predicted
//returns: value_cache[index] for the index chosen by the perceptrons, or 0
//when the table entry's tag does not match this PC
//
//side effect: the current global history is copied into the entry (even on
//a tag miss)
int get_perceptron_context7(unsigned int PC)
{
	int entry,bit,h,index,sum,nbits;
	int *row;

	entry=(PC>>3)%value_hash_table_size;

	//save the history
	for (h=0; h<value_context_history_size; h++)
		value_history_table[entry].input_value_history[h]=global_value_index_history[h];

	if (value_history_table[entry].tag!=(PC>>3)/value_hash_table_size)
		return 0;

	nbits=(int)log2(value_cache_size);
	index=0;
	for (bit=0; bit<nbits; bit++)
	{
		row=value_history_table[entry].weight[bit];
		sum=0;
		for (h=0; h<value_context_history_size; h++)
		{
			if (global_value_index_history[h]==-1)
				continue;

			//input is +1 when this bit of the history index is set, -1 otherwise
			sum+=row[h+1] * (((global_value_index_history[h]>>bit)%2==1)? 1:-1);
		}
		sum+=row[0];
		if (sum>0)
			index+=(int)pow(2,bit);
	}

	return value_cache[index];
}

//get_perceptron_context_confidence7: confidence query for the context7
//predictor.  PC is ignored; the answer is always 1.
int get_perceptron_context_confidence7(unsigned int PC)
{
	(void)PC;
	return 1;
}

//train_perceptron_context7 updates the predictor once the actual value for
//PC is known.
//
//PC: address of the instruction
//actual: the value the instruction produced
//predicted: the value that had been predicted for it (not used here)
//
//The entry's tag is set for this PC, the actual value is located in (or
//swapped into) the value cache, the cache LRU counters are aged, every
//index-bit perceptron of the entry is trained toward the matching bit of the
//cache slot, and the slot index is handed to
//update_global_value_history_table().
void train_perceptron_context7(unsigned int PC, int actual, int predicted)
{
	int entry,slot,n,bit,sum,nbits,out,want;
	int *hist,*row;

	entry=(PC>>3)%value_hash_table_size;
	hist=value_history_table[entry].input_value_history;

	//set tag to current
	if (value_history_table[entry].tag!=(PC>>3)/value_hash_table_size)
		value_history_table[entry].tag=(PC>>3)/value_hash_table_size;

	//get value's index: look for the actual value in the cache
	slot=0;
	while (slot<value_cache_size && value_cache[slot]!=actual)
		slot++;

	//not cached: replace the slot with the largest LRU counter
	if (slot==value_cache_size)
	{
		slot=0;
		for (n=1; n<value_cache_size; n++)
		{
			if (value_cache_LRU[n]>value_cache_LRU[slot])
				slot=n;
		}
		value_cache[slot]=actual;
	}

	//age the other slots that were at least as recent, then reset this one
	for (n=0; n<value_cache_size; n++)
	{
		if (n!=slot && value_cache_LRU[n]<=value_cache_LRU[slot])
			value_cache_LRU[n]++;
	}
	value_cache_LRU[slot]=0;

	nbits=(int)log2(value_cache_size);
	for (bit=0; bit<nbits; bit++)
	{
		row=value_history_table[entry].weight[bit];

		//recompute this bit's output from the saved history
		sum=0;
		for (n=0; n<value_context_history_size; n++)
		{
			if (hist[n]==-1)
				continue;

			sum+=row[n+1] * (((hist[n]>>bit)%2==1)? 1:-1);
		}
		sum+=row[0];

		out=(sum>0)? 1:0;
		want=(slot>>bit)%2;

		//train bit
		for (n=0; n<value_context_history_size; n++)
		{
			if (hist[n]!=-1)
				train_perceptron_weight(((hist[n]>>bit)%2==1)? 1:-1,&row[n+1],out,want);
		}

		//train bias
		train_perceptron_weight(1,&row[0],out,want);
	}

	//update pattern history - shift everyone over
	update_global_value_history_table(PC,slot);
}

//BITWISE PREDICTION - Based off of context7 above

//statistics gathered by train_perceptron_context8
//number of training calls
unsigned int total_values_11=0;
//correct predictions whose value was absent from the entry's local past values
unsigned int total_values_unseen_locally_11=0;
//of those, the ones whose value was also absent from the global history
unsigned int total_values_unseen_locally_globally_11=0;

void initialize_perceptron_context8()
{
	int i,j,k,l;

	value_history_table=(vht_entry*)calloc(value_hash_table_size, sizeof(vht_entry));
	global_value_index_history=(int*)calloc(value_context_history_size, sizeof(int));

	if (value_history_table==0 || global_value_index_history==0)
		fatal("Not enough memory for value predictor");

	for (i=0; i<value_hash_table_size; i++)
	{
		value_history_table[i].tag=-1;
		value_history_table[i].weight=(int**)calloc(32, sizeof(int*));
		if (value_history_table[i].weight==0)
			fatal("Not enough memory for value predictor");

		for (j=0; j<32; j++)
		{
			value_history_table[i].weight[j]=(int*)calloc(value_context_history_size+1, sizeof(int));
			if (value_history_table[i].weight[j]==0)
				fatal("Not enough memory for value predictor");
		}
		for (j=0; j<32; j++)
			for (k=0; k<value_context_history_size+1; k++)
				value_history_table[i].weight[j][k]=0;

		value_history_table[i].input_value_history=(int*)calloc(value_context_history_size, sizeof(int));
		if (value_history_table[i].input_value_history==0)
			fatal("Not enough memory for value predictor");

		for (j=0; j<value_context_history_size; j++)
			value_history_table[i].input_value_history[j]=0;

		value_history_table[i].past_value=(int*)calloc(value_context_history_size, sizeof(int));
		if (value_history_table[i].past_value==0)
			fatal("Not enough memory for value predictor");

		for (j=0; j<value_context_history_size; j++)
			value_history_table[i].past_value[j]=0;
	}
	for (i=0; i<value_context_history_size; i++)
	{
		global_value_index_history[i]=0;
	}
}

//get_perceptron_context8 predicts a 32-bit value for the instruction at PC,
//one perceptron per bit.
//
//PC: address of the instruction being predicted
//returns: the value assembled from the 32 perceptron outputs, or 0 when the
//table entry's tag does not match this PC
//
//side effect: the current global history is copied into the entry (even on
//a tag miss)
int get_perceptron_context8(unsigned int PC)
{
	int entry,bit,h,sum;
	int *w;
	unsigned int value;

	entry=(PC>>3)%value_hash_table_size;

	//save the history
	for (h=0; h<value_context_history_size; h++)
		value_history_table[entry].input_value_history[h]=global_value_index_history[h];

	if (value_history_table[entry].tag!=(PC>>3)/value_hash_table_size)
		return 0;

	//assemble the word in an unsigned so that setting bit 31 is well defined
	//(2^31 does not fit in an int)
	value=0;
	for (bit=0; bit<32; bit++)
	{
		w=value_history_table[entry].weight[bit];

		//slot 0 of each weight row is the bias
		sum=w[0];
		for (h=0; h<value_context_history_size; h++)
		{
			//NOTE(review): for a negative history value (x>>bit)%2 is never 1,
			//so its set bits are read as 0 here; left unchanged because
			//train_perceptron_context8 uses the same test - confirm intent
			if ((global_value_index_history[h]>>bit)%2==1)
				sum+=w[h+1];
			else
				sum-=w[h+1];
		}
		if (sum>0)
			value|=1u<<bit;
	}
	return (int)value;
}

//get_perceptron_context_confidence8: confidence query for the context8
//predictor.  PC is ignored; the answer is always 1.
int get_perceptron_context_confidence8(unsigned int PC)
{
	(void)PC;
	return 1;
}

//train_perceptron_context8 trains the 32 per-bit perceptrons of the entry
//for PC toward the bits of the actual value and updates the study counters.
//
//PC: address of the instruction
//actual: the value the instruction produced
//predicted: the value that had been predicted for it
//
//After training, total_values_11 is incremented; for a correct prediction
//whose value is in neither the entry's local past values nor the global
//history, the "unseen" counters are incremented.  The actual value is then
//handed to update_global_value_history_table() and shifted into the entry's
//local past-value list.
void train_perceptron_context8(unsigned int PC, int actual, int predicted)
{
	int entry,j,k,sum,out,target;
	int *hist,*past,*w;

	entry=(PC>>3)%value_hash_table_size;
	hist=value_history_table[entry].input_value_history;
	past=value_history_table[entry].past_value;

	//set tag to current
	if (value_history_table[entry].tag!=(PC>>3)/value_hash_table_size)
		value_history_table[entry].tag=(PC>>3)/value_hash_table_size;

	for (k=0; k<32; k++)
	{
		w=value_history_table[entry].weight[k];

		//first get predicted value for this bit (slot 0 is the bias)
		sum=w[0];
		for (j=0; j<value_context_history_size; j++)
		{
			if ((hist[j]>>k)%2==1)
				sum+=w[j+1];
			else
				sum-=w[j+1];
		}
		out=(sum>0)? 1:0;
		//NOTE(review): for negative values (x>>k)%2 yields 0 or -1, never 1;
		//kept as is to match get_perceptron_context8 - confirm intent
		target=(actual>>k)%2;

		//train bit
		for (j=0; j<value_context_history_size; j++)
		{
			//NOTE(review): -1 is skipped here but not in the sum above, and
			//this predictor's history starts at 0 rather than -1 - confirm
			if (hist[j]==-1)
				continue;

			train_perceptron_weight(((hist[j]>>k)%2==1)? 1:-1,&w[j+1],out,target);
		}

		//train bias: the bias lives in slot 0 (indexing with the finished loop
		//counter would point one element past the end of the row)
		train_perceptron_weight(1,&w[0],out,target);
	}

	total_values_11++;
	if (predicted==actual)
	{
		//was the correctly predicted value present in the local history?
		for (j=0; j<value_context_history_size; j++)
		{
			if (past[j]==predicted)
				break;
		}
		if (j==value_context_history_size)
		{
			total_values_unseen_locally_11++;

			//not local - was it at least in the global history?
			for (j=0; j<value_context_history_size; j++)
			{
				if (global_value_index_history[j]==predicted)
					break;
			}
			if (j==value_context_history_size)
				total_values_unseen_locally_globally_11++;
		}
	}

	//update pattern history - shift everyone over
	update_global_value_history_table(PC,actual);

	//save local as well for study
	for (j=value_context_history_size-1; j>0; j--)
		past[j]=past[j-1];
	past[0]=actual;
}


//STUDY 1: HOW MANY INST PRODUCE VALUES THAT WERE SEEN GLOBALLY BUT NOT LOCALLY?
//per-table-entry record for study 1
typedef struct
{
	//most recent values recorded for this entry, newest at index 0
	int local[50];
	//(PC>>3)/value_hash_table_size of the instruction owning the entry, -1 initially
	int tag;
	//number of slots of local[] that hold recorded values (capped at 50)
	int local_valid_entries;
} Vp_inst_data;

//table of study-1 records, indexed by (PC>>3)%value_hash_table_size
Vp_inst_data *vp_inst_data;
//most recent values recorded across all instructions, newest at index 0
int global_value_history_1[50];
//number of slots of global_value_history_1 that hold recorded values (capped at 50)
int global_value_history_valid_entries_1=0;
//number of calls to tally_study_1
unsigned int total_value_1=0;
//values found in the local history but not the global one
unsigned int in_local_only_1=0;
//values found in the global history but not the local one
unsigned int in_global_only_1=0;
//values found in both histories
unsigned int in_global_and_local_1=0;
//number of times an entry was claimed by a new tag
unsigned int cold_local_value_1=0;

//initialize_study_1 allocates the study-1 table with value_hash_table_size
//records, marks every record's tag as -1 and clears the 50-entry global
//value history.  Calls fatal() if the table cannot be allocated.
void initialize_study_1()
{
	int n;

	vp_inst_data=(Vp_inst_data*)calloc(value_hash_table_size, sizeof(Vp_inst_data));
	if (vp_inst_data==0)
		fatal("Cannot initialize value predictor");

	//no record belongs to any instruction yet
	n=0;
	while (n<value_hash_table_size)
	{
		vp_inst_data[n].tag=-1;
		n++;
	}

	for (n=49; n>=0; n--)
		global_value_history_1[n]=0;
}

//tally_study_1 records one produced value for study 1.
//
//PC: address of the instruction that produced the value
//value: the value it produced
//
//The record for PC is reset when its tag changes (counted in
//cold_local_value_1).  The value is then looked up in the record's local
//history and in the global history, the matching counter is incremented,
//and the value is pushed onto the front of both histories.
void tally_study_1(unsigned int PC, int value)
{
	int entry,tag,pos,seen_local,seen_global;
	int *local;

	entry=(PC>>3)%value_hash_table_size;
	tag=(PC>>3)/value_hash_table_size;
	local=vp_inst_data[entry].local;

	//eliminate aliasing by checking tag
	if (vp_inst_data[entry].tag!=tag)
	{
		cold_local_value_1++;
		vp_inst_data[entry].local_valid_entries=0;
		vp_inst_data[entry].tag=tag;
	}

	total_value_1++;

	//check if in local history
	seen_local=0;
	for (pos=0; pos<vp_inst_data[entry].local_valid_entries && !seen_local; pos++)
	{
		if (local[pos]==value)
			seen_local=1;
	}

	//check if in global history
	seen_global=0;
	for (pos=0; pos<global_value_history_valid_entries_1 && !seen_global; pos++)
	{
		if (global_value_history_1[pos]==value)
			seen_global=1;
	}

	//the three cases are mutually exclusive; unseen values are not counted
	if (seen_local && seen_global)
		in_global_and_local_1++;
	else if (seen_local)
		in_local_only_1++;
	else if (seen_global)
		in_global_only_1++;

	//save the global value history
	for (pos=49; pos>0; pos--)
		global_value_history_1[pos]=global_value_history_1[pos-1];
	global_value_history_1[0]=value;
	if (++global_value_history_valid_entries_1>50)
		global_value_history_valid_entries_1=50;

	//save the local value history
	for (pos=49; pos>0; pos--)
		local[pos]=local[pos-1];
	local[0]=value;
	if (++vp_inst_data[entry].local_valid_entries>50)
		vp_inst_data[entry].local_valid_entries=50;
}

//STUDY 2: ARE THERE CORRELATIONS BETWEEN GLOBAL VALUES?
//Questions:
//For each PC:
//Was there a past PC for which
//there was more than one past value seen more than 5 times, and:
//1) each past value seen more than 5 times correlated with a local value 100% of the time?
//2) each past value seen more than 5 times correlated with a local value 90% of the time?
//3) at least one of the past values seen more than 5 times correlated with a local value 100% of the time?
//4) did the count of those past values that correlated 100% exceed 50% of the values seen more than 5 times?

//per-table-entry record for study 2
typedef struct
{
	//tally[k] is read by get_stats_study_2 as a flattened
	//past_value_number x past_value_number table of counts for history position k
	int** tally;
	//set to -1 by initialize_study_2
	int tag;
	//NOTE(review): presumably this entry's local value list - not used in the
	//code visible here, confirm against its users
	int* local;

} Vp_correlation;

//table of study-2 records with value_hash_table_size entries
Vp_correlation *vp_correlation;
//global histories with value_context_history_size slots each
int* global_value_history_2;
unsigned int* global_pc_history_2;

//initialize_study_2 allocates the study-2 record table and the two global
//histories (value and PC), then marks every record's tag as -1.
//Calls fatal() if any of the three allocations fails.
void initialize_study_2()
{
	int n;

	vp_correlation=(Vp_correlation*)calloc(value_hash_table_size,sizeof(Vp_correlation));
	global_value_history_2=(int*)calloc(value_context_history_size,sizeof(int));
	global_pc_history_2=(unsigned int*)calloc(value_context_history_size,sizeof(unsigned int));

	if (vp_correlation==0 || global_value_history_2==0 || global_pc_history_2==0)
		fatal("Not enough memory for value predictor");

	//no record belongs to any instruction yet
	n=0;
	while (n<value_hash_table_size)
	{
		vp_correlation[n].tag=-1;
		n++;
	}
}

//study 2 result counters, all updated by get_stats_study_2

//number of table entries examined (one per get_stats_study_2 call)
unsigned int total_pcs_2=0;
//total pcs that had a past pc for which there were 2 or more values seen 5 or more times
unsigned int total_pcs_more_than_5_2=0;
//entries for which some history position answered question 1 / 2 / 3 / 4 with yes
//(the questions are listed in the STUDY 2 comment above)
unsigned int total_pcs_question1_2=0;
unsigned int total_pcs_question2_2=0;
unsigned int total_pcs_question3_2=0;
unsigned int total_pcs_question4_2=0;

//sum of every tally count of every entry examined
unsigned int total_values_2=0;
//same sum, restricted to the entries counted in total_pcs_more_than_5_2
unsigned int total_values_more_than_5_2=0;

//Sum of one row of a flattened past_value_number x past_value_number tally
//matrix: every count recorded for past-value bucket `row`.
static int study2_row_sum(const int* cells, int row)
{
	int col,acc;

	acc=0;
	for (col=0; col<past_value_number; col++)
		acc+=cells[row*past_value_number+col];
	return acc;
}

//Sum of one column of a flattened tally matrix: every count recorded for
//local-value bucket `col`.
static int study2_column_sum(const int* cells, int col)
{
	int row,acc;

	acc=0;
	for (row=0; row<past_value_number; row++)
		acc+=cells[row*past_value_number+col];
	return acc;
}

//Returns 1 when every non-zero count in `row` equals the row total, i.e. the
//past value always appeared with the same local bucket; 0 otherwise.
static int study2_row_is_exact(const int* cells, int row, int row_total)
{
	int col,count;

	for (col=0; col<past_value_number; col++)
	{
		count=cells[row*past_value_number+col];
		if (count>0 && count<row_total)
			return 0;
	}
	return 1;
}

//Returns 1 when no count in `row` lies strictly between 10% and 90% of the
//row total; 0 otherwise.
static int study2_row_is_near_exact(const int* cells, int row, int row_total)
{
	int col,count;

	for (col=0; col<past_value_number; col++)
	{
		count=cells[row*past_value_number+col];
		if (count>row_total*0.1 && count<row_total*0.9)
			return 0;
	}
	return 1;
}

//Adds up every count in every tally matrix of one table entry.
static unsigned int study2_entry_total(int** history)
{
	unsigned int acc;
	int slot,cell;

	acc=0;
	for (slot=0; slot<value_context_history_size; slot++)
		for (cell=0; cell<past_value_number*past_value_number; cell++)
			acc+=history[slot][cell];
	return acc;
}

//get_stats_study_2 folds the tallies of table entry `entry` into the study 2
//counters.
//The entry is always counted in total_pcs_2 and total_values_2. It is then
//ignored unless at least one of its matrices has two or more local-value
//columns with a total of at least 5. For the remaining entries every history
//position whose matrix has two or more past-value rows with a total of at
//least 5 ("frequent" rows) is tested against questions 1-4; a question is
//answered yes for the entry when any such position satisfies it.
void get_stats_study_2(int entry)
{
	int** history=vp_correlation[entry].tally;
	int slot,bucket,frequent,row_total,exact_weight,frequent_weight;
	int saw_frequent=0,all_exact=0,all_near_exact=0,some_exact=0,majority_exact=0;

	total_pcs_2++;
	total_values_2+=study2_entry_total(history);

	//question -1: does the current entry have more than one value seen at least 5 times?
	for (slot=0; slot<value_context_history_size; slot++)
	{
		frequent=0;
		for (bucket=0; bucket<past_value_number; bucket++)
			if (study2_column_sum(history[slot],bucket)>=5)
				frequent++;
		if (frequent>=2)
			break;
	}
	if (slot==value_context_history_size)
		return;

	for (slot=0; slot<value_context_history_size; slot++)
	{
		const int* cells=history[slot];
		int every_exact=1,every_near_exact=1;

		//question 0: was there more than one past value seen at least 5 times?
		frequent=0;
		for (bucket=0; bucket<past_value_number; bucket++)
			if (study2_row_sum(cells,bucket)>=5)
				frequent++;
		if (frequent<2)
			continue;
		saw_frequent=1;

		//questions 1-4 only look at the frequent rows, so one pass answers them all
		exact_weight=0;
		frequent_weight=0;
		for (bucket=0; bucket<past_value_number; bucket++)
		{
			row_total=study2_row_sum(cells,bucket);
			if (row_total<5)
				continue;

			frequent_weight+=row_total;
			if (study2_row_is_exact(cells,bucket,row_total))
				exact_weight+=row_total;
			else
				every_exact=0;
			if (!study2_row_is_near_exact(cells,bucket,row_total))
				every_near_exact=0;
		}

		//question 1: every frequent row correlated 100%
		if (every_exact)
			all_exact=1;
		//question 2: every frequent row correlated about 90%
		if (every_near_exact)
			all_near_exact=1;
		//question 3: at least one frequent row correlated 100%
		if (exact_weight>0)
			some_exact=1;
		//question 4: the 100% rows account for at least half of the frequent counts
		if (exact_weight*2>=frequent_weight)
			majority_exact=1;
	}

	if (all_exact)
		total_pcs_question1_2++;
	if (all_near_exact)
		total_pcs_question2_2++;
	if (some_exact)
		total_pcs_question3_2++;
	if (majority_exact)
		total_pcs_question4_2++;

	if (saw_frequent)
	{
		total_pcs_more_than_5_2++;
		total_values_more_than_5_2+=study2_entry_total(history);
	}
}

//Maps a value onto one of past_value_number buckets using its magnitude.
//The magnitude is formed in unsigned arithmetic so that the most negative int,
//whose absolute value does not fit in an int, needs no special handling.
static int study2_value_bucket(int v)
{
	unsigned int magnitude;

	if (v<0)
		magnitude=0u-(unsigned int)v;
	else
		magnitude=(unsigned int)v;

	return (int)(magnitude%(unsigned int)past_value_number);
}

//tally_study_2 records one (PC, value) outcome for study 2.
//  PC    - address of the instruction that produced the value
//  value - the value it produced
//The instruction is looked up in vp_correlation (index (PC>>3)%value_hash_table_size,
//tag = remaining upper bits). If another instruction owned the slot, its
//tallies are first reported through get_stats_study_2 and cleared; if the slot
//was never used, its tally matrices are allocated (fatal() on failure).
//The value is then tallied against every entry of the global history, and
//finally (PC, value) is pushed onto the front of that history.
void tally_study_2(unsigned int PC, int value)
{
	int entry,tag,p,l,pc,i,j;

	entry=(PC>>3)%value_hash_table_size;
	tag=(PC>>3)/value_hash_table_size;

	if (tag!=vp_correlation[entry].tag)
	{
		if (vp_correlation[entry].tag!=-1)
		{
			//different PC to same spot - tally old entries and clear
			get_stats_study_2(entry);
			for (i=0; i<value_context_history_size; i++)
				for (j=0; j<past_value_number*past_value_number; j++)
					vp_correlation[entry].tally[i][j]=0;
		}
		else
		{
			//brand new PC - allocate memory for tally
			//(calloc returns zeroed memory, so no separate clearing pass is needed)
			vp_correlation[entry].tally=(int**)calloc(value_context_history_size, sizeof(int*));
			if (vp_correlation[entry].tally==0)
				fatal("Not enough memory for value predictor");
			for (i=0; i<value_context_history_size; i++)
			{
				vp_correlation[entry].tally[i]=(int*)calloc(past_value_number*past_value_number, sizeof(int));
				if (vp_correlation[entry].tally[i]==0)
					fatal("Not enough memory for value predictor");
			}
		}
		vp_correlation[entry].tag=tag;
	}

	//the local bucket depends only on the new value, so compute it once
	l=study2_value_bucket(value);

	//tally correlation against each entry of the global history
	for (i=0; i<value_context_history_size; i++)
	{
		//the past PC is hashed down to one of the value_context_history_size tally rows
		pc=(global_pc_history_2[i]>>3) % value_context_history_size;
		p=study2_value_bucket(global_value_history_2[i]);

		vp_correlation[entry].tally[pc][p*past_value_number+l]++;
	}

	//shift global history towards the old end.
	//the loop stops at index 1: slot 0 is overwritten below, and copying into it
	//would read element -1, which lies outside both arrays
	for (i=value_context_history_size-1; i>=1; i--)
	{
		global_pc_history_2[i]=global_pc_history_2[i-1];
		global_value_history_2[i]=global_value_history_2[i-1];
	}
	//with an empty history there is no slot 0 to write
	if (value_context_history_size>0)
	{
		global_pc_history_2[0]=PC;
		global_value_history_2[0]=value;
	}
}

//GENERAL ROUTINES

//initialize_value_predictor sets up whichever value predictor value_predict
//selects. A value of 0 means value prediction is off and nothing is done.
//Otherwise load_predict is forced to 0 and the initializer for the selected
//type is run:
//  1 stride, 2 context, 3 context-stride hybrid (context tables with
//  context_stride_hybrid set), 4-11 perceptron context variants 1-8,
//  12 piecewise perceptron, 13 speculative-history perceptron.
//Any other value leaves the predictor tables untouched.
//The study 2 initializer (initialize_study_2) is not called from here; it was
//disabled in the original code.
void initialize_value_predictor()
{
	if (value_predict==0)
		return;

	//an older scheme derived load_predict and value_predict from one setting:
	//	load_predict=(value_predict+1)%2;
	//	value_predict=(value_predict+1)>>1;
	load_predict=0;

	switch (value_predict)
	{
	case 1:
		initialize_stride();
		break;
	case 2:
		initialize_context();
		break;
	case 3:
		//the hybrid flag is raised before the context tables are built
		context_stride_hybrid=1;
		initialize_context();
		break;
	case 4:
		initialize_perceptron_context1();
		break;
	case 5:
		initialize_perceptron_context2();
		break;
	case 6:
		initialize_perceptron_context3();
		break;
	case 7:
		initialize_perceptron_context4();
		break;
	case 8:
		initialize_perceptron_context5();
		break;
	case 9:
		initialize_perceptron_context6();
		break;
	case 10:
		initialize_perceptron_context7();
		break;
	case 11:
		initialize_perceptron_context8();
		break;
	case 12:
		initialize_perceptron_context_piecewise();
		break;
	case 13:
		initialize_perceptron_context_spechist();
		break;
	default:
		break;
	}
}

//get_value_prediction returns the predicted result value for the instruction
//at PC, as produced by the predictor selected in value_predict.
//  PC        - address of the instruction being predicted
//  inst_type - instruction class; only consulted when load_predict is 1, in
//              which case every type other than 4 gets no prediction
//Returns 0 when value prediction is off, when the instruction is filtered
//out, or when value_predict names no known predictor.
int get_value_prediction(unsigned int PC, int inst_type)
{
	if (value_predict==0 || (load_predict==1 && inst_type!=4))
		return 0;

	switch (value_predict)
	{
	case 1:		//stride
		return get_stride(PC);
	case 2:		//context
	case 3:		//context-stride hybrid
		return get_context(PC);
	case 4:		//perceptron context variants
		return get_perceptron_context1(PC);
	case 5:
		return get_perceptron_context2(PC);
	case 6:
		return get_perceptron_context3(PC);
	case 7:
		return get_perceptron_context4(PC);
	case 8:
		return get_perceptron_context5(PC);
	case 9:
		return get_perceptron_context6(PC);
	case 10:
		return get_perceptron_context7(PC);
	case 11:
		return get_perceptron_context8(PC);
	case 12:
		return get_perceptron_context_piecewise(PC);
	case 13:
		return get_perceptron_context_spechist(PC);
	default:
		return 0;
	}
}

//get_value_prediction_confidence returns the confidence that the predictor
//selected in value_predict reports for the instruction at PC.
//  PC        - address of the instruction being predicted
//  inst_type - instruction class; only consulted when load_predict is 1, in
//              which case every type other than 4 gets confidence 0
//Returns 0 when value prediction is off, when the instruction is filtered
//out, or when value_predict names no known predictor.
int get_value_prediction_confidence(unsigned int PC, int inst_type)
{
	if (value_predict==0 || (load_predict==1 && inst_type!=4))
		return 0;

	switch (value_predict)
	{
	case 1:		//stride
		return get_stride_confidence(PC);
	case 2:		//context
	case 3:		//context-stride hybrid
		return get_context_confidence(PC);
	case 4:		//perceptron context variants
		return get_perceptron_context_confidence1(PC);
	case 5:
		return get_perceptron_context_confidence2(PC);
	case 6:
		return get_perceptron_context_confidence3(PC);
	case 7:
		return get_perceptron_context_confidence4(PC);
	case 8:
		return get_perceptron_context_confidence5(PC);
	case 9:
		return get_perceptron_context_confidence6(PC);
	case 10:
		return get_perceptron_context_confidence7(PC);
	case 11:
		return get_perceptron_context_confidence8(PC);
	case 12:
		return get_perceptron_context_confidence_piecewise(PC);
	case 13:
		return get_perceptron_context_confidence_spechist(PC);
	default:
		return 0;
	}
}

//train_value_predictor feeds the real outcome of an instruction back into the
//predictor selected in value_predict and updates the accuracy counters.
//  PC              - address of the instruction
//  inst_type       - instruction class; when load_predict is 1 every type
//                    other than 4 is skipped entirely
//  predicted       - the value the predictor produced for this instruction
//  actual          - the value the instruction really produced
//  prediction_made - 1 or 0; selects the "made" or "notmade" pair of counters
//Nothing is trained or counted when value prediction is off.
void train_value_predictor(unsigned int PC, int inst_type, int predicted, int actual, int prediction_made)
{
	//only used by the disabled debug dump at the end of the function
	int i,j;

	if (value_predict==0)
		return;
	if (load_predict==1 && inst_type!=4)
		return;

	switch (value_predict)
	{
	case 1:
		//stride: value table and confidence are trained separately
		train_stride(PC,actual);
		train_stride_confidence(PC,actual,predicted);
		break;
	case 2:		//context
	case 3:		//context-stride hybrid uses the context trainer too
		train_context(PC,actual,predicted);
		break;
	case 4:		//perceptron context variants
		train_perceptron_context1(PC,actual,predicted);
		break;
	case 5:
		train_perceptron_context2(PC,actual,predicted);
		break;
	case 6:
		train_perceptron_context3(PC,actual,predicted);
		break;
	case 7:
		train_perceptron_context4(PC,actual,predicted);
		break;
	case 8:
		train_perceptron_context5(PC,actual,predicted);
		break;
	case 9:
		train_perceptron_context6(PC,actual,predicted);
		break;
	case 10:
		train_perceptron_context7(PC,actual,predicted);
		break;
	case 11:
		train_perceptron_context8(PC,actual,predicted);
		break;
	case 12:
		train_perceptron_context_piecewise(PC,actual,predicted);
		break;
	case 13:
		train_perceptron_context_spechist(PC,actual,predicted);
		break;
	default:
		break;
	}

	//tally stats: right or wrong, split by whether the prediction was made
	if (prediction_made==1)
	{
		if (predicted==actual)
			vp_correct_made++;
		else
			vp_wrong_made++;
	}
	else if (prediction_made==0)
	{
		if (predicted==actual)
			vp_correct_notmade++;
		else
			vp_wrong_notmade++;
	}

/*	disabled debug dump for a single PC
if (PC==0x414490)
{
printf("%x: %i, %i: ",PC,predicted,actual);
for (i=0; i<value_context_history_size; i++)
	printf("%i ",global_value_index_history[i]);
printf("\n\n");

for (i=0; i<value_context_history_size+1; i++)
{
	for (j=0; j<log2(past_value_number); j++)
		printf("%i,",value_history_table[(PC>>3)%value_hash_table_size].weight[j][i*value_context_history_size+1]);
	printf(" ");
}
printf("\n\n");
}
*/
}

//dump_value_prediction_stats prints the end-of-run value prediction reports.
//Two reports are live: the extra counters kept for predictor type 11 and,
//when tally_perceptron_weights is positive, the perceptron weight tally.
//The study 1 and study 2 reports further down are commented out.
void dump_value_prediction_stats()
{
	//only used by the disabled study 2 report below
	int i;

	//predictor type 11 (perceptron context 8) keeps its own counters
	if (value_predict==11)
	{
		printf("Total predictable values: %u\n",total_values_11);
		printf("Total correct values obtained that have not been seen before locally: %u\n",total_values_unseen_locally_11);
		printf("Total correct values obtained that have not been seen before locally or globally: %u\n",total_values_unseen_locally_globally_11);
	}

	//optional weight histogram
	if (tally_perceptron_weights>0)
		dump_weight_tally();

/*	//Study 1:
	printf("Total predictable values: %u\n", total_value_1);
	printf("Cold values (cannot be predicted locally due to first time): %u\n",cold_local_value_1);
	printf("Local only: %u\n", in_local_only_1);
	printf("Global only: %u\n", in_global_only_1);
	printf("Global and local: %u\n", in_global_and_local_1);
	printf("Neither global nor local: %u\n", total_value_1-in_local_only_1-in_global_only_1-in_global_and_local_1);
*/
/*
	//Study 2:
	for (i=0; i<value_hash_table_size; i++)
	{
		if (vp_correlation[i].tag!=-1)
			get_stats_study_2(i);
	}

	//1) each past value seen more than 5 times correlated with a local value 100% of the time?
	//2) each past value seen more than 5 times correlated with a local value 90% of the time?
	//3) at least one of the past values seen more than 5 times correlated with a local value 100% of the time?
	//4) did the count of those past values that correlated 100% exceed 50% of the values seen more than 5 times?
	printf("Total different instructions: %u\n",total_pcs_2);
	printf("Total instructions for which there was a past PC who had 2 values seen over 5 times: %u\n",total_pcs_more_than_5_2);
	printf("Total of those for which each past value seen more than 5 times correlated with a local value 100: %u\n",total_pcs_question1_2);
	printf("Total of those for which each past value seen more than 5 times correlated with a local value 90: %u\n",total_pcs_question2_2);
	printf("Total for which at least one of the past values correlated 100: %u\n",total_pcs_question3_2);
	printf("Total for which the count of those past values that correlated 100 exceed 50 of all values seen over 5 times: %u\n",total_pcs_question4_2);

	printf("Total dynamic instructions eligible for value prediction: %u\n",total_values_2);
	printf("Total dynamic instructions with 2 values seen over 5 times: %u\n",total_values_more_than_5_2);
*/
}
//myloader.h
//Michael Black, 2006
//
//these data types were taken from simplescalar so as to be compatible

//presumably the initial (top of) stack address given to the loaded program - confirm against the loader
#define STACK_BASE 0x7fffc000
//NOTE(review): looks like an upper bound on program argument/environment data - confirm units (bytes or entries)
#define MAX_PARAMS 16384

//file header - first structure of the binary being loaded.
//Field names follow the COFF/ECOFF convention; the uncommented meanings below
//are the conventional ones - NOTE(review): confirm against the loader code.
struct filehdr   
{
        //magic number identifying the code as a binary - should be 0x0162 for little endian
        unsigned short f_magic;
        //number of sections in file
        unsigned short f_nscns;
        //conventionally the time/date stamp
        int f_timdat;
        //conventionally the file offset of the symbol table
        int f_symptr;
        //conventionally the number of symbol table entries
        int f_nsyms;
        //address of optional (aout) header
        //NOTE(review): in conventional COFF this field is the size of the optional header, not an address - confirm
        unsigned short f_opthdr;
        unsigned short f_flags;
};
  
//AOUT header - holds info on how big various sections are
//Field names follow the ECOFF optional-header convention; the meanings given
//below are inferred from those names - NOTE(review): confirm against the loader code.
struct aouthdr  
{
  short magic;
  short vstamp;
  //presumably the sizes of the text, data and bss segments
  int tsize;
  int dsize;
  int bsize;
  //presumably the program entry point address
  int entry;
  //presumably the start addresses of the text, data and bss segments
  int text_start;
  int data_start;
  int bss_start;
  int gprmask;
  int cprmask[4];
  //presumably the initial value of the global pointer register
  int gp_value;
};

//info on each section
//Field names follow the COFF section-header convention; the meanings given
//below are inferred from those names - NOTE(review): confirm against the loader code.
struct scnhdr
{
  //section name, 8 bytes (may not be NUL-terminated when all 8 are used - TODO confirm)
  char s_name[8];
  //presumably the physical and virtual addresses of the section
  int s_paddr;
  int s_vaddr;  
  //presumably the section size
  int s_size;
  //presumably file offsets of the section data, relocation entries and line numbers
  int s_scnptr;
  int s_relptr;   
  int s_lnnoptr;
  //presumably the number of relocation and line-number entries
  unsigned short s_nreloc;
  unsigned short s_nlnno;
  int s_flags;
};
//mymemory.h
//Michael Black, 2006

//size of one memory page in bytes
#define PAGE_SIZE 4096			//simplescalar uses 4096
//number of pages of physical memory (same count as the virtual page count below)
#define PHYSICAL_MEMORY_PAGES 1048576
//number of pages of virtual memory: 1048576 pages * 4096 bytes = 2^32 bytes
#define VIRTUAL_MEMORY_PAGES 1048576	//address space of 2^32 bytes

//mysim.h
//Michael Black, 2006

//number of registers in a register file
#define NUM_REGS 32
//register numbers, presumably of the stack pointer, global pointer and frame pointer - confirm against the ISA
#define SP 29
#define GP 28   
#define FP 30

//division and modulo that yield 0 instead of dividing when the divisor is 0.
//B appears twice in each expansion, so an argument with side effects may be evaluated twice.
#define IDIV(A, B)      (((B) == 0) ? 0 : ((A) / (B)))
#define IMOD(A, B)      (((B) == 0) ? 0 : ((A) % (B)))

//floating point registers can be used as integers, floats, or doubles
//Ftype is used to hold the floating point register set
//all three arrays share the same storage and each has NUM_REGS+1 elements
typedef union
{
        //register set viewed as ints
        int l[NUM_REGS+1];
        //register set viewed as floats
        float f[NUM_REGS+1];
        //register set viewed as doubles
        double d[NUM_REGS+1];
} Ftype;

//fpr is the type used for passing floating point values through the pipeline
//all members share the same storage, so one value can be read back in another form
typedef union
{
	//value as a single int
	int l;
	//value as a pair of ints
	int ll[2];
	//value as a float
	float f;
	//value as a double
	double d;
} fpr;
//mysimoutorder.h
//Michael Black, 2006

//instruction entry - one instruction as carried through the out-of-order pipeline
typedef struct
{
	//presumably the address (PC) of the instruction - confirm against the fetch code
	unsigned int addr;
	//NOTE(review): looks like the instruction encoding split into two words - confirm
	int inst_upper;
	int inst_lower;
	//short name, at most 4 characters plus terminator if stored as a C string - TODO confirm
	char name[5];
	//type is:
	//0 for low latency integer arithmetic (add, sub)
	//1 for high latency integer arithmetic (mult, div)
	//2 for low latency float arithmetic (add.s, sub.s)
	//3 for high latency float arithmetic (mult.d, div.d)
	//4 for load
	//5 for store
	//6 for unconditional jump
	//7 for jump to register
	//8 for conditional branch
	//9 for syscall
	int type;

	//rtype determines which functional unit to use
	//0 for integer arithmetic
	//1 for integer mult/div
	//2 for float arithmetic
	//3 for float mult/div
	//4 for loads/stores
	//5 for anything else (that doesn't need a functional unit)
	int rtype;

	//sinks_*: presumably flags telling which registers the instruction reads - confirm against the decoder
	int sinks_rs;
	int sinks_rt;
	int sinks_rt2;
	int sinks_HI;
	int sinks_LO;
	int sinks_fs;
	int sinks_fs2;
	int sinks_ft;
	int sinks_ft2;
	int sinks_FCC;

	//sources_*: presumably flags telling which registers the instruction produces - confirm against the decoder
	int sources_rt;
	int sources_rt2;
	int sources_rd;
	int sources_HI;
	int sources_LO;
	int sources_ra;
	int sources_fs;
	int sources_fs2;
	int sources_ft;
	int sources_ft2;
	int sources_fd;
	int sources_fd2;
	int sources_FCC;

	//NOTE(review): looks like a marker for a stall/bubble entry rather than a real instruction - confirm
	int is_stall;
} Inst;

//reservation station entry - holds one instruction together with its input
//operands, its results and the bookkeeping needed to track where missing
//operands will come from
typedef struct
{
	Inst instruction;
	//presumably non-zero while the entry holds an instruction - confirm
	int occupied;
	//presumably the index of the matching reorder buffer entry - confirm
	int rob_place;
	int type;

	//busy tells the state of the instruction in the reservation station
	//2 = not all operands available
	//1 = currently processing
	//0 = output ready
	int busy;
	
	//how long, in cycles, an instruction of this type takes to execute
	int latency;

	//how many cycles have elapsed since execution began
	//NOTE(review): the name suggests cycles remaining rather than cycles elapsed - confirm which is right
	int time_left;

	//input register values
	int r_rs;
	int r_rt;
	int r_rt2;
	int r_HI;
	int r_LO;

	fpr f_fs;
	fpr f_fs2;
	fpr f_ft;
	fpr f_ft2;
	int r_FCC;

	//output register values
	int r_rtout;
	int r_rt2out;
	int r_rdout;
	int r_HIout;
	int r_LOout;
	int r_raout;

	fpr f_fsout;
	fpr f_fs2out;
	fpr f_ftout;
	fpr f_ft2out;
	fpr f_fdout;
	fpr f_fd2out;
	int r_FCCout;

	//-2 means register not needed;
	//-1 means register value already obtained
	//otherwise, number of reservation station sourcing register
	int rs_available;
	int rt_available;
	int rt2_available;
	int HI_available;
	int LO_available;
	int fs_available;
	int fs2_available;
	int ft_available;
	int ft2_available;
	int FCC_available;

	//if a resstat is sourcing register, which output register is the source?
	//0=rt,1=rt2,2=rd,3=ra
	int rs_dreg;
	int rt_dreg;
	int rt2_dreg;
	int ra_dreg;

	//0=fs,1=fs2,2=ft,3=ft2,4=fd,4=fd2
	//NOTE(review): fd and fd2 are both listed as 4 above; fd2 is presumably 5 - confirm against the code that sets these
	int fs_dreg;
	int fs2_dreg;
	int ft_dreg;
	int ft2_dreg;

	//0 means not available
	//1 means available
	int rtout_available;
	int rt2out_available;
	int rdout_available;
	int HIout_available;
	int LOout_available;
	int raout_available;
	int fsout_available;
	int fs2out_available;
	int ftout_available;
	int ft2out_available;
	int fdout_available;
	int fd2out_available;
	int FCCout_available;

	//presumably the entry's position in load/store queue order - confirm
	int lsq_order;

	//branch prediction state: the instruction's PC, the predicted next PC,
	//and presumably whether a prediction was made for it - confirm
	unsigned int PC;
	unsigned int predicted_PC;
	int branch_prediction_made;

	//value prediction state: the predicted value, and presumably whether a
	//prediction was made and whether it was consumed - confirm
	int value_prediction;
	int value_prediction_made;
	int value_prediction_used;

	//NOTE(review): looks like the criticality predictor's guess for this instruction - confirm
	int guessed_critical;

} Reservation_Station;

//ROB entry - one instruction in the reorder buffer, with the register values
//it carries and the criticality measurements collected for it
typedef struct
{
	Inst instruction;
	//presumably the index of the reservation station holding this instruction - confirm
	int res_stat;

	//input register values kept with the entry
	int r_rs;
	int r_rt;
	int r_rt2;

	fpr f_ft;
	fpr f_ft2;

	//output register values (same naming as in Reservation_Station)
	int r_rtout;
	int r_rt2out;
	int r_rdout;
	int r_HIout;
	int r_LOout;
	int r_raout;

	fpr f_fsout;
	fpr f_fs2out;
	fpr f_ftout;
	fpr f_ft2out;
	fpr f_fdout;
	fpr f_fd2out;
	int r_FCCout;

	//presumably non-zero once the results are available - confirm
	int ready;

	int time_left;

	//criticality info
	//# cycles at busy state 2 - waiting on somebody's results
	//gives necessary info for QOld and QOldDep
	int cycles_notready;
	//# cycles in ROB
	//gives necessary info for AlOld
	int cycles_in_ROB;
	//number of instructions dependent on result
	int insts_using_output;
	//flags
	//one per criticality metric (QOld, QOldDep, AlOld, QCons);
	//the "everset" variants presumably record that the flag was set at any time - confirm
	int QOLDset, QOLDDEPset, ALOLDset, QCONSset;
	int QOLDeverset, QOLDDEPeverset, ALOLDeverset, QCONSeverset;

} Reorder_Buffer_Entry;
//mysyscall.h
//Michael Black, 2006
//
//most of these structures were taken from simplescalar

//presumably the simulated program's layout of the file status (stat) buffer,
//filled in when a stat-type system call is emulated - confirm against the syscall code.
//Field order and widths must be left exactly as they are if that is the case.
struct mstatbuf
{
        short           mst_dev;
        unsigned int   mst_ino;
        unsigned short  mst_mode;
        short           mst_nlink;
        short           mst_uid;
        short           mst_gid;
        short           mst_rdev;
        int             mst_size;
        int             mst_atime;
        int             mst_spare1;
        int             mst_mtime;
        int             mst_spare2;
        int             mst_ctime;
        int             mst_spare3;
        int            mst_blksize;
        int            mst_blocks;
        unsigned int   mst_gennum;
        int            mst_spare4;
};

//presumably the simulated program's layout of struct timezone, with every
//field a plain int - confirm against the syscall code
struct my_timezone
{
	int my_tz_minuteswest;
	int my_tz_dsttime;
};

//presumably the simulated program's layout of struct timeval (seconds and
//microseconds), with every field a plain int - confirm against the syscall code.
//Also embedded twice at the start of struct my_rusage.
struct my_timeeval
{
	int my_tv_sec;
	int my_tv_usec;
};

//presumably the simulated program's layout of struct rusage, with every
//counter a plain int - confirm against the syscall code
struct my_rusage
{
	//two time values; by their names, user time and system time - TODO confirm
	struct my_timeeval my_ru_utime;
	struct my_timeeval my_ru_stime;
	int my_ru_maxrss;
        int my_ru_ixrss;
        int my_ru_idrss;
        int my_ru_isrss;
        int my_ru_minflt;
        int my_ru_majflt;
        int my_ru_nswap;
        int my_ru_inblock;
        int my_ru_oublock;
        int my_ru_msgsnd;
        int my_ru_msgrcv;
        int my_ru_nsignals;
        int my_ru_nvcsw;
        int my_ru_nivcsw;
};
