################ Infrastructure to get initial solutions ###############

# Extract the model parameters from an initial solution
# I - the data matrix
# Format: rows are genes (or parcels, or voxels); columns are time points of many patients
# Column name format: "subject_name;TRX", where X is the time point index (e.g., "subj1;TR3")
# selected_bic_rows - the rows of the initial bicluster
# selected_bic_cols - the columns of the initial bicluster
get_initial_solution_I_as_input <- function(I, selected_bic_rows, selected_bic_cols, window_size = 1, start_tr = 0){
  # create the objects of our model
  Z_cols_to_subjects = sapply(colnames(I), function(x){strsplit(x, split = ';')[[1]][1]})
  n_subj = length(unique(Z_cols_to_subjects))
  n_tr = sum(Z_cols_to_subjects == Z_cols_to_subjects[1])
  n_voxels = nrow(I)
  # H: row (voxel) membership indicator
  H = rep(0, n_voxels); names(H) = rownames(I)
  H[selected_bic_rows] = 1
  # P: per-subject indicator (currently not returned)
  P = rep(1, n_subj); names(P) = unique(Z_cols_to_subjects)
  # C: subjects x time points membership matrix
  C = matrix(nrow = n_subj, ncol = n_tr); C[,] = 0
  colnames(C) = paste("TR", (1 + start_tr):(n_tr + start_tr), sep = '')
  rownames(C) = unique(Z_cols_to_subjects)
  C[t(sapply(selected_bic_cols, function(x){strsplit(x, split = ';')[[1]]}))] = 1
  # HS: voxels x subjects membership matrix
  HS = matrix(nrow = n_voxels, ncol = n_subj); HS[,] = 0
  rownames(HS) = rownames(I); colnames(HS) = unique(Z_cols_to_subjects)
  HS[selected_bic_rows,] = 1
  # set the hyper-parameters of H and C
  pi_v = length(selected_bic_rows) / nrow(I)
  pi_1_1 = sum(C[,1]) / nrow(C)
  C_counts = apply(C, 1, analyze_C_row, window_size = window_size)
  # rows of C_counts hold the (1->1, 0->1, 1->0, 0->0) transition counts per subject
  pi_1_to_1 = sum(C_counts[1,]) / (sum(C_counts[1,]) + sum(C_counts[3,]))
  pi_0_to_1 = sum(C_counts[2,]) / (sum(C_counts[2,]) + sum(C_counts[4,]))
  return(list(I = I, C = C, H = H, HS = HS, pi_1_1 = pi_1_1, w = window_size,
              pi_v = pi_v, pi_1_to_1 = pi_1_to_1, pi_0_to_1 = pi_0_to_1, C_counts = C_counts))
}
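# Minimal usage sketch (the toy matrix and names below are illustrative only, not part of the
# original pipeline): two subjects, ten TRs each, column names following the "subject;TRX"
# convention, and a small seed bicluster.
if (FALSE) {
  set.seed(1)
  toy_I = matrix(rbinom(20 * 20, 1, 0.2), nrow = 20,
                 dimnames = list(paste0("v", 1:20),
                                 paste0(rep(c("subjA", "subjB"), each = 10), ";TR", rep(1:10, 2))))
  toy_sol = get_initial_solution_I_as_input(toy_I,
                                            selected_bic_rows = paste0("v", 1:5),
                                            selected_bic_cols = c("subjA;TR1", "subjA;TR2", "subjB;TR1"))
  toy_sol$C        # subjects x TRs membership
  toy_sol$pi_v     # fraction of rows in the seed bicluster
}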
# Extract time point model parameters from a solution vector x.
# x is a binary vector of a specific subject; a value of 1 means that the specific time
# point is in the module.
analyze_C_row <- function(x, window_size = 1){
  pi_0_to_1 = 0; pi_1_to_1 = 0; pi_0_to_0 = 0; pi_1_to_0 = 0
  n_window_has_1 = 0
  #print(x)
  for (i in 1:length(x)){
    if (i == 1){next}
    wind_ind = i - window_size; wind_ind = max(wind_ind, 1)
    if (sum(x[wind_ind:(i-1)]) > 0){
      pi_1_to_1 = pi_1_to_1 + x[i]
      pi_1_to_0 = pi_1_to_0 + (1 - x[i])
    }
    else{
      pi_0_to_1 = pi_0_to_1 + x[i]
      pi_0_to_0 = pi_0_to_0 + (1 - x[i])
    }
  }
  return (c(pi_1_to_1, pi_0_to_1, pi_1_to_0, pi_0_to_0))
}

# Set the default (non-informative) priors for the hyperparameters
get_default_hyper_parameter_priors <- function(n_tr = 100, n_v = 500, n_subj = 5){
  pr_params = list()
  pr_params[["pi_v"]] = c(1, 1)
  pr_params[['pi_1_1']] = c(1, 1)
  pr_params[['pi_0_to_1']] = c(1, 1)
  pr_params[['pi_1_to_1']] = c(1, 1)
  pr_params[['pi_s']] = c(1, 1)
  pr_params[['pi_s_c']] = c(1, 1)
  pr_params[['pi_lc']] = c(1, 1)
  pr_params[['pi_lc_c']] = c(1, 1)
  # mu0, p0, v0, ss0
  pr_params[['norm_bg']] = c(0, 0, 1, 1)
  pr_params[['norm_bic']] = c(0, 0, 1, 1)
  return (pr_params)
}

get_hyper_parameter_modes_from_prior <- function(sol, pr_params){
  sol$pi_v = get_hp_mode(pr_params[['pi_v']])
  sol$pi_1_1 = get_hp_mode(pr_params[['pi_1_1']])
  sol$pi_0_to_1 = get_hp_mode(pr_params[['pi_0_to_1']])
  sol$pi_1_to_1 = get_hp_mode(pr_params[['pi_1_to_1']])
  sol$pi_s = rep(get_hp_mode(pr_params[['pi_s']]), ncol(sol$HS))
  sol$pi_s_c = get_hp_mode(pr_params[['pi_s_c']])
  sol$pi_lc = get_hp_mode(pr_params[['pi_lc']])
  sol$pi_lc_c = get_hp_mode(pr_params[['pi_lc_c']])
  sol$norm_bg = get_norm_dist_hp_mode(pr_params[['norm_bg']])
  sol$norm_bic = get_norm_dist_hp_mode(pr_params[['norm_bic']])
  return (sol)
}

# Mode of a Beta(a,b) prior; Beta(1,1) has no unique mode, so return 0.5
get_hp_mode <- function(params){
  if (all(params == 1)){return (0.5)}
  return((params[1] - 1) / (sum(params) - 2))
}

# params: mu0, p0, v0, ss0
# returns c(mean, sd) implied by the prior parameters
get_norm_dist_hp_mode <- function(params){
  precision = params[3] / params[4]
  return (c(params[1], sqrt(1 / (precision * params[3]))))
}
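# Worked sketch of the prior-mode helpers (values below are arithmetic checks only,
# not outputs recorded from a real run):
if (FALSE) {
  get_hp_mode(c(1, 1))                  # uniform Beta(1,1) -> 0.5 by the special case
  get_hp_mode(c(3, 2))                  # Beta(3,2) mode = (3-1)/(3+2-2) = 2/3
  get_norm_dist_hp_mode(c(0, 0, 1, 1))  # default normal prior -> mean 0, sd 1
}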
################ The Gibbs process starts here ###############

# 1. sample the hyper-parameters
sample_pi_v <- function(sol, pr_params, reps = 10){
  return (rbeta(reps, pr_params[['pi_v']][1] + sum(sol$H), pr_params[['pi_v']][2] + sum(1 - sol$H)))
}

sample_pi_1_1 <- function(sol, pr_params, reps = 10){
  return (rbeta(reps, pr_params[['pi_1_1']][1] + sum(sol$C[,1]), pr_params[['pi_1_1']][2] + sum(1 - sol$C[,1])))
}

sample_pi_1_to_1 <- function(sol, pr_params, reps = 10){
  return (rbeta(reps, pr_params[['pi_1_to_1']][1] + sum(sol$C_counts[1,]), pr_params[['pi_1_to_1']][2] + sum(sol$C_counts[3,])))
}

sample_pi_0_to_1 <- function(sol, pr_params, reps = 10){
  return (rbeta(reps, pr_params[['pi_0_to_1']][1] + sum(sol$C_counts[2,]), pr_params[['pi_0_to_1']][2] + sum(sol$C_counts[4,])))
}

sample_pi_lc <- function(sol, pr_params, reps = 1, return_counts = F){
  count1 = 0; count2 = 0
  # go over subjects and count the 1s and 0s inside the bicluster cells
  for (s in 1:nrow(sol$C)){
    currs = rownames(sol$C)[s]
    curr_rows = which(sol$HS[,s] == 1)
    currtr = which(sol$C[s,] == 1)
    curr_cols = (s - 1) * ncol(sol$C) + currtr
    count1 = count1 + sum(sol$I[curr_rows, curr_cols])
    count2 = count2 + sum(1 - sol$I[curr_rows, curr_cols])
  }
  if (return_counts){return (c(count1, count2))}
  print (paste(count1, count2, rbeta(reps, pr_params[['pi_lc']][1] + count1, pr_params[['pi_lc']][2] + count2), sep = ' '))
  return (rbeta(reps, pr_params[['pi_lc']][1] + count1, pr_params[['pi_lc']][2] + count2))
}

sample_pi_lc_c <- function(sol, pr_params, reps = 1){
  pi_lc_counts = sample_pi_lc(sol, pr_params, reps = 1, return_counts = T)
  count1 = sum(c(sol$I)) - pi_lc_counts[1]
  count2 = sum(1 - c(sol$I)) - pi_lc_counts[2]
  return (rbeta(reps, pr_params[['pi_lc_c']][1] + count1, pr_params[['pi_lc_c']][2] + count2))
}

sample_norm_bic <- function(sol, pr_params, reps = 1, return_vecs = F){
  # collect the bicluster cells, going over subjects
  x = c()
  for (s in 1:nrow(sol$C)){
    currs = rownames(sol$C)[s]
    curr_rows = which(sol$HS[,s] == 1)
    currtr = which(sol$C[s,] == 1)
    curr_cols = (s - 1) * ncol(sol$C) + currtr
    x = c(x, sol$I[curr_rows, curr_cols])
  }
  if (length(x) == 0){return(get_norm_dist_hp_mode(pr_params[['norm_bic']]))}
  # normal-gamma posterior update
  p0 = pr_params[['norm_bic']][2]
  n = length(x)
  m0 = pr_params[['norm_bic']][1]
  v0 = pr_params[['norm_bic']][3]
  ss0 = pr_params[['norm_bic']][4]
  meanx = mean(x)
  p_n = p0 + n
  m_n = (n * meanx + p0 * m0) / p_n
  v_n = v0 + n
  ss_n = ss0 + sum((x - meanx)^2) + ((n * p0) * (meanx - m0)^2) / p_n
  print (paste(m_n, p_n, v_n, ss_n))
  new_prec = rgamma(1, v_n / 2, ss_n / 2)
  # the sampled precision is immediately replaced by the gamma mode
  new_prec = (v_n - 2) / (ss_n)
  new_sd = sqrt(1 / (new_prec * p_n))
  new_mean = rnorm(1, m_n, new_sd)
  return (c(new_mean, sqrt(1 / (new_prec))))
}

sample_norm_bg <- function(sol, pr_params, reps = 1, return_vecs = F){
  # collect the background cells, going over subjects
  x = c()
  for (s in 1:nrow(sol$C)){
    currs = rownames(sol$C)[s]
    curr_rows = which(sol$HS[,s] != 1)
    currtr = which(sol$C[s,] != 1)
    curr_cols = (s - 1) * ncol(sol$C) + currtr
    x = c(x, sol$I[curr_rows,])
    x = c(x, sol$I[-curr_rows, curr_cols])
  }
  p0 = pr_params[['norm_bg']][2]
  n = length(x)
  m0 = pr_params[['norm_bg']][1]
  v0 = pr_params[['norm_bg']][3]
  ss0 = pr_params[['norm_bg']][4]
  meanx = mean(x)
  p_n = p0 + n
  m_n = (n * meanx + p0 * m0) / p_n
  v_n = v0 + n
  ss_n = ss0 + sum((x - meanx)^2) + ((n * p0) * (meanx - m0)^2) / p_n
  new_prec = rgamma(1, v_n / 2, ss_n / 2)
  # the sampled precision is immediately replaced by the gamma mode
  new_prec = (v_n - 2) / (ss_n)
  new_sd = sqrt(1 / (new_prec * p_n))
  new_mean = rnorm(1, m_n, new_sd)
  return (c(new_mean, sqrt(1 / (new_prec))))
}

sample_pi_s <- function(s, sol, pr_params, reps = 1){
  count1 = sum(sol$H * sol$HS[,s])
  count2 = sum(sol$H * (1 - sol$HS[,s]))
  return (rbeta(reps, pr_params[['pi_s']][1] + count1, pr_params[['pi_s']][2] + count2))
}
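# Sketch of the conjugate Beta update behind the pi_* samplers (the counts below are made
# up for illustration): with a Beta(1,1) prior and a membership vector H containing k ones
# out of n entries, sample_pi_v draws from Beta(1 + k, 1 + n - k).
if (FALSE) {
  toy = list(H = c(rep(1, 7), rep(0, 13)))      # 7 of 20 rows in the bicluster
  toy_priors = list(pi_v = c(1, 1))
  set.seed(1)
  sample_pi_v(toy, toy_priors, reps = 5)        # five draws from Beta(8, 14)
}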
sample_pi_s_c <- function(sol, pr_params, reps = 1){
  count1 = 0; count2 = 0
  for (i in which(sol$H == 0)){
    count1 = count1 + sum(sol$HS[i,])
    count2 = count2 + sum(1 - sol$HS[i,])
  }
  return (rbeta(reps, pr_params[['pi_s_c']][1] + count1, pr_params[['pi_s_c']][2] + count2))
}

sample_Hv_posterior <- function(sol, v, reps = 1){
  p_Hv1_HSv = sol$pi_v * prod(sol$pi_s^sol$HS[v,]) * prod((1 - sol$pi_s)^(1 - sol$HS[v,]))
  p_Hv0_HSv = (1 - sol$pi_v) * (sol$pi_s_c^sum(sol$HS[v,])) * ((1 - sol$pi_s_c)^sum(1 - sol$HS[v,]))
  p = p_Hv1_HSv / (p_Hv1_HSv + p_Hv0_HSv)
  return (rbinom(reps, 1, prob = p))
}

sample_HS_posterior <- function(sol, v, s, reps = 1){
  curr_p_s = sol$pi_s[s]
  if (sol$H[v] == 0){curr_p_s = sol$pi_s_c}
  Cts_inds = which(sol$C[s,] == 1)
  currI = sol$I[v, grepl(pattern = paste(rownames(sol$C)[s], ';', sep = ''), colnames(sol$I))][Cts_inds]
  p_HSvs1_Ivs = curr_p_s
  p_HSvs0_Ivs = (1 - curr_p_s)
  log_p_HSvs1_Ivs = log(curr_p_s)
  log_p_HSvs0_Ivs = log(1 - curr_p_s)
  if (length(currI) > 0){
    an = get_two_group_analysis(currI, sol, method = sol$method, use.log = F)
    p_HSvs1_Ivs = curr_p_s * an[1]
    p_HSvs0_Ivs = (1 - curr_p_s) * an[2]
    an = get_two_group_analysis(currI, sol, method = sol$method, use.log = T)
    log_p_HSvs1_Ivs = log(curr_p_s) + an[1]
    log_p_HSvs0_Ivs = log(1 - curr_p_s) + an[2]
  }
  p = p_HSvs1_Ivs / (p_HSvs1_Ivs + p_HSvs0_Ivs)
  # fall back to the log-scale computation when the direct ratio underflows
  ratio1 = exp(log_p_HSvs0_Ivs - log_p_HSvs1_Ivs)
  p1 = exp(-1 * log(1 + ratio1))
  if (is.na(p) || is.na(rbinom(reps, 1, prob = p))){p = p1}
  if (is.na(p)){print (paste(c("####", p, p_HSvs1_Ivs, p_HSvs0_Ivs)))}
  return (rbinom(reps, 1, prob = p))
}

sample_C_1_posterior <- function(sol, s, reps = 1){
  # first time point of this subject, only rows with HS==1
  currI = sol$I[sol$HS[,s] == 1, which(grepl(pattern = paste(rownames(sol$C)[s], ';', sep = ''), colnames(sol$I)))[1]]
  an = get_two_group_analysis(currI, sol, method = sol$method, use.log = F)
  p_Cts1_window_Its = an[1]; p_Cts0_window_Its = an[2]
  an = get_two_group_analysis(currI, sol, method = sol$method, use.log = T)
  log_p_Cts1_window_Its = an[1]; log_p_Cts0_window_Its = an[2]
  curr_w = sol$C[s, 2:(1 + sol$w)]
  p_C1s1_window_I1s = p_Cts1_window_Its * sol$pi_1_1 * (sol$pi_1_to_1^sum(curr_w)) * ((1 - sol$pi_1_to_1)^sum(1 - curr_w))
  p_C1s0_window_I1s = p_Cts0_window_Its * (1 - sol$pi_1_1)
  log_p_C1s1_window_I1s = log_p_Cts1_window_Its + log(sol$pi_1_1) + sum(curr_w) * log(sol$pi_1_to_1) + sum(1 - curr_w) * log((1 - sol$pi_1_to_1))
  log_p_C1s0_window_I1s = log_p_Cts0_window_Its + log(1 - sol$pi_1_1)
  curr_sum = sum(curr_w)
  # if the window contains only zeroes
  if (curr_sum == 0){
    p_C1s0_window_I1s = p_C1s0_window_I1s * ((1 - sol$pi_0_to_1)^sol$w)
    log_p_C1s0_window_I1s = log_p_C1s0_window_I1s + sol$w * log(1 - sol$pi_0_to_1)
  }
  # if the window contains only ones
  if (curr_sum == sol$w){
    p_C1s0_window_I1s = p_C1s0_window_I1s * sol$pi_0_to_1 * (sol$pi_1_to_1^(sol$w - 1))
    log_p_C1s0_window_I1s = log_p_C1s0_window_I1s + log(sol$pi_0_to_1) + (sol$w - 1) * log(sol$pi_1_to_1)
  }
  # here we know we have at least one 1, and not all are 1's
  if (curr_sum > 0 && curr_sum < sol$w){
    l = min(which(curr_w == 1))
    curr_w = curr_w[-c(1:l)]
    p_C1s0_window_I1s = p_C1s0_window_I1s * ((1 - sol$pi_0_to_1)^(l - 1)) * sol$pi_0_to_1 *
      (sol$pi_1_to_1^sum(curr_w)) * ((1 - sol$pi_1_to_1)^sum(1 - curr_w))
    log_p_C1s0_window_I1s = log_p_C1s0_window_I1s + (l - 1) * log((1 - sol$pi_0_to_1)) + log(sol$pi_0_to_1) +
      sum(curr_w) * log(sol$pi_1_to_1) + sum(1 - curr_w) * log(1 - sol$pi_1_to_1)
  }
  p = p_C1s1_window_I1s / (p_C1s0_window_I1s + p_C1s1_window_I1s)
  # fallback based on the data-likelihood log ratio when the direct computation fails
  ratio1 = exp(log_p_Cts0_window_Its - log_p_Cts1_window_Its)
  p1 = exp(-1 * log(1 + ratio1))
  if (is.na(p) || is.na(rbinom(reps, 1, prob = p))){p = p1}
  if (is.na(p)){p = as.numeric(p_Cts1_window_Its > p_Cts0_window_Its)}
  return (rbinom(reps, 1, prob = p))
}
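# Sketch of the log-ratio fallback used by the samplers above (numbers are illustrative):
# when the two unnormalized probabilities underflow to 0, p = A/(A+B) becomes NaN, but
# p = exp(-log(1 + exp(logB - logA))) is still well defined.
if (FALSE) {
  logA = -800; logB = -805                 # e.g., log-likelihoods of two competing states
  A = exp(logA); B = exp(logB)             # both underflow to 0
  A / (A + B)                              # NaN
  exp(-log(1 + exp(logB - logA)))          # ~0.993, the intended posterior probability
}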
sample_Ct_posterior <- function(sol, t, s, reps = 1){
  currI = sol$I[sol$HS[,s] == 1, which(grepl(pattern = paste(rownames(sol$C)[s], ';', sep = ''), colnames(sol$I)))[t]]
  #print (dim(currI));print (length(currI))
  upper_tr = min(t + sol$w, ncol(sol$C))
  curr_w_size = upper_tr - t
  an = get_two_group_analysis(currI, sol, method = sol$method, use.log = F)
  p_Cts1_window_Its = an[1]; p_Cts0_window_Its = an[2]
  an = get_two_group_analysis(currI, sol, method = sol$method, use.log = T)
  log_p_Cts1_window_Its = an[1]; log_p_Cts0_window_Its = an[2]
  previous_w = sol$C[s, max(1, t - sol$w):(t - 1)]
  if(any(is.na(sol$C))){print('C has NAs :('); print (table(is.na(sol$C)))}
  #print (previous_w)
  previous_has_1 = sum(previous_w) > 0
  initial_p1 = sol$pi_1_to_1
  if (!previous_has_1){initial_p1 = sol$pi_0_to_1}
  p_Cts1_window_Its = p_Cts1_window_Its * initial_p1
  p_Cts0_window_Its = p_Cts0_window_Its * (1 - initial_p1)
  log_p_Cts1_window_Its = log_p_Cts1_window_Its + log(initial_p1)
  log_p_Cts0_window_Its = log_p_Cts0_window_Its + log(1 - initial_p1)
  if(curr_w_size > 0){
    curr_w = sol$C[s, (t + 1):upper_tr]
    curr_w_has_1 = rep(1, length(curr_w))
    p_Cts1_window_Its = p_Cts1_window_Its * (sol$pi_1_to_1^sum(curr_w)) * ((1 - sol$pi_1_to_1)^sum(1 - curr_w))
    for(nextt in (t + 1):(t + curr_w_size)){
      if (sum(sol$C[s, max(1, nextt - sol$w):(nextt - 1)]) > 0 && curr_w[nextt - t] == 1){
        curr_w_has_1[nextt - t] = sol$pi_1_to_1
      }
      if (sum(sol$C[s, max(1, nextt - sol$w):(nextt - 1)]) > 0 && curr_w[nextt - t] == 0){
        curr_w_has_1[nextt - t] = 1 - sol$pi_1_to_1
      }
      if (sum(sol$C[s, max(1, nextt - sol$w):(nextt - 1)]) == 0 && curr_w[nextt - t] == 1){
        curr_w_has_1[nextt - t] = sol$pi_0_to_1
      }
      if (sum(sol$C[s, max(1, nextt - sol$w):(nextt - 1)]) == 0 && curr_w[nextt - t] == 0){
        curr_w_has_1[nextt - t] = 1 - sol$pi_0_to_1
      }
    }
    p_Cts0_window_Its = p_Cts0_window_Its * prod(curr_w_has_1)
    log_p_Cts0_window_Its = log_p_Cts0_window_Its + sum(log(curr_w_has_1))
  }
  p = p_Cts1_window_Its / (p_Cts0_window_Its + p_Cts1_window_Its)
  ratio1 = exp(log_p_Cts0_window_Its - log_p_Cts1_window_Its)
  p1 = exp(-1 * log(1 + ratio1))
  if (is.na(p) || is.na(rbinom(reps, 1, prob = p))){p = p1}
  if (is.na(p)){p = as.numeric(p_Cts1_window_Its > p_Cts0_window_Its)}
  if (is.na(p)){print (paste('#####', p, ratio1, p1, log_p_Cts0_window_Its, log_p_Cts1_window_Its))}
  if (is.nan(p)){print (paste('#####', p, ratio1, p1, log_p_Cts0_window_Its, log_p_Cts1_window_Its))}
  return (rbinom(reps, 1, prob = p))
}

# This is the basic function that calculates the probability/frequency
# of the two group model
get_two_group_analysis <- function(currI, sol, method = 'binary', use.log = F){
  # initial tests
  if (length(currI) == 0 && !use.log){return(c(1, 1))}
  if (length(currI) == 0 && use.log){return(c(0, 0))}
  if (method == 'norm' && !use.log){
    p_Cts1_window_Its = prod(dnorm(currI, mean = sol$norm_bic[1], sd = sol$norm_bic[2]))
    p_Cts0_window_Its = prod(dnorm(currI, mean = sol$norm_bg[1], sd = sol$norm_bg[2]))
    return(c(p_Cts1_window_Its, p_Cts0_window_Its))
  }
  if (method == 'norm' && use.log){
    p_Cts1_window_Its = sum(dnorm(currI, mean = sol$norm_bic[1], sd = sol$norm_bic[2], log = T))
    p_Cts0_window_Its = sum(dnorm(currI, mean = sol$norm_bg[1], sd = sol$norm_bg[2], log = T))
    return(c(p_Cts1_window_Its, p_Cts0_window_Its))
  }
  # return the default calculations - the binary model
  if (!use.log){
    p_Cts1_window_Its = (sol$pi_lc^sum(currI)) * ((1 - sol$pi_lc)^sum(1 - currI))
    p_Cts0_window_Its = (sol$pi_lc_c^sum(currI)) * ((1 - sol$pi_lc_c)^sum(1 - currI))
    return(c(p_Cts1_window_Its, p_Cts0_window_Its))
  } else {
    log_p_Cts1_window_Its = sum(currI) * log(sol$pi_lc) + sum(1 - currI) * log(1 - sol$pi_lc)
    log_p_Cts0_window_Its = sum(currI) * log(sol$pi_lc_c) + sum(1 - currI) * log(1 - sol$pi_lc_c)
    return (c(log_p_Cts1_window_Its, log_p_Cts0_window_Its))
  }
}
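# Sketch of the two-group likelihood under the binary model (the sol values below are made
# up): the first returned entry is the likelihood of the data under the bicluster rate
# pi_lc, the second under the background rate pi_lc_c.
if (FALSE) {
  toy_sol = list(pi_lc = 0.8, pi_lc_c = 0.2)
  v = c(1, 1, 1, 0)
  get_two_group_analysis(v, toy_sol, method = 'binary', use.log = F)  # c(0.8^3*0.2, 0.2^3*0.8)
  get_two_group_analysis(v, toy_sol, method = 'binary', use.log = T)  # the same, in log scale
}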
perform_gibbs_runs_fast <- function(sol, reps1 = 1, reps2 = 100, pr_params, method = 'binary', use.log = F){
  iters = list()
  iters[[1]] = sol
  if (reps2 < 2){return(iters)}
  for (i in 2:(reps2 + 1)){
    print (paste("######## iteration ", i, " ########"))
    currsol = iters[[i - 1]]
    # define the new object
    newsol = list()
    newsol$H = currsol$H; newsol$HS = currsol$HS; newsol$C = currsol$C; newsol$I = currsol$I
    newsol$w = currsol$w; newsol$method = method; newsol$use.log = use.log
    # sample hyper-parameters
    newsol$pi_v = sample_pi_v(currsol, pr_params, reps = reps1)
    if (is.na(newsol$pi_v)){print ('newsol$pi_v is na'); break}
    newsol$pi_1_1 = sample_pi_1_1(currsol, pr_params, reps = reps1)
    if (is.na(newsol$pi_1_1)){print ('newsol$pi_1_1 is na'); break}
    newsol$pi_1_to_1 = sample_pi_1_to_1(currsol, pr_params, reps = reps1)
    if (is.na(newsol$pi_1_to_1)){print ('newsol$pi_1_to_1 is na'); break}
    newsol$pi_0_to_1 = sample_pi_0_to_1(currsol, pr_params, reps = reps1)
    if (is.na(newsol$pi_0_to_1)){print ('newsol$pi_0_to_1 is na'); break}
    if (method == 'binary'){
      newsol$pi_lc = sample_pi_lc(currsol, pr_params, reps = reps1)
      newsol$pi_lc_c = sample_pi_lc_c(currsol, pr_params, reps = reps1)
      #print(newsol$pi_lc);print(newsol$pi_lc_c)
    }
    if (method == 'norm'){
      newsol$norm_bic = sample_norm_bic(currsol, pr_params, reps = reps1)
      newsol$norm_bg = sample_norm_bg(currsol, pr_params, reps = reps1)
      print (newsol$norm_bic); print(newsol$norm_bg)
      if (any(is.na(newsol$norm_bic))){print('within normal dist has na'); break}
      if (any(is.na(newsol$norm_bg))){print('within normal dist has na'); break}
    }
    newsol$pi_s = currsol$pi_s
    newsol$pi_s = sapply(1:length(currsol$pi_s), sample_pi_s, sol = currsol, pr_params = pr_params, reps = reps1)
    if (any(is.na(newsol$pi_s))){print('newsol$pi_s has nas'); break}
    newsol$pi_s_c = sample_pi_s_c(currsol, pr_params, reps = reps1)
    newsol$H = sapply(1:length(newsol$H), function(x, sol, reps){sample_Hv_posterior(sol, x, reps = reps)}, sol = newsol, reps = reps1)
    for (l in 1:ncol(newsol$HS)){
      newsol$HS[,l] = sapply(1:nrow(newsol$HS), function(x, sol, l, reps){sample_HS_posterior(sol, x, l, reps = reps)}, sol = newsol, l = l, reps = reps1)
    }
    if (any(is.na(newsol$HS))){print('newsol$HS has nas'); break}
    newsol$C[,1] = sapply(1:nrow(newsol$C), function(x, sol, reps){sample_C_1_posterior(sol, x, reps = reps)}, sol = newsol, reps = reps1)
    for (k in 1:nrow(newsol$C)){
      newsol$C[k, 2:ncol(newsol$C)] = sapply(2:ncol(newsol$C), function(x, sol, k, reps){sample_Ct_posterior(sol, x, k, reps = reps)}, sol = newsol, k = k, reps = reps1)
    }
    if (any(is.na(newsol$C))){print('newsol$C has nas'); break}
    newsol$C_counts = apply(newsol$C, 1, analyze_C_row, window_size = currsol$w)
    print (table(c(newsol$C), useNA = 'always'))
    if (sum(newsol$C) == 0 || sum(newsol$HS) == 0){print (paste ('Gibbs iteration', i, ' got an empty bic'))}
    iters[[i]] = newsol
  }
  return (iters)
}
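# Sketch of how the chain returned by perform_gibbs_runs_fast is typically inspected
# (assumes 'iters' was produced by a previous call; the trace below is illustrative):
if (FALSE) {
  # iters = perform_gibbs_runs_fast(initial_sol, reps2 = 50, pr_params = pr_params, method = 'binary')
  pi_lc_trace = sapply(iters, function(x){x$pi_lc})   # bicluster "on" rate per iteration
  plot(pi_lc_trace, type = 'l')
  rowMeans(sapply(iters, function(x){x$H}))           # per-row posterior inclusion frequency
}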
##################### Implementation of the wrapper ##############

wrapper2_algo <- function(I, initial_algo = 'bimax', debi_thr1 = 0.5, debi_thr2 = 0.8, ...){
  mode(I) = 'numeric'
  if (initial_algo == 'bimax'){
    initial_sol = run_bimax(I, bimax_min_row = 10, bimax_min_col = 20, number = 2000)
  }
  else{
    initial_sol = run_isa(I, min_row = 10, min_col = 10)
  }
  initial_sol_red = initial_sol
  if (length(initial_sol_red$rows) > 1){
    initial_sol_red = reduce_overlaps_debi(initial_sol$rows, initial_sol$cols, thr = debi_thr1)
  }
  initial_improved = multisample_biclustering_wrapper(I, initial_sol_red$rows, initial_sol_red$cols, ...)
  initial_improved_red = reduce_overlaps_debi(initial_improved[[1]], initial_improved[[2]], thr = debi_thr2)
  initial_improved_red[[3]] = initial_improved[[3]][names(initial_improved_red[[1]])]
  initial_improved_red[[4]] = initial_improved[[4]][names(initial_improved_red[[1]])]
  return (initial_improved_red)
}

multisample_biclustering_wrapper <- function(Z, bic_rows_list, bic_cols_list, model = 'binary', fchange_thr = 0.5,
                                             w = 1, gibbs_reps = 50, min_row = 5, min_col = 10, save_iters = T, ...){
  # reduce overlaps
  reduced_overlaps = reduce_overlaps_debi(bic_rows_list, bic_cols_list, thr = 0.5)
  bic_rows_list = reduced_overlaps$rows
  bic_cols_list = reduced_overlaps$cols
  print (paste('number of bics before gibbs', length(bic_rows_list)))
  #if (gibbs_reps <1){
  #  return (list(bic_rows = bic_rows_list , bic_cols = bic_cols_list))
  #}
  mode(Z) = 'numeric'
  bic_rows = list(); bic_cols = list(); bic_mats = list(); bics_params = list()
  for (n in names(bic_cols_list)){
    ptm <- proc.time()
    algo_out = improve_bicluster(Z, bic_rows_list[[n]], bic_cols_list[[n]], model = model, gibbs_reps = gibbs_reps, window_size = w)
    if (model == 'binary'){
      curr_pi_lc = mean(sapply(algo_out$iters, function(x){x$pi_lc}))
      curr_pi_lc_c = mean(sapply(algo_out$iters, function(x){x$pi_lc_c}))
      print (paste('curr bic pi_lc: ', curr_pi_lc, curr_pi_lc_c))
      if (gibbs_reps > 0 && (curr_pi_lc / curr_pi_lc_c < fchange_thr || curr_pi_lc < 0.4)){break}
    }
    if (model == 'norm' && gibbs_reps > 0){
      curr_norm_dist = rowMeans(sapply(algo_out$iters, function(x){x$norm_bic}))
      if (curr_norm_dist[1] < fchange_thr){next}
    }
    gibbs_solution = algo_out$gibbs_solution
    if (length(gibbs_solution$bic_rows) < min_row){next}
    if (length(gibbs_solution$bic_cols) < min_col){next}
    print(paste("Found a bicluster, it took (sec): ", proc.time() - ptm)[3])
    print (paste("bicluster size:", length(gibbs_solution$bic_rows), length(gibbs_solution$bic_cols), sep = " "))
    if (length(gibbs_solution$bic_rows) < min_row){break}
    if (length(gibbs_solution$bic_cols) < min_col){break}
    bic_rows[[n]] = gibbs_solution$bic_rows
    bic_cols[[n]] = gibbs_solution$bic_cols
    bic_mats[[n]] = gibbs_solution$bic_I
    if (save_iters){bics_params[[n]] = algo_out$iters}
  }
  return (list(bic_rows = bic_rows, bic_cols = bic_cols, bic_mats = bic_mats, bics_params = bics_params))
}

# Greedily keep the largest biclusters and drop the others that overlap them by more than thr
reduce_overlaps_debi <- function(bic_rows_list, bic_cols_list, thr = 0.5, max_num_bics = 10){
  #print (paste('reducing overlaps, num bics is: ',length(bic_rows_list)))
  #print (class(bic_rows_list))
  if (length(bic_rows_list) == 0){return (list(rows = bic_rows_list, cols = bic_cols_list))}
  in_pool = rep(T, length(bic_rows_list))
  bic_size = sapply(bic_rows_list, length) * sapply(bic_cols_list, length)
  kept_bic_rows = list()
  kept_bic_cols = list()
  while (T){
    if (length(kept_bic_rows) == max_num_bics){break}
    if (sum(in_pool) == 0){break}
    # next bic is the largest
    curr_size = max(bic_size[in_pool])
    next_ind = which(in_pool & bic_size == curr_size)[1]
    kept_bic_rows[[as.character(next_ind)]] = bic_rows_list[[next_ind]]
    kept_bic_cols[[as.character(next_ind)]] = bic_cols_list[[next_ind]]
    in_pool[next_ind] = F
    # go over the remaining bics and remove overlapping ones from the pool
    for (i in 1:length(in_pool)){
      if (!in_pool[i]){next}
      curr_rows_overlap = intersect(bic_rows_list[[next_ind]], bic_rows_list[[i]])
      curr_cols_overlap = intersect(bic_cols_list[[next_ind]], bic_cols_list[[i]])
      overlap_size = length(curr_rows_overlap) * length(curr_cols_overlap)
      if (overlap_size / bic_size[i] > thr){in_pool[i] = F}
    }
  }
  # if the bics had names then keep them
  if (!is.null(names(bic_rows_list))){
    kept_bic_rows = bic_rows_list[as.numeric(names(kept_bic_rows))]
    kept_bic_cols = bic_cols_list[as.numeric(names(kept_bic_cols))]
  }
  return (list(rows = kept_bic_rows, cols = kept_bic_cols))
}
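# Sketch of the greedy overlap reduction (toy biclusters below are illustrative): the
# second bicluster is fully contained in the first, so only the larger one is kept.
if (FALSE) {
  toy_rows = list(b1 = paste0("v", 1:10), b2 = paste0("v", 1:5), b3 = paste0("v", 20:25))
  toy_cols = list(b1 = paste0("c", 1:10), b2 = paste0("c", 1:5), b3 = paste0("c", 20:25))
  reduce_overlaps_debi(toy_rows, toy_cols, thr = 0.5)$rows   # keeps b1 and b3, drops b2
}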
improve_bicluster <- function(Z, rows, cols, model = 'binary', window_size = 1, start_tr = 0, gibbs_reps = 50){
  sol = get_initial_solution_I_as_input(Z, rows, cols, window_size = window_size)
  pr_params = get_default_hyper_parameter_priors(ncol(sol$C), nrow(sol$HS), nrow(sol$C))
  initial_sol = get_hyper_parameter_modes_from_prior(sol, pr_params)
  initial_sol$method = model
  iters = perform_gibbs_runs_fast(initial_sol, reps2 = gibbs_reps, pr_params = pr_params, method = model)
  gibbs_solution = extract_gibbs_solution(Z, iters, ignore_first = 0, eps2 = 0.5, eps1 = 0.5)
  if (model == 'binary'){print (table(Z[gibbs_solution$bic_rows, gibbs_solution$bic_cols]) / length(Z[gibbs_solution$bic_rows, gibbs_solution$bic_cols]))}
  print (dim(Z[gibbs_solution$bic_rows, gibbs_solution$bic_cols]))
  if (gibbs_reps <= 1){gibbs_solution = extract_gibbs_solution(Z, iters[1])}
  return (list(iters = iters, gibbs_solution = gibbs_solution))
}

### helper methods to run extant algorithms
run_bimax <- function(I, number = 1000, bimax_min_row = 10, bimax_min_col = 20){
  # binarize the matrix (top decile) if it is not already binary
  if (!all(I == 1 | I == 0)){I = I >= quantile(c(I), 0.9)}
  bimax_sol = biclust(I, method = BCBimax(), minr = bimax_min_row, minc = bimax_min_col, number = number)
  print ('after running bimax')
  bics = bicluster(I, bimax_sol)
  bics = bics[sapply(bics, length) > 0]
  if(length(bics) == 0){
    print ('bimax could not find an initial solution, trying smaller bics')
    bimax_min_col = bimax_min_col - 2
    while (length(bics) == 0 && bimax_min_col >= 6){
      print (paste(bimax_min_row, bimax_min_col, number))
      bimax_sol = biclust(I, method = BCBimax(), minr = bimax_min_row, minc = bimax_min_col, number = number)
      bics = bicluster(I, bimax_sol)
      bics = bics[sapply(bics, length) > 0]
      bimax_min_col = bimax_min_col - 2
    }
  }
  if(length(bics) == 0){
    bimax_min_col = 8
    while (length(bics) == 0 && bimax_min_col >= 5){
      bimax_min_row = bimax_min_col
      print (paste(bimax_min_row, bimax_min_col, number))
      bimax_sol = biclust(I, method = BCBimax(), minr = bimax_min_row, minc = bimax_min_col, number = number)
      bics = bicluster(I, bimax_sol)
      bics = bics[sapply(bics, length) > 0]
      bimax_min_col = bimax_min_col - 1
    }
  }
  rows = list(); cols = list()
  if (length(bics) == 0){return (list(rows = rows, cols = cols))}
  for (i in 1:length(bics)){
    rows[[i]] = rownames(bics[[i]]); cols[[i]] = colnames(bics[[i]])
  }
  return (list(rows = rows, cols = cols))
}

run_isa <- function(I, min_row, min_col, direction = c('up', 'up')){
  isa.result = isa(I, direction = direction, thr.row = c(0.5, 1, 1.5, 2), thr.col = c(0.5, 1, 1.5, 2), no.seeds = 100)
  rows_mat = isa.result$rows
  cols_mat = isa.result$columns
  to_keep = apply(rows_mat > 0, 2, sum) >= min_row & apply(cols_mat > 0, 2, sum) >= min_col
  rows_mat = rows_mat[, to_keep, drop = F]
  cols_mat = cols_mat[, to_keep, drop = F]
  rows = list(); cols = list()
  if (ncol(rows_mat) == 0){return (list(rows = rows, cols = cols))}
  for (j in 1:ncol(rows_mat)){
    rows[[j]] = rownames(I)[rows_mat[, j] > 0]
    cols[[j]] = colnames(I)[cols_mat[, j] > 0]
  }
  return (list(rows = rows, cols = cols))
}
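# These helpers rely on external packages that this file assumes are already attached
# (most likely 'biclust' for biclust()/BCBimax()/bicluster() and 'isa2' for isa(); treat
# the library names as an assumption). A minimal sketch:
if (FALSE) {
  library(biclust)   # assumed source of biclust(), BCBimax(), bicluster()
  library(isa2)      # assumed source of isa()
  seeds = run_bimax(toy_I, number = 100, bimax_min_row = 5, bimax_min_col = 5)
  length(seeds$rows)   # number of seed biclusters found
}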
##################### Implementation of the masker (this is the complete TWIGS algorithm) ##############

# "musbic" = multi-sample biclustering
# initial_method = how to get the initial solution
# ... = parameters for the gibbs sampler
one_musbic_step <- function(I, min_row = 5, min_col = 5, gibbs_reps = 50, initial_method = 'bimax',
                            method = 'binary', initial_method_thr = 1, bimax_min_col = 20, ...){
  if (initial_method == 'bimax'){
    first_b = get_bicluster_bimax(I, method = method, thr = initial_method_thr, bimax_min_col = bimax_min_col)
  }
  else if (initial_method == 'plaid'){
    first_b = get_bicluster_plaid(I, method = method, thr = initial_method_thr)
  }
  else{
    first_b = get_bicluster_ISA(I, min_row, min_col)
  }
  print (paste('initial solution size is : ', nrow(first_b), ncol(first_b)))
  #print (mode(I))
  #print (I[rownames(first_b),colnames(first_b)])
  if (length(first_b) == 0){return (NULL)}
  sol = get_initial_solution_I_as_input(I, rownames(first_b), colnames(first_b), ...)
  pr_params = get_default_hyper_parameter_priors(ncol(sol$C), nrow(sol$HS), nrow(sol$C))
  initial_sol = get_hyper_parameter_modes_from_prior(sol, pr_params)
  initial_sol$method = method
  iters = perform_gibbs_runs_fast(initial_sol, reps2 = gibbs_reps, pr_params = pr_params, method = method)
  gibbs_solution = extract_gibbs_solution(I, iters, ignore_first = 0, eps2 = 0.5, eps1 = 0.5)
  if (method == 'binary'){print (table(I[gibbs_solution$bic_rows, gibbs_solution$bic_cols]) / length(I[gibbs_solution$bic_rows, gibbs_solution$bic_cols]))}
  print (dim(I[gibbs_solution$bic_rows, gibbs_solution$bic_cols]))
  if (gibbs_reps <= 1){gibbs_solution = extract_gibbs_solution(I, iters[1])}
  return (list(iters = iters, gibbs_solution = gibbs_solution))
}

# Assumption: gibbs_reps > 1
one_musbic_step_recursive_normal <- function(I, min_row = 5, min_col = 5, gibbs_reps = 50, initial_method = 'bimax', initial_method_thr = 1, ...){
  # this flow uses the normal model ('norm')
  initial_sol = one_musbic_step(I, min_row, min_col, gibbs_reps, initial_method,
                                method = 'norm', initial_method_thr = initial_method_thr, ...)
  curr_norm_dist = rowMeans(sapply(initial_sol$iters, function(x){x$norm_bic}))
  curr_bg_dist = rowMeans(sapply(initial_sol$iters, function(x){x$norm_bg}))
  # rerun the flow
  nextI = I[initial_sol$gibbs_solution$bic_rows,]
  hist(nextI)
  next_sol = one_musbic_step(nextI, min_row, min_col, gibbs_reps, initial_method,
                             method = 'norm', initial_method_thr = initial_method_thr, ...)
  # inspect the solution
  next_norm_dist = rowMeans(sapply(next_sol$iters, function(x){x$norm_bic}))
  next_bg_dist = rowMeans(sapply(next_sol$iters, function(x){x$norm_bg}))
  if (abs(next_bg_dist[1] - next_norm_dist[1]) < 0.3){return(initial_sol)}
  return (next_sol)
}
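# Sketch of a single masker step on a binary matrix (toy_I as in the earlier sketch; all
# values are illustrative): the returned list holds the Gibbs chain ('iters') and the
# thresholded consensus solution ('gibbs_solution').
if (FALSE) {
  step_out = one_musbic_step(toy_I, min_row = 5, min_col = 5, gibbs_reps = 20,
                             initial_method = 'bimax', method = 'binary')
  step_out$gibbs_solution$bic_rows
  step_out$gibbs_solution$bic_cols
}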
run_complete_twigs_algorithm <- function(I, fchange_thr = 0.5, min_row = 10, min_col = 10, max_num_bics = 20, gibbs_reps = 50, ...){
  mode(I) = 'numeric'
  bic_rows = list(); bic_cols = list(); bic_mats = list(); bics_params = list()
  count = 1
  while (T){
    ptm <- proc.time()
    algo_out = one_musbic_step(I, min_row = min_row, min_col = min_col, gibbs_reps = gibbs_reps, ...)
    if (is.null(algo_out)){break}
    curr_pi_lc = mean(sapply(algo_out$iters, function(x){x$pi_lc}))
    curr_pi_lc_c = mean(sapply(algo_out$iters, function(x){x$pi_lc_c}))
    print (paste('curr bic pi_lc: ', curr_pi_lc, curr_pi_lc_c))
    if (gibbs_reps > 0 && curr_pi_lc < max(curr_pi_lc_c * fchange_thr, 0.5)){break}
    gibbs_solution = algo_out$gibbs_solution
    print(paste("Found a bicluster, it took (sec): ", proc.time() - ptm)[3])
    print (paste("bicluster size:", length(gibbs_solution$bic_rows), length(gibbs_solution$bic_cols), sep = " "))
    if (length(gibbs_solution$bic_rows) < min_row){break}
    if (length(gibbs_solution$bic_cols) < min_col){break}
    bic_rows[[count]] = gibbs_solution$bic_rows
    bic_cols[[count]] = gibbs_solution$bic_cols
    bic_mats[[count]] = gibbs_solution$bic_I
    bics_params[[count]] = algo_out$iters
    # calculate the residual
    HS = gibbs_solution$HS; C = gibbs_solution$C
    for (i in 1:nrow(C)){
      curr_subj = rownames(C)[i]
      curr_TRs = colnames(I)[grepl(pattern = paste(curr_subj, ';', sep = ''), colnames(I))]
      curr_TRs = curr_TRs[C[curr_subj,] == 1]
      I[HS[, curr_subj] == 1, curr_TRs] = 0
    }
    I[bic_rows[[count]], bic_cols[[count]]] = 0
    count = count + 1
    if (count > max_num_bics){break}
  }
  return (list(bic_rows = bic_rows, bic_cols = bic_cols, bic_mats = bic_mats, bics_params = bics_params))
}

run_complete_twigs_algorithm_normal <- function(Z, mean_thr = 0.3, min_row = 5, min_col = 10, max_num_bics = 10, gibbs_reps = 50, ...){
  bic_rows = list(); bic_cols = list(); bic_mats = list(); bics_params = list()
  count = 1
  while (T){
    ptm <- proc.time()
    algo_out = one_musbic_step(Z, min_row = min_row, min_col = min_col, method = 'norm', gibbs_reps = gibbs_reps, ...)
    if (is.null(algo_out)){break}
    curr_norm_dist = rowMeans(sapply(algo_out$iters, function(x){x$norm_bic}))
    curr_bg_dist = rowMeans(sapply(algo_out$iters, function(x){x$norm_bg}))
    print (paste(c('curr bic within distribution: ', curr_norm_dist)))
    if (gibbs_reps > 0 && curr_norm_dist[1] < mean_thr){break}
    gibbs_solution = algo_out$gibbs_solution
    print(paste("Found a bicluster, it took (sec): ", proc.time() - ptm)[3])
    print (paste("bicluster size:", length(gibbs_solution$bic_rows), length(gibbs_solution$bic_cols), sep = " "))
    if (length(gibbs_solution$bic_rows) < min_row){break}
    if (length(gibbs_solution$bic_cols) < min_col){break}
    bic_rows[[count]] = gibbs_solution$bic_rows
    bic_cols[[count]] = gibbs_solution$bic_cols
    bic_mats[[count]] = gibbs_solution$bic_I
    bics_params[[count]] = algo_out$iters
    # calculate the residual matrix
    HS = gibbs_solution$HS; C = gibbs_solution$C
    for (i in 1:nrow(C)){
      curr_subj = rownames(C)[i]
      curr_TRs = colnames(Z)[grepl(pattern = paste(curr_subj, ';', sep = ''), colnames(Z))]
      curr_TRs = curr_TRs[C[curr_subj,] == 1]
      if (length(curr_TRs) == 0 || sum(HS[, curr_subj]) == 0){next}
      Z[HS[, curr_subj] == 1, curr_TRs] = Z[HS[, curr_subj] == 1, curr_TRs] - curr_norm_dist[1]
      # alternative normalization, added May 2015
      #currmean = curr_norm_dist[1]
      #currsig = curr_norm_dist[2]
      #currbgsig = curr_bg_dist[2]
      #Z[HS[,curr_subj]==1,curr_TRs] = ((Z[HS[,curr_subj]==1,curr_TRs] - currmean)/currsig)*currbgsig
    }
    count = count + 1
    if (count > max_num_bics){break}
  }
  return (list(bic_rows = bic_rows, bic_cols = bic_cols, bic_mats = bic_mats, bics_params = bics_params))
}
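# Sketch of the masking step used above (toy numbers): once a bicluster is reported, its
# cells are zeroed (binary flow) or mean-subtracted (normal flow) so that the next
# iteration searches the residual matrix.
if (FALSE) {
  M = matrix(1, nrow = 4, ncol = 4, dimnames = list(paste0("v", 1:4), paste0("s1;TR", 1:4)))
  M[c("v1", "v2"), c("s1;TR1", "s1;TR2")] = 0    # binary masking of a found bicluster
  M
}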
extract_gibbs_solution <- function(I, iters, eps1 = 0.5, eps2 = 0.5, ignore_first = 0){
  # optionally drop burn-in iterations
  if (length(iters) > ignore_first && ignore_first > 1){iters = iters[-c(1:ignore_first)]}
  H = iters[[1]]$H; HS = iters[[1]]$HS; C = iters[[1]]$C
  mode(H) = 'numeric'; mode(HS) = 'numeric'; mode(C) = 'numeric'
  pi_1_1 = mean(sapply(1:length(iters), function(x, iters){iters[[x]]$pi_1_1}, iters = iters))
  pi_1_to_1 = mean(sapply(1:length(iters), function(x, iters){iters[[x]]$pi_1_to_1}, iters = iters))
  pi_0_to_1 = mean(sapply(1:length(iters), function(x, iters){iters[[x]]$pi_0_to_1}, iters = iters))
  pi_lc = mean(sapply(1:length(iters), function(x, iters){iters[[x]]$pi_lc}, iters = iters))
  pi_lc_c = mean(sapply(1:length(iters), function(x, iters){iters[[x]]$pi_lc_c}, iters = iters))
  pi_s = rowMeans(sapply(1:length(iters), function(x, iters){iters[[x]]$pi_s}, iters = iters))
  pi_s_c = mean(sapply(1:length(iters), function(x, iters){iters[[x]]$pi_s_c}, iters = iters))
  # average the indicator variables over the iterations
  if (length(iters) > 2){
    n = length(iters)
    for (i in 2:n){
      H = H + iters[[i]]$H
      HS = HS + iters[[i]]$HS
      C = C + iters[[i]]$C
    }
    H = H / n; C = C / n; HS = HS / n
  }
  bic_cols = c()
  bic_rows = c()
  for (i in 1:nrow(C)){
    for (j in 1:ncol(C)){
      if (C[i, j] > eps2){
        bic_cols = c(bic_cols, paste(rownames(C)[i], ';', colnames(C)[j], sep = ''))
      }
    }
  }
  bic_rows = rownames(I)[(which(H > eps1))]
  bic_I = I[bic_rows, bic_cols]
  HS = HS > eps1; H = H > eps1; C = C > eps2
  return(list(H = H, HS = HS, C = C, bic_rows = bic_rows, bic_cols = bic_cols, bic_I = bic_I,
              pi_1_1 = pi_1_1, pi_1_to_1 = pi_1_to_1, pi_0_to_1 = pi_0_to_1,
              pi_lc = pi_lc, pi_lc_c = pi_lc_c, pi_s = pi_s, pi_s_c = pi_s_c))
}

get_bicluster_bimax <- function(I, bimax_min_row = 10, bimax_min_col = 10, number = 1000, method = 'binary', thr = 1){
  min_row = bimax_min_row; min_col = bimax_min_col
  if (method != 'binary'){
    bimax_thr = max(quantile(c(I), 0.9), thr)
    print(paste("bimax thr is:", bimax_thr))
    I = I >= bimax_thr
  }
  bimax_sol = biclust(I, method = BCBimax(), minr = bimax_min_row, minc = bimax_min_col, number = number)
  print ('after running bimax')
  bics = bicluster(I, bimax_sol)
  bics = bics[sapply(bics, length) > 0]
  if(length(bics) == 0){
    print ('bimax could not find an initial solution, trying smaller bics')
    bimax_min_col = bimax_min_col - 2
    while (length(bics) == 0 && bimax_min_col >= 6){
      print (paste(bimax_min_row, bimax_min_col, number))
      bimax_sol = biclust(I, method = BCBimax(), minr = bimax_min_row, minc = bimax_min_col, number = number)
      bics = bicluster(I, bimax_sol)
      bics = bics[sapply(bics, length) > 0]
      bimax_min_col = bimax_min_col - 2
    }
  }
  if(length(bics) == 0){
    bimax_min_col = 8
    while (length(bics) == 0 && bimax_min_col >= 5){
      bimax_min_row = bimax_min_col
      print (paste(bimax_min_row, bimax_min_col, number))
      bimax_sol = biclust(I, method = BCBimax(), minr = bimax_min_row, minc = bimax_min_col, number = number)
      bics = bicluster(I, bimax_sol)
      bics = bics[sapply(bics, length) > 0]
      bimax_min_col = bimax_min_col - 1
    }
  }
  print (length(bics))
  if(length(bics) == 0){
    print ('bimax could not find an initial solution, setting a random starting point')
    bimax_first_b = I[sample(rownames(I))[1:min_row], sample(colnames(I))[1:min_col]]
  }
  else{
    bic_sizes = sapply(bics, length)
    bic_num_samples = sapply(bics, function(x){length(unique(sapply(colnames(x), getSubjFromColName)))})
    #print (sapply(bics,function(x){unique(strsplit(colnames(x),split=';')[[1]][1])}))
    plot (bic_sizes, bic_num_samples)
    # select the bicluster that spans the largest number of columns
    bic_sizes = sapply(bics, ncol)
    #print (bic_sizes)
    ind = which(bic_sizes == max(bic_sizes))[1]
    #ind = which(bic_num_samples == max(bic_num_samples))[1]
    bimax_first_b = bics[[ind]]
  }
  return (bimax_first_b)
}

getSubjFromColName <- function(x){
  strsplit(x, split = ';')[[1]][1]
}
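# Sketch of the consensus rule used by extract_gibbs_solution (toy numbers): indicators
# are averaged across iterations and kept when their posterior frequency exceeds eps.
if (FALSE) {
  H_draws = rbind(c(1, 1, 0), c(1, 0, 0), c(1, 1, 0))   # 3 iterations, 3 rows
  colMeans(H_draws)          # posterior inclusion frequencies: 1.00, 0.67, 0.00
  colMeans(H_draws) > 0.5    # rows kept with eps1 = 0.5
  getSubjFromColName("subjA;TR3")   # "subjA"
}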
get_bicluster_ISA <- function(I, min_row, min_col){
  isa.result = isa(I, direction = c('up', 'up'), thr.row = c(0.5, 1, 1.5), thr.col = c(0.5, 1, 1.5), no.seeds = 100)
  curr_rows = isa.result$rows > 0
  curr_cols = isa.result$columns > 0
  if (!is.null(dim(curr_cols)) && length(curr_rows) > 0){
    print (dim(curr_cols)); print(dim(curr_rows))
    to_keep = colSums(curr_rows) > min_row & colSums(curr_cols) > min_col
    if (all(!to_keep)){
      curr_rows = NULL; curr_cols = NULL
    }
    else{
      curr_rows = curr_rows[, to_keep]; curr_cols = curr_cols[, to_keep]
      #print (paste(colSums(curr_rows),colSums(curr_cols),sep='...'))
    }
  }
  print (length(curr_rows))
  isa_b = NULL
  if (!is.null(dim(curr_cols))){
    # get the bicluster with the largest number of columns
    bicluster_sizes = apply(curr_rows, 2, sum) * apply(curr_cols, 2, sum)
    bicluster_sizes = apply(curr_cols, 2, sum)
    ind = which(bicluster_sizes == max(bicluster_sizes))[1]
    isa_b = I[curr_rows[, ind], curr_cols[, ind]]
  }
  if (is.null(dim(curr_cols)) && length(curr_rows) > 0){
    isa_b = I[curr_rows, curr_cols]
  }
  if(length(isa_b) == 0){
    print ('isa could not find an initial solution, setting a random starting point')
    isa_b = I[sample(rownames(I))[1:min_row], sample(colnames(I))[1:min_col]]
  }
  return (isa_b)
}
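# End-to-end usage sketch (everything here is illustrative; the 'biclust'/'isa2' packages
# and the toy matrix from the first sketch are assumptions): the binary flow masks found
# biclusters with zeros, the normal flow subtracts the estimated bicluster mean.
if (FALSE) {
  res_bin = run_complete_twigs_algorithm(toy_I, min_row = 5, min_col = 5, gibbs_reps = 20)
  length(res_bin$bic_rows)                    # number of biclusters reported
  res_norm = run_complete_twigs_algorithm_normal(scale(toy_I), min_row = 5, min_col = 5, gibbs_reps = 20)
}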