Commit 0a575bb1055a25de4652c3825e0393db601e2749
1 parent
5435830b
updated the gradient descent algorithm used by the stream optimizer
Showing
2 changed files
with
115 additions
and
24 deletions
Show diff stats
stim/envi/binary.h
... | ... | @@ -21,26 +21,31 @@ namespace stim{ |
21 | 21 | /// maximizing the dependent parameter Bps (bytes per second) |
22 | 22 | class stream_optimizer{ |
23 | 23 | protected: |
24 | - size_t Bps; //bytes per second for the previous batch | |
24 | + size_t Bps[2]; //bytes per second for the previous batch | |
25 | 25 | size_t interval_B; //number of bytes processed this interval |
26 | 26 | size_t interval_ms; //number of milliseconds spent in the current interval |
27 | 27 | size_t n[2]; //current batch size (in bytes) |
28 | + size_t h; //spacing used for finite difference calculations | |
28 | 29 | size_t dn; //delta value (in bytes) for setting the batch size (minimum change in batch parameter) |
29 | 30 | size_t maxn; //maximum value for the batch size |
30 | 31 | |
32 | + double alpha; //alpha value controls the factor of the gradient that is used to calculate the next point (speed of convergence) | |
33 | + | |
31 | 34 | bool sample_step; //calculating the derivative (this class alternates between calculating dBps and B) |
32 | 35 | bool forward_diff; //evaluate the derivative using forward differences |
33 | 36 | |
34 | 37 | size_t window_ms; //size of the interval (in milliseconds) integrated to get a reliable bps value |
35 | 38 | |
36 | 39 | // This function rounds n0 to the nearest multiple of dn and limits it to [dn, maxn] |
37 | - size_t round_limit(size_t n0){ | |
38 | - if(n0 > maxn) n0 = maxn; //limit the returned size of x to within the specified bounds | |
39 | - if(n0 < dn) n0 = dn; | |
40 | + size_t round_limit(double n0){ | |
41 | + if(n0 < 0) return dn; //if n0 is less than zero, return the lowest possible n | |
42 | + | |
43 | + size_t new_n = (size_t)(n0 + 0.5); //now n0 must be positive, so round it to the nearest integer | |
44 | + if(new_n > maxn) new_n = maxn; //limit the returned size of x to within the specified bounds | |
40 | 45 | |
41 | - size_t lowest = n0 / dn; | |
46 | + size_t lowest = new_n / dn; | |
42 | 47 | size_t highest = lowest + dn; |
43 | - size_t diff[2] = {n0 - lowest, highest - n0}; //calculate the two differences | |
48 | + size_t diff[2] = {new_n - lowest, highest - new_n}; //calculate the two differences | |
44 | 49 | if(diff[0] < diff[1]) |
45 | 50 | return lowest; |
46 | 51 | return highest; |
... | ... | @@ -49,19 +54,79 @@ protected: |
49 | 54 | public: |
50 | 55 | |
51 | 56 | //constructor initializes a stream optimizer |
52 | - stream_optimizer(size_t current_batch_size, size_t min_batch_size, size_t max_batch_size, size_t window = 1000){ | |
53 | - Bps = 0; //initialize to zero bytes per second processed | |
57 | + stream_optimizer(size_t min_batch_size, size_t max_batch_size, double a = 0.0001, size_t window = 1000){ | |
58 | + //Bps = 0; //initialize to zero bytes per second processed | |
59 | + Bps[0] = Bps[1] = 0; //initialize the bytes per second to 0 | |
54 | 60 | interval_B = 0; //zero bytes have been processed at initialization |
55 | 61 | interval_ms = 0; //no time has been spent on the batch so far |
56 | 62 | dn = min_batch_size; //set the minimum batch size as the minimum change in batch size |
57 | 63 | maxn = max_batch_size; //set the maximum batch size |
58 | - n[0] = current_batch_size; //set B | |
64 | + n[0] = max_batch_size; //set B | |
65 | + h = (max_batch_size / min_batch_size) / 10 * dn; | |
66 | + std::cout<<"h = "<<h<<std::endl; | |
67 | + if(h < dn) h = dn; | |
68 | + alpha = a; | |
69 | + //n[0] = round_limit( (max_batch_size - min_batch_size)/2 ); | |
59 | 70 | window_ms = window; //minimum integration interval (for getting a reliable bps measure) |
60 | 71 | sample_step = true; //the first step is to calculate the derivative |
61 | 72 | forward_diff = true; //start with the forward difference (since we start at the maximum batch size) |
62 | 73 | } |
63 | 74 | |
64 | - // this function updates the optimizer, given the number of bytes processed in an interval and time spent processing | |
75 | + size_t update(size_t bytes_processed, size_t ms_spent){ | |
76 | + interval_B += bytes_processed; //increment the number of bytes processed | |
77 | + interval_ms += ms_spent; //increment the number of milliseconds spent processing | |
78 | + | |
79 | + //if we have sufficient information to evaluate the optimization function at this point | |
80 | + if(interval_ms < window_ms){ //if insufficient time has passed to get a reliable Bps measurement | |
81 | + return n[0]; | |
82 | + } | |
83 | + else{ //if we have collected enough information for a reliable Bps estimate | |
84 | size_t new_Bps = interval_B / interval_ms; //calculate the current throughput (bytes per millisecond) | |
85 | + | |
86 | + if(Bps[0] == 0){ //if n[0] hasn't been evaluated yet, this is the first step | |
87 | + Bps[0] = new_Bps; //set the initial Bps value | |
88 | + n[1] = n[0] - h; //set the position of the next sample point | |
89 | + std::cout<<"Bps value at n = "<<n[0]<<" is "<<Bps[0]<<" Bps, probing n = "<<n[1]<<std::endl; | |
90 | + return n[1]; //return the probe point | |
91 | + } | |
92 | + else{ | |
93 | + Bps[1] = new_Bps; //set the Bps for the current point (n[1]) | |
94 | + | |
95 | + double Bps_p; //allocate a variable for the derivative | |
96 | + //calculate the derivative | |
97 | + if(n[0] < n[1]){ //if the current point is less than the previous one (probably the most common) | |
98 | + Bps_p = ((double)Bps[1] - (double)Bps[0]) / (double)h; //calculate the derivative using the forward finite difference | |
99 | + } | |
100 | + else{ | |
101 | + Bps_p = ((double)Bps[0] - (double)Bps[1]) / (double)h; //calculate the derivative using the backward finite difference | |
102 | + } | |
103 | + | |
104 | + std::cout<<" probed n = "<<n[1]<<" with "<<Bps[1]<<" Bps, gradient = "<<Bps_p<<" Bps"<<std::endl; | |
105 | + | |
106 | double new_n_precise = n[0] + alpha * Bps_p; //take a gradient ascent step toward higher throughput (rounded to a valid batch size on the next line) | |
107 | + size_t new_n_nearest = round_limit(new_n_precise); //calculate the next point (given batch parameters) | |
108 | + | |
109 | + if(new_n_nearest == n[0]){ //if the newest point is the same as the original point | |
110 | + Bps[0] = Bps[1]; //update the Bps | |
111 | + //if(n[0] == dn) n[1] = n[0] + h; //if we're on the left edge, probe forward | |
112 | + //else n[1] = n[0] - h; //otherwise probe backwards | |
113 | + std::cout<<" staying at n = "<<n[0]<<" for now"<<std::endl; | |
114 | + //return n[1]; //return the probe point | |
115 | + | |
116 | + Bps[0] = 0; //reset the Bps for the current point | |
117 | + return n[0]; //return the current point for a re-calculation | |
118 | + } | |
119 | + else{ //if the newest point is different from the original point | |
120 | + n[0] = new_n_nearest; //move to the new point | |
121 | + Bps[0] = 0; //set the Bps to zero (point hasn't been tested) | |
122 | + std::cout<<" moving to n = "<<n[0]<<std::endl; | |
123 | + return n[0]; //return the new point | |
124 | + } | |
125 | + } | |
126 | + } | |
127 | + } | |
128 | + | |
129 | + /*// this function updates the optimizer, given the number of bytes processed in an interval and time spent processing | |
65 | 130 | size_t update(size_t bytes_processed, size_t ms_spent){ |
66 | 131 | interval_B += bytes_processed; //increment the number of bytes processed |
67 | 132 | interval_ms += ms_spent; //increment the number of milliseconds spent processing |
... | ... | @@ -98,11 +163,33 @@ public: |
98 | 163 | } |
99 | 164 | if(sample_step) return n[0]; |
100 | 165 | return n[1]; //insufficient information, keep the same batch size |
101 | - } | |
166 | + }*/ | |
167 | + | |
168 | + /*size_t update(size_t bytes_processed, size_t ms_spent){ | |
169 | + interval_B += bytes_processed; //increment the number of bytes processed | |
170 | + interval_ms += ms_spent; //increment the number of milliseconds spent processing | |
171 | + | |
172 | + //if( Bps[0] == 0 ){ //if the left boundary hasn't been processed | |
173 | + | |
174 | + | |
175 | + //if we have sufficient information to evaluate the optimization function at this point | |
176 | + if(interval_ms >= window_ms){ | |
177 | + size_t new_Bps = interval_B / interval_ms; //calculate the current Bps | |
178 | + | |
179 | + if(Bps[0] == 0) //if the left interval Bps hasn't been calculated | |
180 | + Bps[0] = interval_B / interval_ms; //that is the interval being processed | |
181 | + else | |
182 | + Bps[1] = interval_B / interval_ms; //otherwise the right interval is being processed | |
183 | + | |
184 | + if(Bps[0] != 0 && Bps[1] != 0){ //if both intervals have been processed | |
185 | + | |
186 | + | |
187 | + } | |
188 | + }*/ | |
102 | 189 | |
103 | 190 | size_t update(size_t bytes_processed, size_t ms_spent, size_t& data_rate){ |
104 | 191 | size_t time = update(bytes_processed, ms_spent); |
105 | - data_rate = Bps; | |
192 | + data_rate = Bps[0]; | |
106 | 193 | return time; |
107 | 194 | } |
108 | 195 | }; | ... | ... |
stim/envi/bsq.h
... | ... | @@ -392,17 +392,24 @@ public: |
392 | 392 | bool bil(std::string outname, bool PROGRESS = false, bool VERBOSE = false){ |
393 | 393 | |
394 | 394 | const size_t buffers = 4; //number of buffers required for this algorithm |
395 | + | |
395 | 396 | size_t mem_per_batch = binary<T>::buffer_size / buffers; //calculate the maximum memory available for a batch |
396 | 397 | |
397 | 398 | size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case) |
398 | 399 | size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints |
400 | + | |
401 | + if(VERBOSE){ | |
402 | + std::cout<<"maximum memory available for processing: "<<(double)binary<T>::buffer_size/(double)1000000<<" MB"<<std::endl; | |
403 | + std::cout<<" this supports a batch size of "<<max_slices_per_batch<<" Y-axis slices"<<std::endl; | |
404 | + } | |
405 | + | |
399 | 406 | if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error |
400 | 407 | std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl; |
401 | 408 | exit(1); |
402 | 409 | } |
403 | 410 | size_t max_batch_bytes = max_slices_per_batch * slice_bytes; //calculate the amount of memory that will be allocated for all four buffers |
404 | 411 | |
405 | - stream_optimizer O(max_slices_per_batch, 1, max_slices_per_batch); | |
412 | + stream_optimizer O(1, max_slices_per_batch); | |
406 | 413 | |
407 | 414 | T* src[2]; //source double-buffer for asynchronous batching |
408 | 415 | src[0] = (T*) malloc(max_batch_bytes); |
... | ... | @@ -421,10 +428,6 @@ public: |
421 | 428 | std::future<size_t> rthread; |
422 | 429 | std::future<std::ostream&> wthread; //create asynchronous threads for reading and writing |
423 | 430 | |
424 | - //readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer | |
425 | - //y_load += N[0]; //increment the loaded slice counter | |
426 | - //int b = 1; | |
427 | - | |
428 | 431 | std::chrono::high_resolution_clock::time_point t_start, pt_start; //high-resolution timers |
429 | 432 | std::chrono::high_resolution_clock::time_point t_end, pt_end; |
430 | 433 | size_t t_batch; //number of milliseconds to process a batch |
... | ... | @@ -435,15 +438,15 @@ public: |
435 | 438 | size_t data_rate; |
436 | 439 | |
437 | 440 | rt_total += readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer |
438 | - y_load += N[0]; //increment the loaded slice counter | |
439 | - int b = 1; //initialize the double buffer to 0 | |
441 | + y_load += N[0]; //increment the loaded slice counter | |
442 | + int b = 1; //initialize the double buffer to 0 | |
440 | 443 | while(y_proc < Y()){ //while there are still slices to be processed |
441 | 444 | t_start = std::chrono::high_resolution_clock::now(); //start the timer for this batch |
442 | 445 | if(y_load < Y()){ //if there are still slices to be loaded, load them |
443 | - if(y_proc > 0){ | |
444 | - N[b] = O.update(N[!b] * slice_bytes, t_batch, data_rate); //set the batch size based on optimization | |
445 | - std::cout<<"New N = "<<N[b]<<" at "<<(double)data_rate / 1000000<<" MB/s"<<std::endl; | |
446 | - } | |
446 | + //if(y_proc > 0){ | |
447 | + | |
448 | + | |
449 | + //} | |
447 | 450 | if(y_load + N[b] > Y()) N[b] = Y() - y_load; //if the next batch would process more than the total slices, adjust the batch size |
448 | 451 | rthread = std::async(std::launch::async, &stim::bsq<T>::readlines, this, src[b], y_load, N[b]); |
449 | 452 | rt_total += rthread.get(); |
... | ... | @@ -452,7 +455,6 @@ public: |
452 | 455 | |
453 | 456 | b = !b; //swap the double-buffer |
454 | 457 | pt_total += binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 0, 2, 1); //permute the batch to a BIL file |
455 | - //target.write((char*)dst[b], N[b] * slice_bytes); //write the permuted data to the output file | |
456 | 458 | wt_total += writeblock(&target, dst[b], N[b] * slice_bytes); //write the permuted data to the output file |
457 | 459 | y_proc += N[b]; //increment the counter of processed pixels |
458 | 460 | if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels |
... | ... | @@ -460,6 +462,8 @@ public: |
460 | 462 | t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count(); |
461 | 463 | t_total += t_batch; |
462 | 464 | //if(y_load < Y()) rt_total += rthread.get(); //if a new batch was set to load, make sure it loads after calculations |
465 | + N[b] = O.update(N[!b] * slice_bytes, t_batch, data_rate); //set the batch size based on optimization | |
466 | + //std::cout<<"New N = "<<N[!b]<<" selected with "<<(double)data_rate / 1000000<<" MB/s"<<std::endl; | |
463 | 467 | } |
464 | 468 | |
465 | 469 | free(src[0]); //free buffer resources | ... | ... |