/*
 * Author: Andrei Zavada <johnhommer@gmail.com>
 *         building on original work by Thomas Nowotny <tnowotny@ucsd.edu>
 *
 * License: GPL-2+
 *
 * Initial version: 2008-08-02
 *
 * CModel top cycle
 */


#include <iostream>
#include <ctime>
#include <cstdlib>

#include "../libstilton/gcc-builtins.hh"

#include "integrate-rk65.hh"
#include "model.hh"

#include "config.h"


using namespace std;


/*--------------------------------------------------------------------------
  Implementation of a 6-5 Runge-Kutta method with adaptive time step,
  mostly taken from the book "The numerical analysis of ordinary differential
  equations - Runge-Kutta and general linear methods" by J.C. Butcher, Wiley,
  Chichester, 1987, and a free adaptation to a 6th-order Runge-Kutta method
  of an ODE system with additive white noise
--------------------------------------------------------------------------*/

inline namespace {

// Coefficient tables of the embedded Runge-Kutta 6(5) scheme used by
// CIntegrateRK65::cycle().  They are pure lookup data and are never written
// to, hence const.

// Stage coefficients.  Row i holds the weights cycle() applies to the
// derivative vectors F[0..i-1] when it builds the stage state Y[i]; entries
// not spelled out are zero-initialised.  Row 8 is also used by cycle() as the
// weight set of the 5th-order solution (y5).
const double __Butchers_a[9][8] = {
	{ },
	{ 1./9 },
	{ .5/9,	.5/9 },
	{ 0.416666666666667e-1,	0., 0.125 },
	{ 1./6, 0., -0.5, 2./3 },
	{ 0.1875e+1, 0., -0.7875e+1, 0.7e+1, -0.5 },
	{ -0.4227272727272727e+1, 0., 0.176995738636364e+2, -0.142883522727273e+2, 0.522017045454545, 0.104403409090909e+1 },
	{ 0.840622673179752e+1, 0., -0.337303717185049e+2, 0.271460231129622e+2, 0.342046929709216, -0.184653767923258e+1, 0.577349465373733 },
	{ 0.128104575163399, 0., 0., -0.108433734939759, 0.669375, -0.146666666666667, 0.284444444444444, 0.173176381998583 },
};


// Weights of the nine derivative vectors F[0..8] in the 6th-order solution
// that cycle() stores into model->W.  They add up to 1.
const double __Butchers_b[9] = {
	0.567119155354449e-1,
	0.,
	0.,
	0.210909572355356,
	0.141490384615385,
	0.202051282051282,
	0.253186813186813,
	0.843679809736684e-1,
	0.512820512820513e-1
};
} // inline namespace



void
CNRun::CIntegrateRK65::
prepare()
{
      // give each of the nine stage buffers, and the 5th-order scratch
      // vector, one slot per model variable
	const auto n_vars = model->_var_cnt;
	for ( unsigned short stage = 0; stage < 9; ++stage ) {
		Y[stage].resize( n_vars);
		F[stage].resize( n_vars);
	}
	y5.resize( n_vars);

      // with standalone units present, the step must not exceed the
      // model's discrete dt
	const bool must_cap_dt_max =
		model->standalone_unit_cnt() > 0 &&
		_dt_max > model->_discrete_dt;
	if ( !must_cap_dt_max )
		return;

	_dt_max = model->_discrete_dt;
	if ( model->verbosely > 1 )
		cout << "CIntegrateRK65: Set dt_max to model->discrete_dt: " << _dt_max << endl;
}


void
__attribute__ ((hot))
CNRun::CIntegrateRK65::
cycle()
{
      // omp stuff found inapplicable due to considerable overhead in sys time
      // (thread creation)
	unsigned int i, j, k;

	double	aF;

      // calculate iterative terms rk65_Y[__i] and rk65_F[__i] (to sixth order)
	for ( i = 0; i < 9; ++i ) {
//#pragma omp parallel for schedule(static,model->_var_cnt/2+1) firstprivate(aF,j,i)
		for ( k = 0; k < model->_var_cnt; ++k ) {
			aF = 0.0;
			for ( j = 0; j < i; ++j )
				aF += __Butchers_a[i][j] * F[j][k];
			Y[i][k] = model->V[k] + dt * aF;
		}
	      // see to this vector's dt
		F[i][0] = 1.;

//#pragma omp consider...
		for_model_hosted_neurons (model,N)
			(*N) -> derivative( Y[i], F[i]);
		for_model_hosted_synapses (model,S)
			(*S) -> derivative( Y[i], F[i]);
	}

      // sum up Y[i] and F[i] to build 5th order scheme -> y5
//#pragma omp parallel for private(aF,j)
	for ( k = 0; k < model->_var_cnt; ++k ) {
		aF = 0.0;
		for ( j = 0; j < 8; ++j )
			aF += __Butchers_a[8][j] * F[j][k];
		y5[k] = model->V[k] + dt * aF;
	}

      // sum up Y[i] and F[i] to build 6th order scheme -> W
//#pragma omp parallel for schedule(static,model->_var_cnt/2+1) private(aF,j)
	for ( k = 0; k < model->_var_cnt; ++k ) {
		aF = 0.0;
		for ( j = 0; j < 9; ++j )
			aF += __Butchers_b[j] * F[j][k];
		model->W[k] = model->V[k] + dt * aF;
	}

      // kinkiness in synapses causes dt to rocket
	double	dtx = min( _dt_max, dt * _dt_max_cap);

      // determine minimal necessary new dt to get error < eps based on the
      // difference between results in y5 and W
	double try_eps, delta, try_dt;
      // exclude time (at index 0)
//#pragma omp parallel for private(try_eps,delta,try_dtj)
	for ( k = 1; k < model->_var_cnt; ++k ) {
		try_eps = max (_eps_abs, min (_eps, abs(_eps_rel * model->W[k])));
		delta = abs (model->W[k] - y5[k]);
		if ( delta > DBL_EPSILON * y5[k] ) {
			try_dt = exp( (log(try_eps) - log(delta)) / 6) * dt;
			if ( try_dt < dtx )
				dtx = try_dt;
		}
	}
      // make sure we don't grind to a halt
	if ( dtx < _dt_min )
		dtx = _dt_min;

      // set the new step
	dt = dtx;
}








// -------------- CModel::advance and dependents

// Interrupt flag: raised by ctrl_c_handler(), cleared by CModel::advance()
// and polled in _DO_ADVANCE_COMMON_INLOOP_BEGIN.
volatile sig_atomic_t chris_at_kbd;

// Signal handler that merely records that an interrupt has been requested;
// the signal number itself is not used.
void
ctrl_c_handler( int /* signum */)
{
	chris_at_kbd = 1;
}


// Advance the model by `dist' units of model time.
//
// Prepares the model first if it is flagged CN_MDL_NOTREADY, then hands over
// to the _do_advance_on_* variant matching the kinds of units present.
// While the run is in progress SIGINT is routed to ctrl_c_handler() so that
// it can be interrupted from the keyboard; afterwards SIGINT is set to
// SIG_IGN on every path (previously this happened on the mixed path only,
// the others left ctrl_c_handler installed).
//
// dist            -- how far to advance
// cpu_time_used_p -- passed on to the chosen variant, which stores the CPU
//                    seconds spent there unless it is null
//
// Returns the number of steps made, or 0 if the model has no units (in
// which case the SIGINT disposition is left untouched).
unsigned int
CNRun::CModel::
advance( double dist, double *cpu_time_used_p)
{
	if ( unit_list.size() == 0 ) {
		fprintf( stderr, "Model is empty\n");
		return 0;
	}
	if ( _status & CN_MDL_NOTREADY )
		prepare_advance();

	const bool	have_hosted_units = (hosted_unit_cnt() > 0),
			have_standalone_units = (standalone_unit_cnt() > 0),
			have_ddtbound_units = (ddtbound_unit_cnt() > 0);

      // arm the interrupt flag only once we are really about to run
	chris_at_kbd = 0;
	signal( SIGINT, ctrl_c_handler);

	unsigned int retval;
	if ( have_hosted_units && !have_standalone_units && !have_ddtbound_units )
		retval = _do_advance_on_pure_hosted( dist, cpu_time_used_p);
	else if ( !have_hosted_units && have_standalone_units && !have_ddtbound_units )
		retval = _do_advance_on_pure_standalone( dist, cpu_time_used_p);
	else if ( !have_hosted_units && !have_standalone_units && have_ddtbound_units )
		retval = _do_advance_on_pure_ddtbound( dist, cpu_time_used_p);
	else
		retval = _do_advance_on_mixed( dist, cpu_time_used_p);

      // same disposition after the run, whichever variant did the work
	signal( SIGINT, SIG_IGN);
	return retval;
}

// Rebuild the list of distinct periods at which periodic sources need to be
// re-read (regular_periods), together with a parallel list of per-period
// counters (regular_periods_last_checked), and then make all units with
// periodic sources read their sources once.
void
__attribute__ ((hot))
CNRun::CModel::
_setup_schedulers()
{
	regular_periods.clear();
	regular_periods_last_checked.clear();
	if ( units_with_periodic_sources.size() > 0 ) { // determine period(s) at which to wake up reader update loop
	      // NOTE(review): every source attached to such a unit is cast to
	      // CSourcePeriodic here -- confirm that these units cannot carry
	      // sources of any other kind
		for_all_units_with_periodic_sources (U)
			for ( auto S = (*U) -> sources.begin(); S != (*U)->sources.end(); ++S )
				regular_periods.push_back( (reinterpret_cast<CSourcePeriodic*>(S->source)) -> period);
	      // unique() only drops *adjacent* duplicates, so the list has to
	      // be sorted first for equal periods to be collapsed reliably
		regular_periods.sort();
		regular_periods.unique();
	      // one counter per distinct period, all starting from zero
		regular_periods_last_checked.resize( regular_periods.size());
	}

	if ( verbosely > 2 && regular_periods.size() > 0 ) {
	      // size() is unsigned, hence %zu
		printf( "%zu timepoint(s) in scheduler_update_periods: ", regular_periods.size());
		auto I = regular_periods.begin();
		for ( size_t i = 0; i < regular_periods.size()-1; ++i, ++I )
			printf( "%g, ", *I);
		printf( "%g\n\n", regular_periods.back());
	}

      // ensure all schedulers are effective at the beginning, too
	for_all_units_with_periodic_sources (U)
		(*U) -> apprise_from_sources();
}


void
CNRun::CModel::
prepare_advance()
{
	if ( _status & CN_MDL_LOGDT && !_dt_logger ) {
		string	fname = name + ".dt";
		_dt_logger = new ofstream( fname.data());
	}
	if ( _status & CN_MDL_LOGSPIKERS && !_spike_logger ) {
		string	fname = name + ".spikes";
		_spike_logger = new ofstream( fname.data());
	}

	_setup_schedulers();

	if ( !hosted_unit_cnt() )
		_integrator->dt = _discrete_dt;

	if ( ddtbound_unit_cnt() )
		_status |= CN_MDL_HAS_DDTB_UNITS;
	else
		_status &= ~CN_MDL_HAS_DDTB_UNITS;

	_status &= ~CN_MDL_NOTREADY;

	if ( verbosely > 5 )
		fprintf( stderr, "Model prepared\n");
}



// comment concerning for_all_conscious_neurons loop:
// these have no next_time_E or suchlike, have `fixate' implicit herein; also,
// conscious neurons fire irrespective of whatever happens elsewhere in the model, and
// they, logically, have no inputs

// First part of every iteration of the _do_advance_on_* loops.  It must be
// expanded directly inside a loop body, as it uses `break'.  In order, it:
//  - leaves the loop, after printing "Interrupted", once chris_at_kbd is set;
//  - has all units with continuous sources call apprise_from_sources();
//  - walks regular_periods and regular_periods_last_checked in lockstep and,
//    for each period P with counter C such that model_time() >= P * (C+1),
//    bumps C and has all units with periodic sources call
//    apprise_from_sources();
//  - calls possibly_fire() on all conscious neurons;
//  - calls update_queue() on every entry of mx_syn_list that has a non-null
//    _source.
#define _DO_ADVANCE_COMMON_INLOOP_BEGIN \
	if ( chris_at_kbd ) {		\
		printf( "\nInterrupted\n");	\
		break;			\
	}								\
	for_all_units_with_contiuous_sources (U)			\
		(*U)->apprise_from_sources();				\
	{								\
		auto I = regular_periods.begin(); \
		auto Ic = regular_periods_last_checked.begin(); \
		for ( ; I != regular_periods.end(); ++I, ++Ic ) \
			if ( unlikely(model_time() >= *I * (*Ic + 1)) ) { \
				(*Ic)++;				\
				for_all_units_with_periodic_sources (U)	\
					(*U)->apprise_from_sources();	\
			}						\
	}								\
	for_all_conscious_neurons (N)					\
	        (*N) -> possibly_fire();		       		\
									\
	for ( auto Yc = mx_syn_list.begin(); Yc != mx_syn_list.end(); ++Yc ) \
		if ( (*Yc)->_source )					\
			(*Yc) -> update_queue();


// Middle part of every iteration of the _do_advance_on_* loops, expanded
// after the units have been serviced and before they are fixated.  It relies
// on the caller's locals have_listeners, have_discrete_listen_dt and
// last_made_listen.  In order, it:
//  - if there are listeners, has all listening units call tell(): on every
//    iteration when no discrete listen_dt is in force, otherwise only when
//    at least listen_dt of model time has passed since last_made_listen
//    (which is then moved forward by listen_dt);
//  - if CN_MDL_LOGDT is set, writes model time and current dt to *_dt_logger;
//  - has each spike-logging neuron call do_detect_spike_or_whatever() and,
//    unless CN_MDL_DISKLESS is set, writes model time plus the neuron's
//    _serial_id (with CN_MDL_LOGUSINGID) or _label to *_spike_logger when
//    n_spikes_in_last_dt() is non-zero and CN_MDL_LOGSPIKERS is set.
#define _DO_ADVANCE_COMMON_INLOOP_MID \
	if ( have_listeners ) {						\
		if ( have_discrete_listen_dt ) {			\
			if ( model_time() - last_made_listen >= listen_dt ) { \
				for_all_listening_units (U)		\
					(*U) -> tell();			\
				last_made_listen += listen_dt;		\
			}						\
		} else							\
			for_all_listening_units (U)			\
				(*U) -> tell();				\
	}								\
	if ( unlikely (_status & CN_MDL_LOGDT) )			\
		(*_dt_logger) << model_time() << "\t" << dt() << endl;	\
									\
	for_all_spikelogging_neurons (N) {				\
		(*N) -> do_detect_spike_or_whatever();			\
		if ( !(_status & CN_MDL_DISKLESS) &&			\
		     (*N)->n_spikes_in_last_dt() &&			\
		     _status & CN_MDL_LOGSPIKERS ) {			\
			(*_spike_logger) << model_time() << "\t";	\
			if ( _status & CN_MDL_LOGUSINGID )		\
				(*_spike_logger) << (*N)->_serial_id << endl; \
			else						\
				(*_spike_logger) << (*N)->_label << endl; \
		}							\
	}


// Last part of every iteration of the _do_advance_on_* loops.  It relies on
// the caller's locals steps, cpu_time_lastchecked, time_started and
// time_ending.  It increments both the model's _cycle and the caller's
// steps, and, when verbosely is non-zero and more than 2 seconds of clock()
// time have passed since cpu_time_lastchecked, rewrites a progress
// indicator on stderr: percentage done, model time, or both, as selected by
// CN_MDL_DISPLAY_PROGRESS_PERCENT and CN_MDL_DISPLAY_PROGRESS_TIME.  With a
// negative verbosely the indicator is shifted to the right by
// -(verbosely+1) times 8, 16 or 24 columns respectively.
#define _DO_ADVANCE_COMMON_INLOOP_END \
	++_cycle;							\
	++steps;							\
	if ( verbosely != 0 ) {						\
		if ( unlikely (((double)(clock() - cpu_time_lastchecked)) / CLOCKS_PER_SEC > 2) ) { \
			cpu_time_lastchecked = clock();			\
			if ( _status & CN_MDL_DISPLAY_PROGRESS_PERCENT && !(_status & CN_MDL_DISPLAY_PROGRESS_TIME) ) \
				fprintf( stderr, "\r\033[%dC%4.1f%%\r", \
					 (verbosely < 0) ? -(verbosely+1)*8 : 0, \
					 100 - (model_time() - time_ending) / (time_started - time_ending) * 100); \
			else if ( _status & CN_MDL_DISPLAY_PROGRESS_TIME && !(_status & CN_MDL_DISPLAY_PROGRESS_PERCENT) ) \
				fprintf( stderr, "\r\033[%dC%'6.0fms\r", \
					 (verbosely < 0) ? -(verbosely+1)*16 : 0, \
					 model_time());			\
			else if ( _status & CN_MDL_DISPLAY_PROGRESS_PERCENT && _status & CN_MDL_DISPLAY_PROGRESS_TIME ) \
				fprintf( stderr, "\r\033[%dC%'6.0fms %4.1f%%\r", \
					 (verbosely < 0) ? -(verbosely+1)*24 : 0, \
					 model_time(),			\
					 100 - (model_time() - time_ending) / (time_started - time_ending) * 100); \
			fflush( stderr);				\
		}							\
	}


// Closing part of the _do_advance_on_* functions, expanded once after the
// loop.  It relies on the caller's locals cpu_time_started, cpu_time_ended
// and steps and on the parameters dist and cpu_time_used_p, and declares a
// local cpu_time_taken_seconds of its own (so it can appear only once per
// scope).  It:
//  - measures the clock() time spent since cpu_time_started and stores it,
//    in seconds, into *cpu_time_used_p unless that pointer is null;
//  - erases the progress line on stderr when verbosely is positive or
//    negative (i.e., non-zero);
//  - when verbosely is positive, prints a summary line to stdout; note that
//    the per-cycle average in it is computed as model_time()/_cycle.
#define _DO_ADVANCE_COMMON_EPILOG \
	cpu_time_ended = clock();					\
	double cpu_time_taken_seconds = ((double) (cpu_time_ended - cpu_time_started)) / CLOCKS_PER_SEC; \
	if ( cpu_time_used_p )						\
		*cpu_time_used_p = cpu_time_taken_seconds;		\
	if ( verbosely > 0 || verbosely <= -1 ) {			\
		fprintf( stderr, "\r\033[K");				\
		fflush( stderr);					\
	}								\
	if ( verbosely > 0 )						\
		printf( "@%.1fmsec (+%.1f in %lu cycles in %.2f sec CPU time:" \
			" avg %.3g \302\265s/cyc, ratio to CPU time %.2g)\n\n", \
			model_time(), dist, steps, cpu_time_taken_seconds, \
			model_time()/_cycle * 1e3, model_time() / cpu_time_taken_seconds / 1e3);





unsigned int
__attribute__ ((hot))
CNRun::CModel::
_do_advance_on_pure_hosted( double dist, double *cpu_time_used_p)
{
	bool	have_listeners = (lisn_unit_list.size() > 0),
		have_discrete_listen_dt = (listen_dt > 0.);

	clock_t	cpu_time_started = clock(),
		cpu_time_ended,
		cpu_time_lastchecked = cpu_time_started;

	double	time_started = model_time(),
		time_ending = time_started + dist,
		last_made_listen = time_started;

	unsigned long steps = 0;
	do {
		_DO_ADVANCE_COMMON_INLOOP_BEGIN

		_integrator->cycle();

		_DO_ADVANCE_COMMON_INLOOP_MID

	      // fixate
		_integrator->fixate();

		_DO_ADVANCE_COMMON_INLOOP_END

	      // model_time is advanced implicitly in _integrator->cycle()
	} while ( model_time() < time_ending );

	_DO_ADVANCE_COMMON_EPILOG

	return steps;
}



unsigned int
__attribute__ ((hot))
CNRun::CModel::
_do_advance_on_pure_standalone( double dist, double *cpu_time_used_p)
{
	bool	have_listeners = (lisn_unit_list.size() > 0),
		have_discrete_listen_dt = (listen_dt > 0.);

	clock_t	cpu_time_started = clock(),
		cpu_time_ended,
		cpu_time_lastchecked = cpu_time_started;

	double	time_started = model_time(),
		time_ending = time_started + dist,
		last_made_listen = time_started;

	unsigned long steps = 0;
	do {
		_DO_ADVANCE_COMMON_INLOOP_BEGIN

	      // service simple units w/out any vars on the integration vector V
		for_all_standalone_neurons (N)
			if ( !(*N)->is_conscious() )
				(*N) -> preadvance();
		for_all_standalone_synapses (Y)
			(*Y) -> preadvance();

	      // even in the case of n_hosted_{neurons,units} == 0, we would need _integrator->cycle() to advance V[0],
	      // which is our model_time(); which is kind of expensive, so here's a shortcut
		V[0] += _discrete_dt;
		// _discrete_time += _discrete_dt;  // not necessary

		_DO_ADVANCE_COMMON_INLOOP_MID

	      // fixate
		for_all_standalone_neurons (N)
			if ( !(*N)->is_conscious() )
				(*N) -> fixate();
		for_all_standalone_synapses (Y)
			(*Y) -> fixate();

		_DO_ADVANCE_COMMON_INLOOP_END

	} while ( model_time() < time_ending );

	_DO_ADVANCE_COMMON_EPILOG

	return steps;
}







unsigned int
__attribute__ ((hot))
CNRun::CModel::
_do_advance_on_pure_ddtbound( double dist, double *cpu_time_used_p)
{
	bool	have_listeners = (lisn_unit_list.size() > 0),
		have_discrete_listen_dt = (listen_dt > 0.);

	clock_t	cpu_time_started = clock(),
		cpu_time_ended,
		cpu_time_lastchecked = cpu_time_started;

	double	time_started = model_time(),
		time_ending = time_started + dist,
		last_made_listen = time_started;

	unsigned long steps = 0;
	do {
		_DO_ADVANCE_COMMON_INLOOP_BEGIN

	      // lastly, service units only serviceable at discrete dt
		for_all_ddtbound_neurons (N)
			if ( !(*N)->is_conscious() )
				(*N) -> preadvance();
		for_all_ddtbound_synapses (Y)
			(*Y) -> preadvance();

		V[0] += _discrete_dt;
		_discrete_time += _discrete_dt;

		_DO_ADVANCE_COMMON_INLOOP_MID

	      // fixate
		for_all_ddtbound_neurons (N)
			if ( !(*N)->is_conscious() )
				(*N) -> fixate();
		for_all_ddtbound_synapses (Y)
			(*Y) -> fixate();

		_DO_ADVANCE_COMMON_INLOOP_END

	} while ( model_time() < time_ending );

	_DO_ADVANCE_COMMON_EPILOG

	return steps;
}





unsigned int
__attribute__ ((hot))
CNRun::CModel::
_do_advance_on_mixed( double dist, double *cpu_time_used_p)
{
	bool	have_hosted_units = (hosted_unit_cnt() > 0),
		is_discrete_dt_bound = _status & CN_MDL_HAS_DDTB_UNITS,
		have_listeners = (lisn_unit_list.size() > 0),
		have_discrete_listen_dt = (listen_dt > 0.),
		need_fixate_ddtbound_units;

	clock_t	cpu_time_started = clock(),
		cpu_time_ended,
		cpu_time_lastchecked = cpu_time_started;

	double	time_started = model_time(),
		time_ending = time_started + dist,
		last_made_listen = time_started;

	unsigned long steps = 0;
	do {
		_DO_ADVANCE_COMMON_INLOOP_BEGIN

		_integrator->cycle();

	      // service simple units w/out any vars on the integration vector V
		for_all_standalone_neurons (N)
			if ( !(*N)->is_conscious() )
				(*N) -> preadvance();
		for_all_standalone_synapses (Y)
			(*Y) -> preadvance();

	      // lastly, service units only serviceable at discrete dt
		if ( is_discrete_dt_bound && model_time() >= _discrete_time ) {
			for_all_ddtbound_neurons (N)
				if ( !(*N)->is_conscious() )
					(*N) -> preadvance();
			for_all_ddtbound_synapses (Y)
				(*Y) -> preadvance();

			_discrete_time += _discrete_dt;
			need_fixate_ddtbound_units = true;
		} else
			need_fixate_ddtbound_units = false;

		if ( !have_hosted_units )
			V[0] += _discrete_dt;


		_DO_ADVANCE_COMMON_INLOOP_MID


	      // fixate
		_integrator->fixate();

		for_all_standalone_neurons (N)
			if ( !(*N)->is_conscious() )
				(*N) -> fixate();
		for_all_standalone_synapses (Y)
			(*Y) -> fixate();

		if ( need_fixate_ddtbound_units ) {
			for_all_ddtbound_neurons (N)
				if ( !(*N)->is_conscious() )
					(*N) -> fixate();
			for_all_ddtbound_synapses (Y)
				(*Y) -> fixate();
		}


		_DO_ADVANCE_COMMON_INLOOP_END

	} while ( model_time() < time_ending );

	_DO_ADVANCE_COMMON_EPILOG

	return steps;
}


// eof
