00001 /************************************************************************ 00002 * Verve * 00003 * Copyright (C) 2004-2006 * 00004 * Tyler Streeter tylerstreeter@gmail.com * 00005 * All rights reserved. * 00006 * Web: http://verve-agents.sourceforge.net * 00007 * * 00008 * This library is free software; you can redistribute it and/or * 00009 * modify it under the terms of EITHER: * 00010 * (1) The GNU Lesser General Public License as published by the Free * 00011 * Software Foundation; either version 2.1 of the License, or (at * 00012 * your option) any later version. The text of the GNU Lesser * 00013 * General Public License is included with this library in the * 00014 * file license-LGPL.txt. * 00015 * (2) The BSD-style license that is included with this library in * 00016 * the file license-BSD.txt. * 00017 * * 00018 * This library is distributed in the hope that it will be useful, * 00019 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 00020 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files * 00021 * license-LGPL.txt and license-BSD.txt for more details. * 00022 ************************************************************************/ 00023 00024 #ifndef VERVE_DEFINES_H 00025 #define VERVE_DEFINES_H 00026 00027 #include <iostream> 00028 #include <vector> 00029 #include <assert.h> 00030 #include <math.h> 00031 00032 #include "Platform.h" 00033 #include "Logger.h" 00034 00035 namespace verve 00036 { 00037 #ifdef VERVE_USE_DOUBLE 00038 typedef double real; 00039 #else 00040 typedef float real; 00041 #endif 00042 00043 const real VERVE_E = (real)2.71828182845904523536; 00044 00046 enum AgentArchitecture 00047 { 00049 RL, 00050 00053 MODEL_RL, 00054 00058 CURIOUS_MODEL_RL 00059 }; 00060 00062 namespace defaults 00063 { 00065 const real stepSize = (real)0.1; 00066 00067 // -------------------------------------------------------- 00068 // TIME CONSTANT NOTES 00069 // 00070 // These notes discuss the basics of time constants used in all 00071 // kinds of dynamic systems. The time constant is the amount of 00072 // time it takes for the output to decay to 37% of its initial value. 00073 // 00074 // Here, let 'T' represent a time constant. 00075 // 00076 // System response with zero input: 00077 // x(t) = e^(-t/T) * x(0) 00078 // The discrete time equation is: 00079 // x(t + dt) = e^(-dt/T) * x(t) 00080 // 00081 // System response with a constant input: 00082 // x(t) = e^(-t/T) * x(0) + (1 - e^(-t/T)) * h 00083 // where h is the resting level/equilibrium point/constant input 00084 // value. The discrete time equation is: 00085 // x(t + dt) = e^(-dt/T) * x(t) + (1 - e^(-dt/T) * h 00086 // -------------------------------------------------------- 00087 00088 // OLD... 00089 // Learning rates should be much slower than Neuron firing rate 00090 // decays (i.e. membrane time constants) to ensure a 00091 // steady state condition. See Porr and Worgotter, "Isotropic 00092 // Sequence Order Learning." Also, the average reinforcement 00093 // rate should change much slower than the learning rate. See 00094 // Nathaniel Daw's PhD dissertation. So the relative rate of 00095 // change 00096 // of these parameters is: 00097 // (slowest) 00098 // average reinforcement rate 00099 // learning rate 00100 // change in membrane potential 00101 // (fastest) 00102 00103 // The membrane time constant is a measure of how fast a Neuron's 00104 // membrane potential can change. It ranges from 1-100 ms 00105 // (0.001-0.1 s) in animals, according to Koch, 1998, "Biophysics 00106 // of Computation." Firing rates change at roughly the same rate 00107 // as membrane potentials. 00108 00110 const real eTraceTimeConstant = (real)0.1; 00111 00116 const real TDDiscountTimeConstant = (real)1.0; 00117 00121 const real valueFunctionLearningTimeConstant = (real)0.1; 00122 00125 const real policyLearningMultiplier = 5; 00126 00130 const real modelLearningTimeConstant = (real)0.001; 00131 00135 const real activeETraceThreshold = (real)0.01; 00136 00141 const real minActionSelectionProb = (real)0.0; 00142 00144 const AgentArchitecture agentArchitecture = RL; 00145 00147 const unsigned int maxNumPlanningSteps = 10; 00148 00151 const real planningUncertaintyThreshold = (real)0.1; 00152 } 00153 } 00154 00155 #endif