Verve: Defines.h Source File

00001 /************************************************************************
00002 * Verve                                                                 *
00003 * Copyright (C) 2004-2006                                               *
00004 * Tyler Streeter  tylerstreeter@gmail.com                               *
00005 * All rights reserved.                                                  *
00006 * Web: http://verve-agents.sourceforge.net                              *
00007 *                                                                       *
00008 * This library is free software; you can redistribute it and/or         *
00009 * modify it under the terms of EITHER:                                  *
00010 *   (1) The GNU Lesser General Public License as published by the Free  *
00011 *       Software Foundation; either version 2.1 of the License, or (at  *
00012 *       your option) any later version. The text of the GNU Lesser      *
00013 *       General Public License is included with this library in the     *
00014 *       file license-LGPL.txt.                                          *
00015 *   (2) The BSD-style license that is included with this library in     *
00016 *       the file license-BSD.txt.                                       *
00017 *                                                                       *
00018 * This library is distributed in the hope that it will be useful,       *
00019 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
00020 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
00021 * license-LGPL.txt and license-BSD.txt for more details.                *
00022 ************************************************************************/
00023 
00024 #ifndef VERVE_DEFINES_H
00025 #define VERVE_DEFINES_H
00026 
00027 #include <iostream>
00028 #include <vector>
00029 #include <assert.h>
00030 #include <math.h>
00031 
00032 #include "Platform.h"
00033 #include "Logger.h"
00034 
00035 namespace verve
00036 {
00037         #ifdef VERVE_USE_DOUBLE
              // `real` is the floating-point type used by the declarations in
              // this header.  It is `double` when VERVE_USE_DOUBLE is defined
              // at compile time and `float` otherwise.
00038                 typedef double real;
00039         #else
00040                 typedef float real;
00041         #endif
00042 
00043         const real VERVE_E = (real)2.71828182845904523536; // Euler's number e, converted to `real`.
00044 
00046         enum AgentArchitecture
00047         {
                      // Enumerates the agent architectures; it is the type of
                      // defaults::agentArchitecture in this header.  The
                      // enumerators take the implicit values 0, 1 and 2 in
                      // declaration order.
                      // NOTE(review): the per-enumerator documentation is not
                      // present in this listing; the descriptions below are
                      // inferred from the names -- confirm against the
                      // original header.

                      // Presumably a plain reinforcement learning agent.
00049                 RL,
00050 
                      // Presumably reinforcement learning combined with a
                      // learned model.
00053                 MODEL_RL,
00054 
                      // Presumably model-based reinforcement learning with an
                      // added curiosity mechanism.
00058                 CURIOUS_MODEL_RL
00059         };
00060 
00062         namespace defaults
00063         {
                      // Compile-time default values for the library's tunable
                      // parameters.  Floating-point defaults are of type `real`.
                      // NOTE(review): the original per-constant documentation is
                      // not present in this listing; descriptions marked
                      // "presumably" are inferred from names and should be
                      // confirmed against the original header.

                      // Default step size (0.1).  Presumably the 'dt' of the
                      // time constant notes below, in seconds -- confirm.
00065                 const real stepSize = (real)0.1;
00066 
00067                 // --------------------------------------------------------
00068                 // TIME CONSTANT NOTES
00069                 // 
00070                 // These notes discuss the basics of time constants used in all 
00071                 // kinds of dynamic systems.  The time constant is the amount of 
00072                 // time it takes for the output to decay to 37% of its initial value.
00073                 // 
00074                 // Here, let 'T' represent a time constant.
00075                 // 
00076                 // System response with zero input:
00077                 // x(t) = e^(-t/T) * x(0)
00078                 // The discrete time equation is:
00079                 // x(t + dt) = e^(-dt/T) * x(t)
00080                 // 
00081                 // System response with a constant input:
00082                 // x(t) = e^(-t/T) * x(0) + (1 - e^(-t/T)) * h
00083                 // where h is the resting level/equilibrium point/constant input 
00084                 // value.  The discrete time equation is: 
00085                 // x(t + dt) = e^(-dt/T) * x(t) + (1 - e^(-dt/T)) * h
00086                 // --------------------------------------------------------
00087 
00088                 // OLD...
00089                 // Learning rates should be much slower than Neuron firing rate 
00090                 // decays (i.e. membrane time constants) to ensure a 
00091                 // steady state condition.  See Porr and Worgotter, "Isotropic 
00092                 // Sequence Order Learning."  Also, the average reinforcement 
00093                 // rate should change much slower than the learning rate.  See 
00094                 // Nathaniel Daw's PhD dissertation.  So the relative rate of 
00095                 // change of these parameters is:
00096                 // 
00097                 // (slowest)
00098                 // average reinforcement rate
00099                 // learning rate
00100                 // change in membrane potential
00101                 // (fastest)
00102 
00103                 // The membrane time constant is a measure of how fast a Neuron's 
00104                 // membrane potential can change.  It ranges from 1-100 ms 
00105                 // (0.001-0.1 s) in animals, according to Koch, 1998, "Biophysics 
00106                 // of Computation."  Firing rates change at roughly the same rate 
00107                 // as membrane potentials.
00108 
                      // Default time constant for eligibility traces (0.1).
                      // Presumably in seconds, like the notes above -- confirm.
00110                 const real eTraceTimeConstant = (real)0.1;
00111 
                      // Default time constant for the temporal-difference
                      // discount (1.0).  Presumably in seconds -- confirm.
00116                 const real TDDiscountTimeConstant = (real)1.0;
00117 
                      // Default time constant for value function learning (0.1).
                      // Presumably in seconds -- confirm.
00121                 const real valueFunctionLearningTimeConstant = (real)0.1;
00122 
                      // Default policy learning multiplier (5).  Presumably
                      // scales policy learning relative to value function
                      // learning -- confirm.  The integer literal is converted
                      // to `real` implicitly, unlike the explicit casts used
                      // for the other constants.
00125                 const real policyLearningMultiplier = 5;
00126 
                      // Default time constant for model learning (0.001).
                      // Presumably in seconds -- confirm.
00130                 const real modelLearningTimeConstant = (real)0.001;
00131 
                      // Default threshold (0.01) for treating an eligibility
                      // trace as active.  NOTE(review): whether traces at or
                      // below this value are dropped is not shown here.
00135                 const real activeETraceThreshold = (real)0.01;
00136 
                      // Default minimum action selection probability (0.0).
                      // Presumably a lower bound applied to each action's
                      // selection probability -- confirm.
00141                 const real minActionSelectionProb = (real)0.0;
00142 
                      // Default agent architecture: RL.
00144                 const AgentArchitecture agentArchitecture = RL;
00145 
                      // Default maximum number of planning steps (10).
00147                 const unsigned int maxNumPlanningSteps = 10;
00148 
                      // Default planning uncertainty threshold (0.1).
                      // Presumably planning stops once uncertainty exceeds
                      // this value -- confirm.
00151                 const real planningUncertaintyThreshold = (real)0.1;
00152         }
00153 }
00154 
00155 #endif