00001 /************************************************************************ 00002 * Verve * 00003 * Copyright (C) 2004-2006 * 00004 * Tyler Streeter tylerstreeter@gmail.com * 00005 * All rights reserved. * 00006 * Web: http://verve-agents.sourceforge.net * 00007 * * 00008 * This library is free software; you can redistribute it and/or * 00009 * modify it under the terms of EITHER: * 00010 * (1) The GNU Lesser General Public License as published by the Free * 00011 * Software Foundation; either version 2.1 of the License, or (at * 00012 * your option) any later version. The text of the GNU Lesser * 00013 * General Public License is included with this library in the * 00014 * file license-LGPL.txt. * 00015 * (2) The BSD-style license that is included with this library in * 00016 * the file license-BSD.txt. * 00017 * * 00018 * This library is distributed in the hope that it will be useful, * 00019 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 00020 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files * 00021 * license-LGPL.txt and license-BSD.txt for more details. * 00022 ************************************************************************/ 00023 00024 #ifndef VERVE_AGENT_H 00025 #define VERVE_AGENT_H 00026 00027 #include "Globals.h" 00028 #include "AgentDescriptor.h" 00029 #include "Observation.h" 00030 00031 namespace verve 00032 { 00033 class Observation; 00034 class PredictiveModel; 00035 class RLModule; 00036 00039 class Agent 00040 { 00041 public: 00047 VERVE_DECL Agent(const AgentDescriptor& desc); 00048 00049 // TODO: VERVE_DECL Agent(const Agent& a); 00050 00051 // TODO: VERVE_DECL Agent(const std::string& filename); 00052 00057 VERVE_DECL virtual ~Agent(); 00058 00062 VERVE_DECL virtual void VERVE_CALL destroy(); 00063 00065 VERVE_DECL virtual void VERVE_CALL resetShortTermMemory(); 00066 00077 VERVE_DECL virtual unsigned int VERVE_CALL update( 00078 real reinforcement, const Observation& obs, real dt); 00079 00081 VERVE_DECL virtual unsigned int VERVE_CALL 00082 getNumDiscreteSensors()const; 00083 00085 VERVE_DECL virtual unsigned int VERVE_CALL 00086 getNumContinuousSensors()const; 00087 00090 VERVE_DECL virtual void VERVE_CALL setETraceTimeConstant( 00091 real timeConstant); 00092 00095 VERVE_DECL virtual void VERVE_CALL setTDDiscountTimeConstant( 00096 real timeConstant); 00097 00105 VERVE_DECL virtual void VERVE_CALL setTDLearningRate( 00106 real valueFunctionTimeConstant, real policyLearningMultiplier); 00107 00113 VERVE_DECL virtual void VERVE_CALL setModelLearningRate( 00114 real timeConstant); 00115 00119 VERVE_DECL virtual void VERVE_CALL setLearningEnabled( 00120 bool enabled); 00121 00124 VERVE_DECL virtual long unsigned int VERVE_CALL getAge()const; 00125 00129 VERVE_DECL virtual std::string VERVE_CALL getAgeString()const; 00130 00132 VERVE_DECL virtual real VERVE_CALL getTDError()const; 00133 00137 VERVE_DECL virtual real VERVE_CALL getModelMSE()const; 00138 00141 VERVE_DECL virtual unsigned int VERVE_CALL getLastPlanLength()const; 00142 00146 VERVE_DECL virtual real VERVE_CALL computeValueEstimation( 00147 const Observation& obs); 00148 00150 VERVE_DECL virtual const AgentDescriptor* VERVE_CALL 00151 getDescriptor()const; 00152 00164 VERVE_DECL virtual void VERVE_CALL saveValueData( 00165 unsigned int continuousResolution, 00166 const std::string& filename = ""); 00167 00174 VERVE_DECL virtual void VERVE_CALL saveStateRBFData( 00175 const std::string& filename = ""); 00176 00179 //virtual void VERVE_CALL internal_load(const std::string& filename); 00180 00185 //virtual void VERVE_CALL save(const std::string& filename = ""); 00186 00187 protected: 00190 void setStepSize(real value); 00191 00196 unsigned int planningSequence(const Observation& predCurrentObs, 00197 real predCurrentReward, real currentUncertainty); 00198 00200 void incrementAge(); 00201 00203 AgentDescriptor mDescriptor; 00204 00206 RLModule* mRLModule; 00207 00209 PredictiveModel* mPredictiveModel; 00210 00214 bool mFirstStep; 00215 00217 unsigned int mActionIndex; 00218 00221 Observation mActualPrevObs; 00222 00225 Observation mPredCurrentObs; 00226 00229 Observation mTempPlanningObs; 00230 00232 bool mLearningEnabled; 00233 00235 real mStepSize; 00236 00238 long unsigned int mAgeHours; 00239 00241 unsigned int mAgeMinutes; 00242 00244 real mAgeSeconds; 00245 00248 unsigned int mLastPlanningSequenceLength; 00249 }; 00250 } 00251 00252 #endif