% Bibliography records (BibSonomy exports, see the biburl fields) for work by
% M. A. Wiering and co-authors. Text outside of entries is ignored by BibTeX,
% so these percent-prefixed lines act as comments.
%
% Conventions used below:
%   * names are written in "von Last, First" form;
%   * acronyms and proper nouns in titles are brace-protected;
%   * page ranges use a double hyphen;
%   * month uses the predefined three-letter macros; a day of month, where
%     the export carried one, is kept in the separate (style-ignored) day field;
%   * non-standard fields (biburl, ee, notes, size, priority, ...) are kept as
%     annotations; standard styles ignore them.

% NOTE(review): the 2008-08-28 value looks like a catalogue record date rather
% than a publication date, so it is stored as timestamp instead of date
% (biblatex would otherwise prefer it over year) -- confirm.
@article{journals/tsmc/WieringH08,
  author      = {Wiering, Marco A. and van Hasselt, H.},
  title       = {Ensemble Algorithms in Reinforcement Learning},
  journal     = {IEEE Transactions on Systems, Man, and Cybernetics, Part B},
  volume      = {38},
  number      = {4},
  pages       = {930--936},
  year        = {2008},
  doi         = {10.1109/TSMCB.2008.920231},
  url         = {http://dblp.uni-trier.de/db/journals/tsmc/tsmcb38.html#WieringH08},
  ee          = {http://dx.doi.org/10.1109/TSMCB.2008.920231},
  biburl      = {http://www.bibsonomy.org/bibtex/26fd4ed0f2af0ef59271b272062f00519/dblp},
  description = {dblp},
  timestamp   = {2008-08-28},
  keywords    = {dblp},
}

% NOTE(review): this record and wiering96solving (further down) appear to
% describe the same ICML 1996 paper. Both keys are kept so that existing
% citations keep resolving; consider merging them.
@inproceedings{wiering:1996:pomdps,
  author    = {Wiering, Marco and Schmidhuber, J{\"u}rgen},
  title     = {Solving {POMDPs} with {Levin} search and {EIRA}},
  booktitle = {Machine Learning: Proceedings of 13th International Conference},
  address   = {Bari, Italy},
  year      = {1996},
  url       = {ftp://ftp.idsia.ch//pub//pub/marco/ml_levin_eira.ps.gz},
  biburl    = {http://www.bibsonomy.org/bibtex/20e90d8c0013381a054b63a74be2de239/brazovayeye},
  notes     = {Details to GP list on Wed, 24 Jul 1996 13:57:22 +0200 To appear in Proc. ICML`96, 86 K, 252 K uncompressed. Another spin-off paper of the TR (schmidhuber:1996:spm?) above. It uses ``Levin's universal search through program space (LS)''. LS is theoretically `optimal' for a wide variety of search problems including many partially observable Markov decision problems (POMDPs). Experiments show that LS can solve partially observable mazes (`POMS') involving many more states and obstacles than those solved by various previous authors. An adaptive extension of LS (ALS) is introduced. ALS uses experience to increase probabilities of instructions occurring in successful programs found by LS. To deal with cases where ALS does not lead to long term performance improvement, we use the above-mentioned, novel paradigm (EIRA) to guarantee lifelong histories of reward accelerations. We show: (a) ALS can dramatically reduce the search time consumed by successive calls of LS. (b) Additional significant speedups can be obtained by combining ALS with EIRA.},
  size      = {9 pages},
  keywords  = {imported},
}

@techreport{schmidhuber:1996:spm,
  author      = {Schmidhuber, J{\"u}rgen and Zhao, Jieyu and Wiering, Marco},
  title       = {Simple Principles of Metalearning},
  institution = {IDSIA, Lugano, Switzerland},
  address     = {Corso Elvezia 36, CH-6900, Switzerland},
  type        = {Technical Report},
  number      = {IDSIA-69-96},
  month       = jun,
  day         = {27},
  year        = {1996},
  url         = {ftp://ftp.idsia.ch//pub/juergen/meta.ps.gz},
  biburl      = {http://www.bibsonomy.org/bibtex/2c59649b88eefad9f3485db8ccc6c4174/brazovayeye},
  abstract    = {The goal of metalearning is to generate useful shifts of inductive bias by adapting the current learning strategy in a ``useful'' way. Our learner leads a single life during which actions are continually executed according to the system's internal state and current policy (a modifiable, probabilistic algorithm mapping environmental inputs and internal states to outputs and new internal states). An action is considered a learning algorithm if it can modify the policy. Effects of learning processes on later learning processes are measured using reward/time ratios. Occasional backtracking enforces success histories of still valid policy modifications corresponding to histories of lifelong reward accelerations. The principle allows for plugging in a wide variety of learning algorithms. In particular, it allows for embedding the learner's policy modification strategy within the policy itself (self-reference). To demonstrate the principle's feasibility in cases where traditional reinforcement learning fails, we test it in complex, non-Markovian, changing environments (``POMDPs''). One of the tasks involves more than $10^{13}$ states, two learners that both cooperate and compete, and strongly delayed reinforcement signals (initially separated by more than 300,000 time steps).},
  size        = {23 pages},
  notes       = {Details to GP list on Wed, 24 Jul 1996 13:57:22 +0200},
  keywords    = {metalearning},
}

@article{Salustowicz:97mlj,
  author   = {Sa{\l}ustowicz, R. P. and Wiering, M. A. and Schmidhuber, J.},
  title    = {Learning Team Strategies: Soccer Case Studies},
  journal  = {Machine Learning},
  volume   = {33},
  number   = {2-3},
  pages    = {263--282},
  month    = nov,
  day      = {12},
  year     = {1998},
  issn     = {0885-6125},
  url      = {ftp://ftp.idsia.ch/pub/rafal/soccer.ps.gz},
  biburl   = {http://www.bibsonomy.org/bibtex/27594c44d60c4a6b489feb66dbab6c733/brazovayeye},
  abstract = {We use simulated soccer to study multiagent learning. Each team's players (agents) share action set and policy, but may behave differently due to position-dependent inputs. All agents making up a team are rewarded or punished collectively in case of goals. We conduct simulations with varying team sizes, and compare several learning algorithms: TD-Q learning with linear neural networks (TD-Q), Probabilistic Incremental Program Evolution (PIPE), and a PIPE version that learns by coevolution (CO-PIPE). TD-Q is based on learning evaluation functions (EFs) mapping input/action pairs to expected reward. PIPE and CO-PIPE search policy space directly. They use adaptive probability distributions to synthesize programs that calculate action probabilities from current inputs. Our results show that linear TD-Q encounters several difficulties in learning appropriate shared EFs. PIPE and CO-PIPE, however, do not depend on EFs and find good policies faster and more reliably. This suggests that in some multiagent learning scenarios direct search in policy space can offer advantages over EF-based approaches.},
  keywords = {PIPE, TD-Q coevolution evaluation evolution, functions, incremental learning, multiagent probabilistic program reinforcement soccer,},
}

@inproceedings{Salustowicz:97icann,
  author    = {Sa{\l}ustowicz, R. P. and Wiering, M. A. and Schmidhuber, J.},
  title     = {On Learning Soccer Strategies},
  booktitle = {Proceedings of the Seventh International Conference on Artificial Neural Networks (ICANN'97)},
  editor    = {Gerstner, W. and Germond, A. and Hasler, M. and Nicoud, J.-D.},
  series    = {Lecture Notes in Computer Science},
  volume    = {1327},
  pages     = {769--774},
  publisher = {Springer-Verlag},
  address   = {Berlin Heidelberg},
  year      = {1997},
  url       = {ftp://ftp.idsia.ch/pub/rafal/ICANN_soccer.ps.gz},
  biburl    = {http://www.bibsonomy.org/bibtex/2a3b38fa6ca54c62b7ecbf630fd5a0406/brazovayeye},
  abstract  = {We use simulated soccer to study multiagent learning. Each team's players (agents) share action set and policy but may behave differently due to position-dependent inputs. All agents making up a team are rewarded or punished collectively in case of goals. We conduct simulations with varying team sizes, and compare two learning algorithms: TD-Q learning with linear neural networks (TD-Q) and Probabilistic Incremental Program Evolution (PIPE). TD-Q is based on evaluation functions (EFs) mapping input/action pairs to expected reward, while PIPE searches policy space directly. PIPE uses an adaptive probability distribution to synthesize programs that calculate action probabilities from current inputs. Our results show that TD-Q has difficulties to learn appropriate shared EFs. PIPE, however, does not depend on EFs and finds good policies faster and more reliably.},
  size      = {7 pages},
  keywords  = {PIPE},
}

@inproceedings{Salustowicz:97iconip,
  author    = {Sa{\l}ustowicz, R. P. and Wiering, M. A. and Schmidhuber, J.},
  title     = {Evolving Soccer Strategies},
  booktitle = {Progress in Connectionist-based Information Systems: Proceedings of the Fourth International Conference on Neural Information Processing ICONIP'97},
  editor    = {Kasabov, N. and Kozma, R. and Ko, K. and O'Shea, R. and Coghill, G. and Gedeon, T.},
  volume    = {1},
  pages     = {502--505},
  publisher = {Springer-Verlag},
  address   = {Singapore},
  year      = {1997},
  url       = {ftp://ftp.idsia.ch/pub/rafal/ICONIP_soccer.ps.gz},
  biburl    = {http://www.bibsonomy.org/bibtex/2f7f3a571b64039aff5f7d47deec0a7f5/brazovayeye},
  abstract  = {We study multiagent learning in a simulated soccer scenario. Players from the same team share a common policy for mapping inputs to actions. They get rewarded or punished collectively in case of goals. For varying team sizes we compare the following learning algorithms: TD-Q learning with linear neural networks (TD-Q-LIN), with a neural gas network (TD-Q-NG), Probabilistic Incremental Program Evolution (PIPE), and a PIPE variant based on coevolution (CO-PIPE). TD-Q-LIN and TD-Q-NG try to learn evaluation functions (EFs) mapping input/action pairs to expected reward. PIPE and CO-PIPE search policy space directly. They use adaptive probability distributions to synthesize programs that calculate action probabilities from current inputs. We find that learning appropriate EFs is hard for both EF-based approaches. Direct search in policy space discovers more reliable policies and is faster.},
  size      = {5 pages},
  keywords  = {PIPE},
}

% NOTE(review): the exported record carried the text "in press" in the year
% field. The year below is inferred from the citation key suffix; "in press"
% now lives in note. Confirm and add volume/number/pages once verified.
@article{wiering:mlj98,
  author               = {Wiering, Marco and Schmidhuber, J{\"u}rgen},
  title                = {Fast Online {Q($\lambda$)}},
  journal              = {Machine Learning},
  year                 = {1998},
  note                 = {In press},
  biburl               = {http://www.bibsonomy.org/bibtex/2ac048d392c14ddac51291e91390b14e5/idsia},
  priority             = {2},
  citeulike-article-id = {2379776},
  keywords             = {nn},
}

@article{wiering:ab97,
  author               = {Wiering, Marco and Schmidhuber, J{\"u}rgen},
  title                = {{HQ}-Learning},
  journal              = {Adaptive Behavior},
  volume               = {6},
  number               = {2},
  pages                = {219--246},
  year                 = {1997},
  biburl               = {http://www.bibsonomy.org/bibtex/2f112cf84948f435fbd9ed7cf598d71f1/idsia},
  priority             = {2},
  citeulike-article-id = {2380355},
  keywords             = {inaki},
}

% NOTE(review): appears to duplicate wiering:1996:pomdps (same authors, venue
% and year); the title wording is aligned with that record. Key kept for
% backward compatibility.
@inproceedings{wiering96solving,
  author               = {Wiering, M. and Schmidhuber, J.},
  title                = {Solving {POMDPs} with {Levin} search and {EIRA}},
  booktitle            = {Proceedings of the 13th International Conference on Machine Learning (ICML)},
  pages                = {534--542},
  year                 = {1996},
  biburl               = {http://www.bibsonomy.org/bibtex/2d818d767af413448cc26258756adbb60/idsia},
  priority             = {2},
  citeulike-article-id = {2380581},
  keywords             = {evolutionary},
}

@incollection{Wiering:01book,
  author               = {Wiering, M. A. and Sa{\l}ustowicz, R. P. and Schmidhuber, J.},
  title                = {Model-based reinforcement learning for evolving soccer strategies},
  booktitle            = {Computational Intelligence in Games},
  editor               = {Baba, N. and Jain, L. C.},
  publisher            = {Springer-Verlag},
  address              = {Berlin},
  year                 = {2001},
  biburl               = {http://www.bibsonomy.org/bibtex/2f9b26c75f58290b2f5693cc20049ec5f/idsia},
  priority             = {2},
  citeulike-article-id = {2380778},
  keywords             = {juergen},
}