% Bibliography exported from BibSonomy (user schaul, keyword daanbib).
% Layout: one field per line, fields in alphabetical order as exported.
% The added-at, biburl, citeulike-article-id, description, interhash,
% intrahash, keywords, priority and timestamp fields are export bookkeeping
% that standard styles ignore; they are kept unchanged.

@inproceedings{stoneICRA04,
  added-at             = {2008-02-26T12:05:08.000+0100},
  author               = {Kohl, Nate and Stone, Peter},
  biburl               = {http://www.bibsonomy.org/bibtex/28969aae0ed7d0829c17ce7411f2bcdc7/schaul},
  booktitle            = {Proceedings of the IEEE International Conference on Robotics and Automation},
  citeulike-article-id = {2374754},
  description          = {idsia},
  interhash            = {facde69024609f5b5688764c07f9fb67},
  intrahash            = {8969aae0ed7d0829c17ce7411f2bcdc7},
  keywords             = {daanbib},
  month                = may,
  priority             = {2},
  timestamp            = {2008-02-26T12:05:08.000+0100},
  title                = {Policy Gradient Reinforcement Learning for Fast Quadrupedal Locomotion},
  year                 = 2004
}

@article{stoneML05,
  added-at             = {2008-02-26T12:05:08.000+0100},
  author               = {Whiteson, Shimon and Kohl, Nate and Miikkulainen, Risto and Stone, Peter},
  biburl               = {http://www.bibsonomy.org/bibtex/28192988ef1ed0e526026b14386579030/schaul},
  citeulike-article-id = {2374755},
  description          = {idsia},
  interhash            = {3256b5cb6bfa3c05d402ae68bf89f326},
  intrahash            = {8192988ef1ed0e526026b14386579030},
  journal              = {Machine Learning},
  keywords             = {daanbib},
  month                = may,
  number               = 1,
  pages                = {5--30},
  priority             = {2},
  timestamp            = {2008-02-26T12:05:08.000+0100},
  title                = {Evolving Keepaway Soccer Players through Task Decomposition},
  volume               = 59,
  year                 = 2005
}

@inproceedings{ghavamzadehICML03,
  added-at             = {2008-02-26T12:05:08.000+0100},
  author               = {Ghavamzadeh, M. and Mahadevan, Sridhar},
  biburl               = {http://www.bibsonomy.org/bibtex/2ba7d96561c76b3ef903c9058197cd1c1/schaul},
  booktitle            = {Proceedings of the Twentieth International Conference on Machine Learning ({ICML}-2003)},
  citeulike-article-id = {2374756},
  description          = {idsia},
  interhash            = {dc1bdc231d932302d40ec5c2ebf194ce},
  intrahash            = {ba7d96561c76b3ef903c9058197cd1c1},
  keywords             = {daanbib},
  priority             = {2},
  timestamp            = {2008-02-26T12:05:08.000+0100},
  title                = {Hierarchical Policy Gradient Algorithms},
  year                 = 2003
}

@article{stonekeepawayAB2005,
  added-at             = {2008-02-26T12:05:08.000+0100},
  author               = {Stone, Peter and Sutton, Richard S. and Kuhlmann, Gregory},
  biburl               = {http://www.bibsonomy.org/bibtex/297049a49146280b30b45e0115b08ac7d/schaul},
  citeulike-article-id = {2374757},
  description          = {idsia},
  interhash            = {33514c099e3393ec266d55faa19bde65},
  intrahash            = {97049a49146280b30b45e0115b08ac7d},
  journal              = {Adaptive Behavior},
  keywords             = {daanbib},
  number               = 3,
  pages                = {165--188},
  priority             = {2},
  timestamp            = {2008-02-26T12:05:08.000+0100},
  title                = {Reinforcement Learning for {RoboCup}-Soccer Keepaway},
  volume               = 13,
  year                 = 2005
}

@inproceedings{bakker02reinforcement,
  added-at             = {2008-02-26T12:05:08.000+0100},
  author               = {Bakker, B.},
  biburl               = {http://www.bibsonomy.org/bibtex/23e75c517832b18be3e327c098df1adfd/schaul},
  booktitle            = {Advances in Neural Information Processing Systems 14},
  citeulike-article-id = {2374768},
  description          = {idsia},
  interhash            = {b9258af25aceae8a5712c7346d0dcb5f},
  intrahash            = {3e75c517832b18be3e327c098df1adfd},
  keywords             = {daanbib},
  priority             = {2},
  timestamp            = {2008-02-26T12:05:08.000+0100},
  title                = {Reinforcement learning with long short-term memory},
  url                  = {http://citeseer.ist.psu.edu/bakker02reinforcement.html},
  year                 = 2002
}

@inproceedings{petersnaturalpg,
  added-at             = {2008-02-26T12:05:08.000+0100},
  author               = {Peters, J. and Vijayakumar, S. and Schaal, S.},
  biburl               = {http://www.bibsonomy.org/bibtex/2b16e56cb675bc395c0b868e659f6ed26/schaul},
  booktitle            = {Proceedings of the 16th European Conference on Machine Learning (ECML 2005)},
  citeulike-article-id = {2374769},
  description          = {idsia},
  interhash            = {bb1667afc8b0c7c75ee696383d7cf91c},
  intrahash            = {b16e56cb675bc395c0b868e659f6ed26},
  keywords             = {daanbib},
  pages                = {280--291},
  priority             = {2},
  timestamp            = {2008-02-26T12:05:08.000+0100},
  title                = {Natural Actor-Critic},
  year                 = 2005
}

@incollection{schraudolph2006smdpg,
  added-at             = {2008-02-26T12:05:08.000+0100},
  address              = {Cambridge, MA},
  author               = {Schraudolph, Nicol and Yu, Jin and Aberdeen, Douglas},
  biburl               = {http://www.bibsonomy.org/bibtex/2aa96bdfd16e9593aec3eab071e445966/schaul},
  booktitle            = {Advances in Neural Information Processing Systems 18},
  citeulike-article-id = {2374770},
  description          = {idsia},
  editor               = {Weiss, Y. and Sch{\"o}lkopf, B. and Platt, J.},
  interhash            = {2db2f3178f868164ae905ca2f27be7bd},
  intrahash            = {aa96bdfd16e9593aec3eab071e445966},
  keywords             = {daanbib},
  priority             = {2},
  publisher            = {MIT Press},
  timestamp            = {2008-02-26T12:05:08.000+0100},
  title                = {Fast Online Policy Gradient Learning with {SMD} Gain Vector Adaptation},
  year                 = 2006
}

@misc{baird99reinforcement,
  added-at             = {2008-02-26T12:05:08.000+0100},
  author               = {Baird, L.},
  biburl               = {http://www.bibsonomy.org/bibtex/2255f2c6da1a8995e57d60a697219992e/schaul},
  citeulike-article-id = {2374771},
  description          = {idsia},
  interhash            = {24e2732d3fa83438db516730e095430b},
  intrahash            = {255f2c6da1a8995e57d60a697219992e},
  keywords             = {daanbib},
  priority             = {2},
  timestamp            = {2008-02-26T12:05:08.000+0100},
  title                = {Reinforcement Learning Through Gradient Descent},
  url                  = {http://citeseer.ifi.unizh.ch/baird99reinforcement.html},
  year                 = 1999
}

@article{WilliamsZipser:89nc,
  added-at             = {2008-02-26T12:05:08.000+0100},
  author               = {Williams, R. J. and Zipser, D.},
  biburl               = {http://www.bibsonomy.org/bibtex/241406f69681b40d610664492e3dc4d1c/schaul},
  citeulike-article-id = {2374775},
  description          = {idsia},
  interhash            = {b0a3fc38f35d06d9ec9c135cb7d5798d},
  intrahash            = {41406f69681b40d610664492e3dc4d1c},
  journal              = {Neural Computation},
  keywords             = {daanbib},
  number               = 2,
  pages                = {270--280},
  priority             = {2},
  timestamp            = {2008-02-26T12:05:08.000+0100},
  title                = {A learning algorithm for continually running fully recurrent networks},
  volume               = 1,
  year                 = 1989
}

@article{Schmidhuber:02nc,
  added-at             = {2008-02-26T12:05:08.000+0100},
  author               = {Schmidhuber, J. and Gers, F. and Eck, D.},
  biburl               = {http://www.bibsonomy.org/bibtex/22548e25c4316e6d365a1cda13910c7b1/schaul},
  citeulike-article-id = {2374776},
  description          = {idsia},
  interhash            = {c31b2c52ac93f137d9d5f3d3a82a202e},
  intrahash            = {2548e25c4316e6d365a1cda13910c7b1},
  journal              = {Neural Computation},
  keywords             = {daanbib},
  number               = 9,
  pages                = {2039--2041},
  priority             = {2},
  timestamp            = {2008-02-26T12:05:08.000+0100},
  title                = {Learning Nonregular Languages: A Comparison of Simple Recurrent Networks and {LSTM}},
  volume               = 14,
  year                 = 2002
}

@incollection{Hochreiter:01book,
  added-at             = {2008-02-26T12:05:08.000+0100},
  author               = {Hochreiter, S. and Bengio, Y. and Frasconi, P. and Schmidhuber, J.},
  biburl               = {http://www.bibsonomy.org/bibtex/279df6721c014a00bfac62abd7d5a9968/schaul},
  booktitle            = {A Field Guide to Dynamical Recurrent Neural Networks},
  citeulike-article-id = {2374777},
  description          = {idsia},
  editor               = {Kremer, S. C. and Kolen, J. F.},
  interhash            = {485c1bd6a99186c9414c6b9ddaed42c9},
  intrahash            = {79df6721c014a00bfac62abd7d5a9968},
  keywords             = {daanbib},
  priority             = {2},
  publisher            = {IEEE Press},
  timestamp            = {2008-02-26T12:05:08.000+0100},
  title                = {Gradient flow in recurrent nets: the difficulty of learning long-term dependencies},
  year                 = 2001
}

% The web address of this overview is stored in the url field as a plain URL.
@misc{Schmidhuber:04rnn,
  added-at             = {2008-02-26T12:05:08.000+0100},
  author               = {Schmidhuber, J.},
  biburl               = {http://www.bibsonomy.org/bibtex/2825192e2b52551362862b4b20fbbea1c/schaul},
  citeulike-article-id = {2374778},
  description          = {idsia},
  interhash            = {ad9f85c1f11fb92526a0478f690e80a2},
  intrahash            = {825192e2b52551362862b4b20fbbea1c},
  keywords             = {daanbib},
  priority             = {2},
  timestamp            = {2008-02-26T12:05:08.000+0100},
  title                = {{RNN} overview},
  url                  = {http://www.idsia.ch/~juergen/rnn.html},
  year                 = 2004
}

@misc{gullapalli92reinforcement,
  added-at             = {2008-02-26T12:05:08.000+0100},
  author               = {Gullapalli, V.},
  biburl               = {http://www.bibsonomy.org/bibtex/2308dfb505e23a977417d69d6a94d1cad/schaul},
  citeulike-article-id = {2374779},
  description          = {idsia},
  interhash            = {900aff0497fbc5ab09988948fef7ba7a},
  intrahash            = {308dfb505e23a977417d69d6a94d1cad},
  keywords             = {daanbib},
  priority             = {2},
  timestamp            = {2008-02-26T12:05:08.000+0100},
  title                = {Reinforcement learning and its application to control},
  url                  = {http://citeseer.comp.nus.edu.sg/gullapalli92reinforcement.html},
  year                 = 1992
}

@inproceedings{schwefelnozzle,
  added-at             = {2008-02-26T12:05:08.000+0100},
  author               = {Klockgether, J. and Schwefel, H. P.},
  biburl               = {http://www.bibsonomy.org/bibtex/2b03f900e582e764963efe48e4d76f0bc/schaul},
  booktitle            = {Proc. 11th Symp. Engineering Aspects of Magnetohydrodynamics},
  citeulike-article-id = {2374790},
  description          = {idsia},
  interhash            = {8160a65c912a14027a72e2e025bdc095},
  intrahash            = {b03f900e582e764963efe48e4d76f0bc},
  keywords             = {daanbib},
  pages                = {141--148},
  priority             = {2},
  timestamp            = {2008-02-26T12:05:08.000+0100},
  title                = {Two-phase nozzle and hollow core jet experiments},
  year                 = 1970
}

% The doi field holds the bare DOI; styles add the resolver prefix themselves.
@inproceedings{spallspsa,
  added-at             = {2008-02-26T12:05:08.000+0100},
  address              = {New York, NY, USA},
  author               = {Spall, James C.},
  biburl               = {http://www.bibsonomy.org/bibtex/20a13668f6995399e22b001c9eeba0b7a/schaul},
  booktitle            = {WSC '99: Proceedings of the 31st conference on Winter simulation},
  citeulike-article-id = {2374791},
  description          = {idsia},
  doi                  = {10.1145/324138.324170},
  interhash            = {9f84438c34406fda3a2f79cdf51515bf},
  intrahash            = {0a13668f6995399e22b001c9eeba0b7a},
  keywords             = {daanbib},
  pages                = {101--109},
  priority             = {2},
  publisher            = {ACM},
  timestamp            = {2008-02-26T12:05:08.000+0100},
  title                = {Stochastic optimization and the simultaneous perturbation method},
  year                 = 1999
}

@book{swarmintelligence,
  added-at             = {2008-02-26T12:05:08.000+0100},
  address              = {San Francisco, CA, USA},
  author               = {Kennedy, James and Eberhart, Russell C.},
  biburl               = {http://www.bibsonomy.org/bibtex/299396210934937d279f29a0e10f9ee8f/schaul},
  citeulike-article-id = {2374792},
  description          = {idsia},
  interhash            = {06b86dd986fc1ea6acfa53c8220f5460},
  intrahash            = {99396210934937d279f29a0e10f9ee8f},
  keywords             = {daanbib},
  priority             = {2},
  publisher            = {Morgan Kaufmann Publishers Inc.},
  timestamp            = {2008-02-26T12:05:08.000+0100},
  title                = {Swarm intelligence},
  year                 = 2001
}

% Journal name, volume (220), issue number (4598) and month are kept in
% separate fields so that the style controls how they are printed.
@article{kirkpatrick83optimization,
  added-at             = {2008-02-26T12:05:08.000+0100},
  author               = {Kirkpatrick, S. and Gelatt, C. D. and Vecchi, M. P.},
  biburl               = {http://www.bibsonomy.org/bibtex/2bf34d745d798e80585c4590e621fcd37/schaul},
  citeulike-article-id = {2374793},
  description          = {idsia},
  interhash            = {b1a283999e322021aa344033c34e6615},
  intrahash            = {bf34d745d798e80585c4590e621fcd37},
  journal              = {Science},
  keywords             = {daanbib},
  month                = may,
  number               = 4598,
  pages                = {671--680},
  priority             = {2},
  timestamp            = {2008-02-26T12:05:08.000+0100},
  title                = {Optimization by Simulated Annealing},
  volume               = 220,
  year                 = 1983
}

@article{amari98natural,
  added-at             = {2008-02-26T12:05:08.000+0100},
  author               = {Amari, S.},
  biburl               = {http://www.bibsonomy.org/bibtex/204cab5fc779db34f662a2ae0f25e96ad/schaul},
  citeulike-article-id = {2374794},
  description          = {idsia},
  interhash            = {a5cad2a0bad7028a732ae79e9fa6a4b2},
  intrahash            = {04cab5fc779db34f662a2ae0f25e96ad},
  journal              = {Neural Computation},
  keywords             = {daanbib},
  number               = 2,
  pages                = {251--276},
  priority             = {2},
  timestamp            = {2008-02-26T12:05:08.000+0100},
  title                = {Natural Gradient Works Efficiently in Learning},
  volume               = 10,
  year                 = 1998
}

% NOTE(review): the first author is entered with the hyphenated surname
% Heidrich-Meisner (the export had it as surname Meisner with middle initial
% H.) -- confirm against the published paper. The publication status lives in
% the note field; drop it once page numbers are added.
@inproceedings{verenaESANN,
  added-at             = {2008-02-26T12:05:08.000+0100},
  author               = {Heidrich-Meisner, Verena and Igel, Christian},
  biburl               = {http://www.bibsonomy.org/bibtex/2143b3278d469900d7b42be66c0611739/schaul},
  booktitle            = {16th European Symposium on Artificial Neural Networks (ESANN)},
  citeulike-article-id = {2374798},
  description          = {idsia},
  interhash            = {e2a47ae370ff15998a32fce1dc97f442},
  intrahash            = {143b3278d469900d7b42be66c0611739},
  keywords             = {daanbib},
  note                 = {To appear},
  priority             = {2},
  timestamp            = {2008-02-26T12:05:08.000+0100},
  title                = {Similarities and differences between policy gradient methods and evolution strategies},
  year                 = 2008
}

% Journal paper (Annals of Operations Research), hence an article entry with
% the venue in the journal field.
% NOTE(review): initials for de Boer and Kroese were filled in from outside
% knowledge, and the issue may be dated 2005 rather than 2004 -- confirm
% against the publisher record.
@article{crossentropy1,
  added-at             = {2008-02-26T12:05:08.000+0100},
  author               = {de Boer, P.-T. and Kroese, D. P. and Mannor, S. and Rubinstein, R. Y.},
  biburl               = {http://www.bibsonomy.org/bibtex/26ed054d7babe82af8c357947ba30f660/schaul},
  citeulike-article-id = {2374799},
  description          = {idsia},
  interhash            = {58f40c83f41d8d982be79ac9d46de19b},
  intrahash            = {6ed054d7babe82af8c357947ba30f660},
  journal              = {Annals of Operations Research},
  keywords             = {daanbib},
  number               = 1,
  pages                = {19--67},
  priority             = {2},
  timestamp            = {2008-02-26T12:05:08.000+0100},
  title                = {A Tutorial on the Cross-Entropy Method},
  volume               = 134,
  year                 = 2004
}