\begin{thebibliography}{1}

\bibitem{viconvergence}
The convergence of a general value iteration process.
\newblock
  \url{http://jmlr.csail.mit.edu/papers/volume3/szita02a/html/node21.html}.

\bibitem{Auer:2002}
Peter Auer, Nicol\`{o} Cesa-Bianchi, and Paul Fischer.
\newblock Finite-time analysis of the multiarmed bandit problem.
\newblock {\em Machine Learning}, 47(2):235--256, May 2002.

\bibitem{citeulike:2976742}
Sylvain Gelly and David Silver.
\newblock Combining online and offline knowledge in UCT.
\newblock In {\em ICML '07: Proceedings of the 24th International Conference on
  Machine Learning}, pages 273--280, New York, NY, USA, 2007. ACM.

\bibitem{Kocsis06banditbased}
Levente Kocsis and Csaba Szepesv\'{a}ri.
\newblock Bandit based Monte-Carlo planning.
\newblock In {\em ECML-06}, number 4212 in LNCS, pages 282--293. Springer,
  2006.

\bibitem{ipodnorvig}
Peter Norvig.
\newblock Doing the Martin shuffle (with your iPod).
\newblock \url{http://norvig.com/ipod.html}.

\bibitem{sail}
Robert~J. Vanderbei.
\newblock Sailing strategies: An application involving stochastics,
  optimization, and statistics (SOS).
\newblock \url{http://www.orfe.princeton.edu/~rvdb/sail/sail.html}.

\end{thebibliography}