\relax \ifx\hyper@anchor\@undefined \global \let \oldcontentsline\contentsline \gdef \contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}} \global \let \oldnewlabel\newlabel \gdef \newlabel#1#2{\newlabelxx{#1}#2} \gdef \newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}} \AtEndDocument{\let \contentsline\oldcontentsline \let \newlabel\oldnewlabel} \else \global \let \hyper@last\relax \fi \@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}{section.1}} \citation{ipodnorvig} \@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Using an optimal policy $\pi $ to run a simulation of an MDP.}}{2}{figure.1}} \newlabel{mdpsim}{{1}{2}{Using an optimal policy $\pi $ to run a simulation of an MDP}{figure.1}{}} \@writefile{toc}{\contentsline {section}{\numberline {2}iPod example}{2}{section.2}} \@writefile{brf}{\backcite{ipodnorvig}{{2}{2}{section.2}}} \@writefile{toc}{\contentsline {section}{\numberline {3}Value iteration}{3}{section.3}} \citation{viconvergence} \@writefile{brf}{\backcite{viconvergence}{{4}{3}{section.3}}} \@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Value iteration for the iPod example}{4}{subsection.3.1}} \citation{sail} \@writefile{toc}{\contentsline {section}{\numberline {4}Sailing}{5}{section.4}} \@writefile{brf}{\backcite{sail}{{5}{4}{section.4}}} \@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Example of sailing}{6}{subsection.4.1}} \citation{Kocsis06banditbased} \@writefile{toc}{\contentsline {section}{\numberline {5}UCT: Upper Confidence bounds on Trees}{7}{section.5}} \newlabel{uctsec}{{5}{7}{UCT: Upper Confidence bounds on Trees\relax }{section.5}{}} \@writefile{brf}{\backcite{Kocsis06banditbased}{{7}{5}{figure.2}}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.1}UCB: Upper Confidence Bounds}{7}{subsection.5.1}} \citation{Auer:2002} \citation{Auer:2002} \@writefile{brf}{\backcite{Auer:2002}{{8}{5.1}{subsection.5.1}}} \newlabel{thmUCB}{{5.1}{8}{UCB: Upper Confidence Bounds\relax }{theorem.5.1}{}} \@writefile{brf}{\backcite{Auer:2002}{{8}{5.1}{theorem.5.1}}} \citation{Kocsis06banditbased} \@writefile{toc}{\contentsline {subsection}{\numberline {5.2}UCT}{9}{subsection.5.2}} \newlabel{eqnuct}{{1}{9}{UCT\relax }{equation.1}{}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.3}UCT on the sailing problem}{9}{subsection.5.3}} \@writefile{toc}{\contentsline {section}{\numberline {6}What else?}{9}{section.6}} \@writefile{brf}{\backcite{Kocsis06banditbased}{{9}{1}{Hfootnote.1}}} \citation{citeulike:2976742} \@writefile{brf}{\backcite{citeulike:2976742}{{10}{6}{section.6}}} \@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Pseudo-Python for Monte Carlo planning.}}{11}{figure.2}} \newlabel{mcplanning}{{2}{11}{Pseudo-Python for Monte Carlo planning}{figure.2}{}} \@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Comparing best and actual reward using the UCB1 strategy with $10$ machines. See Section\nobreakspace {}\ref {pythonucb} for details on the machines' reward distributions.}}{12}{figure.3}} \newlabel{figucbreward}{{3}{12}{Comparing best and actual reward using the UCB1 strategy with $10$ machines. See Section~\ref {pythonucb} for details on the machines' reward distributions}{figure.3}{}} \@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Regret and upper bound on regret given by Theorem\nobreakspace {}\ref {thmUCB}}}{12}{figure.4}} \newlabel{ucbregret}{{4}{12}{Regret and upper bound on regret given by Theorem~\ref {thmUCB}\relax }{figure.4}{}} \@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Performance of plain Monte Carlo planning and UCT on the sailing problem.}}{13}{figure.5}} \newlabel{uctsailing}{{5}{13}{Performance of plain Monte Carlo planning and UCT on the sailing problem}{figure.5}{}} \@writefile{toc}{\contentsline {section}{\numberline {7}iPod value iteration}{14}{section.7}} \newlabel{secipodpython}{{7}{14}{iPod value iteration\relax }{section.7}{}} \@writefile{toc}{\contentsline {section}{\numberline {8}Value iteration for sailing}{19}{section.8}} \newlabel{pythonsailing}{{8}{19}{Value iteration for sailing\relax }{section.8}{}} \@writefile{toc}{\contentsline {section}{\numberline {9}UCB}{35}{section.9}} \newlabel{pythonucb}{{9}{35}{UCB\relax }{section.9}{}} \@writefile{toc}{\contentsline {section}{\numberline {10}Sailing MCT}{39}{section.10}} \newlabel{pythonmc}{{10}{39}{Sailing MCT\relax }{section.10}{}} \@writefile{toc}{\contentsline {section}{\numberline {11}UCT}{44}{section.11}} \newlabel{pythonuct}{{11}{44}{UCT\relax }{section.11}{}} \bibstyle{plain} \bibdata{mdp} \bibcite{viconvergence}{1} \bibcite{Auer:2002}{2} \bibcite{citeulike:2976742}{3} \bibcite{Kocsis06banditbased}{4} \bibcite{ipodnorvig}{5} \bibcite{sail}{6}