MDS
system:sage

{{{id=0|
"""
A Markov Decision Process (MDP) for the iPod problem 
described at http://norvig.com/ipod.html

Carlo Hamalainen <carlo.hamalainen@gmail.com>
"""

import random

def value_iteration(N, T, target):
    # The possible actions from any state:
    actions = ['sequential', 'shuffle']

    # Transition probabilities:
    # transitions[s, a, w] = probability of moving from s to w by action a.

    transitions = {}

    # Sequential strategy gets us to the target directly.
    for s in range(N):
        for w in range(N):
            if w == target:
                transitions[s, 'sequential', w] = 1.0
            else:
                transitions[s, 'sequential', w] = 0.0

    # The Shuffle strategy takes us to any state with equal
    # probability.
    for s in range(N):
        for w in range(N):
            transitions[s, 'shuffle', w] = 1.0/N

    # Cost of each action from any state s.
    cost = {}
    for s in range(N): cost[s, 'sequential'] = abs(target - s)
    for s in range(N): cost[s, 'shuffle'] = T

    epsilon = 0.001

    V1 = [0] * N
    V2 = [1] * N
    policy = [0] * N

    while max([abs(V1[i] - V2[i]) for i in range(N)]) > epsilon:
        for s in range(N):
            min_action = actions[0]
            min_action_cost = cost[s, actions[0]] \
                + sum([transitions[s, actions[0], w]*V1[w] for w in range(N)])

            for a in actions:
                this_cost = cost[s, a] + sum([transitions[s, a, w]*V1[w] \
                    for w in range(N)])

                if this_cost < min_action_cost:
                    min_action = a
                    min_action_cost = this_cost

            V2[s] = min_action_cost
            policy[s] = min_action

        V1, V2 = V2, V1   # swapsies


        #print V1

    #print V1
    #print policy
    #print

    return V1, policy

def simulate(N, T, initial_state, target, V, policy):
    s = initial_state
    total_cost = 0

    while s != target:
        if policy[s] == 'sequential':
            total_cost += abs(target - s)
            s = target
        else: # policy[s] == 'shuffle'
            total_cost += T
            s = random.randint(0, N - 1)

    return total_cost

random.seed(0)

# number of songs on the iPod:
N = 250

# The target state:
target = N/2

plots = []

for T in [1, 5, 10]:
    V, policy = value_iteration(N, T, target)

    p = 1 + target - min([w for w in range(N) if V[w] == abs(w - target)])

    print "shuffle if %d or more away" % p

    assert policy[target - p] == 'shuffle'
    assert policy[target + p] == 'shuffle'

    assert policy[target - p + 1] == 'sequential'
    assert policy[target + p - 1] == 'sequential'

    nr_iters = 10
    
    V_simulated = [0] * N
    for s in range(N):
        V_simulated[s] = float(sum([simulate(N, T, s, target, V, policy) \
            for _ in range(nr_iters)]))/float(nr_iters)
    
    diffs = [V[s] - V_simulated[s] for s in range(N)]
    plots.append(scatter_plot(zip(range(N), diffs)))
///

shuffle if 16 or more away
shuffle if 36 or more away
shuffle if 50 or more away
}}}

{{{id=7|
plots[0]
///

<html><font color='black'><img src='cell://sage0.png'></font></html>
}}}

{{{id=9|
plots[1]
///

<html><font color='black'><img src='cell://sage0.png'></font></html>
}}}

{{{id=10|
plots[2]
///

<html><font color='black'><img src='cell://sage0.png'></font></html>
}}}

{{{id=5|
plots
///

[<html><font color='black'><img src='cell://sage0.png'></font></html>
, <html><font color='black'><img src='cell://sage1.png'></font></html>
, <html><font color='black'><img src='cell://sage2.png'></font></html>
]
}}}

{{{id=6|

///
}}}