christian
/
skat


			
				
					
						
						
							
							{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE UndecidableInstances #-}

module Skat.AI.MonteCarlo where

import Control.Monad.State
import Control.Exception (assert)
import Control.Monad.Fail
import Data.Ord
import Text.Read (readMaybe)
import Data.List (maximumBy, minimumBy, sortBy, delete, intercalate)
import Debug.Trace
import Data.Ratio
import Data.Set (Set)
import qualified Data.Set as Set
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Bits
import Data.Vector (Vector)
import qualified Data.Vector as Vector
import System.Random (Random)
import qualified System.Random as Rand
import Text.Printf
import Data.List.Split

import Skat.AI.Base
import qualified Skat as S
import qualified Skat.Card as S
import qualified Skat.Operations as S
import qualified Skat.Pile as S
import qualified Skat.Player as S hiding (trumpColour, turnColour)
import qualified Skat.Render as S
import Skat.Utils
--import TestEnvs (env3, shuffledEnv2)

type WinCount = Float
type SimCount = Int

data Tree t s = Leaf    s Bool (WinCount, SimCount)
              | Node    s Bool (WinCount, SimCount) [Tree t s]
              | Pending s t

simruns :: Tree t s -> SimCount
simruns (Leaf _ _ d)   = snd d
simruns (Node _ _ d _) = snd d
simruns Pending{}      = 0

wins :: Tree t s -> WinCount
wins (Leaf _ _ d)   = fst d
wins (Node _ _ d _) = fst d
wins Pending{}      = 0

childrenwins :: Tree t s -> WinCount
childrenwins (Node _ _ _ cs) = sum $ fmap wins cs
childrenwins _               = 0

treestate :: Tree t s -> s
treestate (Leaf s _ _) = s
treestate (Node s _ _ _) = s
treestate (Pending s _) = s

isterminal :: Tree t s -> Bool
isterminal (Leaf _ b _)   = b
isterminal (Node _ b _ _) = b
isterminal Pending{}      = False

class Draw s where
    draw :: s -> String

instance Draw Int where
    draw = show

indent :: Int -> String -> String
indent n s = intercalate ("\n" ++ replicate n ' ') $ splitOn "\n" s

visualise :: (HasGameState t p d s, Draw s, Draw t) => Tree t s -> String
visualise (Node s _ d children) = printf "[%f/%d]: %s %s:\n%s" (fst d) (snd d) (show . maxing . current $ s) (indent 14 $ draw s) (intercalate "\n" $ fmap f children)
    where f c = printf "---%s" (indent 3 $ visualise c)
visualise (Leaf s _ d) = printf "[%f/%d]: %s" (fst d) (snd d) (indent 9 $ draw s)
visualise (Pending s t) = printf "[pend]: %s %s" (indent 9 $ draw s) (indent 9 $ draw t)

emptytree :: s -> Tree t s
emptytree s = Leaf s False (0, 0)

valuation :: Tree t s -> (WinCount, SimCount)
valuation (Leaf _ _ d)   = d
valuation (Node _ _ d _) = d
valuation Pending{}      = (0,0)

deriving instance (Show s, Show t) => Show (Tree t s)

class MonadRandom m where
    random :: Random a => m a
    chooser :: [a] -> m a

instance MonadRandom IO where
    random = Rand.randomIO
    chooser os = (os!!) <$> Rand.randomRIO (0, length os -1)

instance MonadRandom (State Rand.StdGen) where
    random = do
        gen <- get
        let (a, gen') = Rand.random gen
        put gen'
        return a
    chooser os = do
        gen <- get
        let (a, gen') = Rand.randomR (0, length os -1) gen
        put gen'
        return (os !! a)

{-
valuetonum  :: (Fractional a, Value v) => v -> a
valuetonum v
    | v == win  = 1
    | v == loss = 0
    | v == tie  = 0.5
-}

restoint :: (Player p, Value v) => p -> v -> Float
restoint p v = tonum $ if maxing p then v else invert v

{-
updateval :: (Player p, Value d) => p -> [d] -> (WinCount, SimCount) -> (WinCount, SimCount)
updateval team xs d =
    let newSimCount = snd d + fromIntegral (length xs)
        newWinCount = fst d + sum (fmap (tonum . cvt) xs)
        cvt = if maxing team then id else invert
    in (newWinCount, newSimCount)
-}

class (Player p, Value d) => HasGameState t p d s | s -> d, s -> p, s -> t where
    moves :: s -> [t]
    execute :: t -> s -> s
    monteevaluate :: s -> d
    current :: s -> p

montecarlo :: (Show s, Show t, Eq p, Show d, Monad m, HasGameState t p d s, MonadRandom m)
           => Tree t s
           -> m (Tree t s)
montecarlo (Pending state turn) = do
    let currentTeam = current state
        state' = execute turn state
    -- objectively get a final score of random playout (independent of perspective)
    values <- replicateM 1 (montesimulate state')
    let tr = if maxing (current state') then id else invert
        vs = fmap (tonum . tr) values
        n = sum vs / 1
    --let v = if maxing (current state') then value else invert value
    let val = (n, 1)
    pure $ Leaf state' False val
montecarlo (Leaf state terminal d)
   | terminal || length ms == 0 = pure $ Leaf state True d
   | otherwise = let children = map (Pending state) ms in pure $ Node state False d children
  where ms = moves state
montecarlo (Node state _ d []) = pure $ Leaf state True d
montecarlo n@(Node state True d children) = pure n
montecarlo n@(Node state _ d children)
  | all isterminal children =
        let d' = reevaluateminmax n
        in pure $ Node state True d' children
  | otherwise = do
        let myruns = snd d
            cmp c = if isterminal c then -1 else selectcoeff myruns $ valuation c
            (idx, bestChild) =
                maximumBy (comparing $ cmp . snd) $ zipWith (,) [0..] children
        updated <- montecarlo bestChild
        let cs = updateAt idx children updated
            newSimRuns = simruns updated - simruns bestChild + snd d
            diff = wins updated - wins bestChild
            diff2 = 
                if newSimRuns == snd d then 0
                else
                    if current state == current (treestate updated)
                            then diff
                            else 1 - diff
            newWins = diff2 + fst d
        --return $ trace ("updating node " ++ show diff2 ++ "\n" ++ show updated ++ "\n" ++ show bestChild) (Node state False (newWins, newSimRuns) cs)
        return $ Node state False (newWins, newSimRuns) cs

montesimulate :: (Monad m, MonadRandom m, HasGameState t p d s, Show d)
              => s
              -> m d
montesimulate state = case moves state of
    [] -> pure $ monteevaluate state
    allowed -> do
        turn <- chooser allowed
        montesimulate $ execute turn state

runmonte :: Int -> State Rand.StdGen (Tree t s) -> Tree t s
runmonte n action = evalState action (Rand.mkStdGen n)

{-
bestmove :: Tree s -> s
bestmove (Leaf s _ _) = s
bestmove (Node s _ _ cs) = treestate $ selection (comparing $ rate . valuation) cs
    where rate (w, s) = w / fromIntegral s
          mxing = maxing . current $ s
          selection = if mxing then maximumBy else minimumBy
-}
bestmove :: Tree t s -> s
bestmove (Leaf s _ _) = s
bestmove (Node s _ _ cs) = treestate $ maximumBy (comparing $ rate . valuation) cs
    where rate (w, s) = w / fromIntegral s

selectcoeff :: SimCount -> (WinCount, SimCount) -> Float
selectcoeff _ (_, 0) = 10000000
selectcoeff t (w, s) = w / fromIntegral s + explorationParam * sqrt (log (fromIntegral t) / fromIntegral s)
    where explorationParam = sqrt 2

reevaluate :: Tree t s -> (WinCount, SimCount)
reevaluate tree
    | isterminal tree = valuation tree
    | otherwise = case tree of
        (Pending{}) -> valuation tree
        (Leaf{}) -> valuation tree
        (Node _ _ _ children) -> let total = sum $ fmap simruns children
                                     wns = fromIntegral total - sum (fmap wins children)
                                 in (wns, total)

reevaluateminmax :: HasGameState t p d s => Tree t s -> (WinCount, SimCount)
reevaluateminmax tree
    | isterminal tree = valuation tree
    | otherwise = case tree of
        (Pending{}) -> valuation tree
        (Leaf{}) -> valuation tree
        (Node state _ _ children) ->
            let vals = fmap ((\(w, s) -> w / fromIntegral s) . valuation) children
        --        m = maxing . current $ state
                childrenMaxing = all (maxing . current . treestate) children
                selfMaxing = maxing . current $ state
                newval = if childrenMaxing /= selfMaxing then 1 - maximum vals else maximum vals
            in (newval, 1)

--playCLI :: (MonadFail m, Read t, Choose t m, PlayableGame t l v p m) => m ()