{-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE BlockArguments #-} {-# LANGUAGE TypeSynonymInstances #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FunctionalDependencies #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE InstanceSigs #-} {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE ImportQualifiedPost #-} {-# LANGUAGE UndecidableInstances #-} module Skat.AI.MonteCarlo where import Control.Monad.State import Control.Exception (assert) import Control.Monad.Fail import Data.Ord import Text.Read (readMaybe) import Data.List (maximumBy, minimumBy, sortBy, delete, intercalate) import Debug.Trace import Data.Ratio import Data.Set (Set) import qualified Data.Set as Set import Data.Map (Map) import qualified Data.Map as Map import Data.Bits import Data.Vector (Vector) import qualified Data.Vector as Vector import System.Random (Random) import qualified System.Random as Rand import Text.Printf import Data.List.Split import Skat.AI.Base import qualified Skat as S import qualified Skat.Card as S import qualified Skat.Operations as S import qualified Skat.Pile as S import qualified Skat.Player as S hiding (trumpColour, turnColour) import qualified Skat.Render as S import Skat.Utils --import TestEnvs (env3, shuffledEnv2) type WinCount = Float type SimCount = Int data Tree t s = Leaf s Bool (WinCount, SimCount) | Node s Bool (WinCount, SimCount) [Tree t s] | Pending s t simruns :: Tree t s -> SimCount simruns (Leaf _ _ d) = snd d simruns (Node _ _ d _) = snd d simruns Pending{} = 0 wins :: Tree t s -> WinCount wins (Leaf _ _ d) = fst d wins (Node _ _ d _) = fst d wins Pending{} = 0 childrenwins :: Tree t s -> WinCount childrenwins (Node _ _ _ cs) = sum $ fmap wins cs childrenwins _ = 0 treestate :: Tree t s -> s treestate (Leaf s _ _) = s treestate (Node s _ _ _) = s treestate (Pending s _) = s isterminal :: Tree t s -> Bool isterminal (Leaf _ b _) = b isterminal (Node _ b _ _) = b isterminal Pending{} = False class Draw s where draw :: s -> String instance Draw Int where draw = show indent :: Int -> String -> String indent n s = intercalate ("\n" ++ replicate n ' ') $ splitOn "\n" s visualise :: (HasGameState t p d s, Draw s, Draw t) => Tree t s -> String visualise (Node s _ d children) = printf "[%f/%d]: %s %s:\n%s" (fst d) (snd d) (show . maxing . current $ s) (indent 14 $ draw s) (intercalate "\n" $ fmap f children) where f c = printf "---%s" (indent 3 $ visualise c) visualise (Leaf s _ d) = printf "[%f/%d]: %s" (fst d) (snd d) (indent 9 $ draw s) visualise (Pending s t) = printf "[pend]: %s %s" (indent 9 $ draw s) (indent 9 $ draw t) emptytree :: s -> Tree t s emptytree s = Leaf s False (0, 0) valuation :: Tree t s -> (WinCount, SimCount) valuation (Leaf _ _ d) = d valuation (Node _ _ d _) = d valuation Pending{} = (0,0) deriving instance (Show s, Show t) => Show (Tree t s) class MonadRandom m where random :: Random a => m a chooser :: [a] -> m a instance MonadRandom IO where random = Rand.randomIO chooser os = (os!!) <$> Rand.randomRIO (0, length os -1) instance MonadRandom (State Rand.StdGen) where random = do gen <- get let (a, gen') = Rand.random gen put gen' return a chooser os = do gen <- get let (a, gen') = Rand.randomR (0, length os -1) gen put gen' return (os !! a) {- valuetonum :: (Fractional a, Value v) => v -> a valuetonum v | v == win = 1 | v == loss = 0 | v == tie = 0.5 -} restoint :: (Player p, Value v) => p -> v -> Float restoint p v = tonum $ if maxing p then v else invert v {- updateval :: (Player p, Value d) => p -> [d] -> (WinCount, SimCount) -> (WinCount, SimCount) updateval team xs d = let newSimCount = snd d + fromIntegral (length xs) newWinCount = fst d + sum (fmap (tonum . cvt) xs) cvt = if maxing team then id else invert in (newWinCount, newSimCount) -} class (Player p, Value d) => HasGameState t p d s | s -> d, s -> p, s -> t where moves :: s -> [t] execute :: t -> s -> s monteevaluate :: s -> d current :: s -> p montecarlo :: (Show s, Show t, Eq p, Show d, Monad m, HasGameState t p d s, MonadRandom m) => Tree t s -> m (Tree t s) montecarlo (Pending state turn) = do let currentTeam = current state state' = execute turn state -- objectively get a final score of random playout (independent of perspective) values <- replicateM 1 (montesimulate state') let tr = if maxing (current state') then id else invert vs = fmap (tonum . tr) values n = sum vs / 1 --let v = if maxing (current state') then value else invert value let val = (n, 1) pure $ Leaf state' False val montecarlo (Leaf state terminal d) | terminal || length ms == 0 = pure $ Leaf state True d | otherwise = let children = map (Pending state) ms in pure $ Node state False d children where ms = moves state montecarlo (Node state _ d []) = pure $ Leaf state True d montecarlo n@(Node state True d children) = pure n montecarlo n@(Node state _ d children) | all isterminal children = let d' = reevaluateminmax n in pure $ Node state True d' children | otherwise = do let myruns = snd d cmp c = if isterminal c then -1 else selectcoeff myruns $ valuation c (idx, bestChild) = maximumBy (comparing $ cmp . snd) $ zipWith (,) [0..] children updated <- montecarlo bestChild let cs = updateAt idx children updated newSimRuns = simruns updated - simruns bestChild + snd d diff = wins updated - wins bestChild diff2 = if newSimRuns == snd d then 0 else if current state == current (treestate updated) then diff else 1 - diff newWins = diff2 + fst d --return $ trace ("updating node " ++ show diff2 ++ "\n" ++ show updated ++ "\n" ++ show bestChild) (Node state False (newWins, newSimRuns) cs) return $ Node state False (newWins, newSimRuns) cs montesimulate :: (Monad m, MonadRandom m, HasGameState t p d s, Show d) => s -> m d montesimulate state = case moves state of [] -> pure $ monteevaluate state allowed -> do turn <- chooser allowed montesimulate $ execute turn state runmonte :: Int -> State Rand.StdGen (Tree t s) -> Tree t s runmonte n action = evalState action (Rand.mkStdGen n) {- bestmove :: Tree s -> s bestmove (Leaf s _ _) = s bestmove (Node s _ _ cs) = treestate $ selection (comparing $ rate . valuation) cs where rate (w, s) = w / fromIntegral s mxing = maxing . current $ s selection = if mxing then maximumBy else minimumBy -} bestmove :: Tree t s -> s bestmove (Leaf s _ _) = s bestmove (Node s _ _ cs) = treestate $ maximumBy (comparing $ rate . valuation) cs where rate (w, s) = w / fromIntegral s selectcoeff :: SimCount -> (WinCount, SimCount) -> Float selectcoeff _ (_, 0) = 10000000 selectcoeff t (w, s) = w / fromIntegral s + explorationParam * sqrt (log (fromIntegral t) / fromIntegral s) where explorationParam = sqrt 2 reevaluate :: Tree t s -> (WinCount, SimCount) reevaluate tree | isterminal tree = valuation tree | otherwise = case tree of (Pending{}) -> valuation tree (Leaf{}) -> valuation tree (Node _ _ _ children) -> let total = sum $ fmap simruns children wns = fromIntegral total - sum (fmap wins children) in (wns, total) reevaluateminmax :: HasGameState t p d s => Tree t s -> (WinCount, SimCount) reevaluateminmax tree | isterminal tree = valuation tree | otherwise = case tree of (Pending{}) -> valuation tree (Leaf{}) -> valuation tree (Node state _ _ children) -> let vals = fmap ((\(w, s) -> w / fromIntegral s) . valuation) children -- m = maxing . current $ state childrenMaxing = all (maxing . current . treestate) children selfMaxing = maxing . current $ state newval = if childrenMaxing /= selfMaxing then 1 - maximum vals else maximum vals in (newval, 1) --playCLI :: (MonadFail m, Read t, Choose t m, PlayableGame t l v p m) => m ()