|
- {-# LANGUAGE MultiParamTypeClasses #-}
- {-# LANGUAGE BlockArguments #-}
- {-# LANGUAGE TypeSynonymInstances #-}
- {-# LANGUAGE FlexibleInstances #-}
- {-# LANGUAGE FlexibleContexts #-}
- {-# LANGUAGE FunctionalDependencies #-}
- {-# LANGUAGE TupleSections #-}
- {-# LANGUAGE InstanceSigs #-}
- {-# LANGUAGE StandaloneDeriving #-}
- {-# LANGUAGE ImportQualifiedPost #-}
- {-# LANGUAGE UndecidableInstances #-}
-
- module Skat.AI.MonteCarlo where
-
- import Control.Monad.State
- import Control.Exception (assert)
- import Control.Monad.Fail
- import Data.Ord
- import Text.Read (readMaybe)
- import Data.List (maximumBy, minimumBy, sortBy, delete, intercalate)
- import Debug.Trace
- import Data.Ratio
- import Data.Set (Set)
- import qualified Data.Set as Set
- import Data.Map (Map)
- import qualified Data.Map as Map
- import Data.Bits
- import Data.Vector (Vector)
- import qualified Data.Vector as Vector
- import System.Random (Random)
- import qualified System.Random as Rand
- import Text.Printf
- import Data.List.Split
-
- import Skat.AI.Base
- import qualified Skat as S
- import qualified Skat.Card as S
- import qualified Skat.Operations as S
- import qualified Skat.Pile as S
- import qualified Skat.Player as S hiding (trumpColour, turnColour)
- import qualified Skat.Render as S
- import Skat.Utils
- --import TestEnvs (env3, shuffledEnv2)
-
- type WinCount = Float
- type SimCount = Int
-
- data Tree t s = Leaf s Bool (WinCount, SimCount)
- | Node s Bool (WinCount, SimCount) [Tree t s]
- | Pending s t
-
- simruns :: Tree t s -> SimCount
- simruns (Leaf _ _ d) = snd d
- simruns (Node _ _ d _) = snd d
- simruns Pending{} = 0
-
- wins :: Tree t s -> WinCount
- wins (Leaf _ _ d) = fst d
- wins (Node _ _ d _) = fst d
- wins Pending{} = 0
-
- childrenwins :: Tree t s -> WinCount
- childrenwins (Node _ _ _ cs) = sum $ fmap wins cs
- childrenwins _ = 0
-
- treestate :: Tree t s -> s
- treestate (Leaf s _ _) = s
- treestate (Node s _ _ _) = s
- treestate (Pending s _) = s
-
- isterminal :: Tree t s -> Bool
- isterminal (Leaf _ b _) = b
- isterminal (Node _ b _ _) = b
- isterminal Pending{} = False
-
- class Draw s where
- draw :: s -> String
-
- instance Draw Int where
- draw = show
-
- indent :: Int -> String -> String
- indent n s = intercalate ("\n" ++ replicate n ' ') $ splitOn "\n" s
-
- visualise :: (HasGameState t p d s, Draw s, Draw t) => Tree t s -> String
- visualise (Node s _ d children) = printf "[%f/%d]: %s %s:\n%s" (fst d) (snd d) (show . maxing . current $ s) (indent 14 $ draw s) (intercalate "\n" $ fmap f children)
- where f c = printf "---%s" (indent 3 $ visualise c)
- visualise (Leaf s _ d) = printf "[%f/%d]: %s" (fst d) (snd d) (indent 9 $ draw s)
- visualise (Pending s t) = printf "[pend]: %s %s" (indent 9 $ draw s) (indent 9 $ draw t)
-
- emptytree :: s -> Tree t s
- emptytree s = Leaf s False (0, 0)
-
- valuation :: Tree t s -> (WinCount, SimCount)
- valuation (Leaf _ _ d) = d
- valuation (Node _ _ d _) = d
- valuation Pending{} = (0,0)
-
- deriving instance (Show s, Show t) => Show (Tree t s)
-
- class MonadRandom m where
- random :: Random a => m a
- chooser :: [a] -> m a
-
- instance MonadRandom IO where
- random = Rand.randomIO
- chooser os = (os!!) <$> Rand.randomRIO (0, length os -1)
-
- instance MonadRandom (State Rand.StdGen) where
- random = do
- gen <- get
- let (a, gen') = Rand.random gen
- put gen'
- return a
- chooser os = do
- gen <- get
- let (a, gen') = Rand.randomR (0, length os -1) gen
- put gen'
- return (os !! a)
-
- {-
- valuetonum :: (Fractional a, Value v) => v -> a
- valuetonum v
- | v == win = 1
- | v == loss = 0
- | v == tie = 0.5
- -}
-
- restoint :: (Player p, Value v) => p -> v -> Float
- restoint p v = tonum $ if maxing p then v else invert v
-
- {-
- updateval :: (Player p, Value d) => p -> [d] -> (WinCount, SimCount) -> (WinCount, SimCount)
- updateval team xs d =
- let newSimCount = snd d + fromIntegral (length xs)
- newWinCount = fst d + sum (fmap (tonum . cvt) xs)
- cvt = if maxing team then id else invert
- in (newWinCount, newSimCount)
- -}
-
- class (Player p, Value d) => HasGameState t p d s | s -> d, s -> p, s -> t where
- moves :: s -> [t]
- execute :: t -> s -> s
- monteevaluate :: s -> d
- current :: s -> p
-
- montecarlo :: (Show s, Show t, Eq p, Show d, Monad m, HasGameState t p d s, MonadRandom m)
- => Tree t s
- -> m (Tree t s)
- montecarlo (Pending state turn) = do
- let currentTeam = current state
- state' = execute turn state
- -- objectively get a final score of random playout (independent of perspective)
- values <- replicateM 1 (montesimulate state')
- let tr = if maxing (current state') then id else invert
- vs = fmap (tonum . tr) values
- n = sum vs / 1
- --let v = if maxing (current state') then value else invert value
- let val = (n, 1)
- pure $ Leaf state' False val
- montecarlo (Leaf state terminal d)
- | terminal || length ms == 0 = pure $ Leaf state True d
- | otherwise = let children = map (Pending state) ms in pure $ Node state False d children
- where ms = moves state
- montecarlo (Node state _ d []) = pure $ Leaf state True d
- montecarlo n@(Node state True d children) = pure n
- montecarlo n@(Node state _ d children)
- | all isterminal children =
- let d' = reevaluateminmax n
- in pure $ Node state True d' children
- | otherwise = do
- let myruns = snd d
- cmp c = if isterminal c then -1 else selectcoeff myruns $ valuation c
- (idx, bestChild) =
- maximumBy (comparing $ cmp . snd) $ zipWith (,) [0..] children
- updated <- montecarlo bestChild
- let cs = updateAt idx children updated
- newSimRuns = simruns updated - simruns bestChild + snd d
- diff = wins updated - wins bestChild
- diff2 =
- if newSimRuns == snd d then 0
- else
- if current state == current (treestate updated)
- then diff
- else 1 - diff
- newWins = diff2 + fst d
- --return $ trace ("updating node " ++ show diff2 ++ "\n" ++ show updated ++ "\n" ++ show bestChild) (Node state False (newWins, newSimRuns) cs)
- return $ Node state False (newWins, newSimRuns) cs
-
- montesimulate :: (Monad m, MonadRandom m, HasGameState t p d s, Show d)
- => s
- -> m d
- montesimulate state = case moves state of
- [] -> pure $ monteevaluate state
- allowed -> do
- turn <- chooser allowed
- montesimulate $ execute turn state
-
- runmonte :: Int -> State Rand.StdGen (Tree t s) -> Tree t s
- runmonte n action = evalState action (Rand.mkStdGen n)
-
- {-
- bestmove :: Tree s -> s
- bestmove (Leaf s _ _) = s
- bestmove (Node s _ _ cs) = treestate $ selection (comparing $ rate . valuation) cs
- where rate (w, s) = w / fromIntegral s
- mxing = maxing . current $ s
- selection = if mxing then maximumBy else minimumBy
- -}
- bestmove :: Tree t s -> s
- bestmove (Leaf s _ _) = s
- bestmove (Node s _ _ cs) = treestate $ maximumBy (comparing $ rate . valuation) cs
- where rate (w, s) = w / fromIntegral s
-
- selectcoeff :: SimCount -> (WinCount, SimCount) -> Float
- selectcoeff _ (_, 0) = 10000000
- selectcoeff t (w, s) = w / fromIntegral s + explorationParam * sqrt (log (fromIntegral t) / fromIntegral s)
- where explorationParam = sqrt 2
-
- reevaluate :: Tree t s -> (WinCount, SimCount)
- reevaluate tree
- | isterminal tree = valuation tree
- | otherwise = case tree of
- (Pending{}) -> valuation tree
- (Leaf{}) -> valuation tree
- (Node _ _ _ children) -> let total = sum $ fmap simruns children
- wns = fromIntegral total - sum (fmap wins children)
- in (wns, total)
-
- reevaluateminmax :: HasGameState t p d s => Tree t s -> (WinCount, SimCount)
- reevaluateminmax tree
- | isterminal tree = valuation tree
- | otherwise = case tree of
- (Pending{}) -> valuation tree
- (Leaf{}) -> valuation tree
- (Node state _ _ children) ->
- let vals = fmap ((\(w, s) -> w / fromIntegral s) . valuation) children
- -- m = maxing . current $ state
- childrenMaxing = all (maxing . current . treestate) children
- selfMaxing = maxing . current $ state
- newval = if childrenMaxing /= selfMaxing then 1 - maximum vals else maximum vals
- in (newval, 1)
-
- --playCLI :: (MonadFail m, Read t, Choose t m, PlayableGame t l v p m) => m ()
|