Skat Engine und AI auf Haskell Basis
Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

245 linhas
8.3KB

  1. {-# LANGUAGE MultiParamTypeClasses #-}
  2. {-# LANGUAGE BlockArguments #-}
  3. {-# LANGUAGE TypeSynonymInstances #-}
  4. {-# LANGUAGE FlexibleInstances #-}
  5. {-# LANGUAGE FlexibleContexts #-}
  6. {-# LANGUAGE FunctionalDependencies #-}
  7. {-# LANGUAGE TupleSections #-}
  8. {-# LANGUAGE InstanceSigs #-}
  9. {-# LANGUAGE StandaloneDeriving #-}
  10. {-# LANGUAGE ImportQualifiedPost #-}
  11. {-# LANGUAGE UndecidableInstances #-}
  12. module Skat.AI.MonteCarlo where
  13. import Control.Monad.State
  14. import Control.Exception (assert)
  15. import Control.Monad.Fail
  16. import Data.Ord
  17. import Text.Read (readMaybe)
  18. import Data.List (maximumBy, minimumBy, sortBy, delete, intercalate)
  19. import Debug.Trace
  20. import Data.Ratio
  21. import Data.Set (Set)
  22. import qualified Data.Set as Set
  23. import Data.Map (Map)
  24. import qualified Data.Map as Map
  25. import Data.Bits
  26. import Data.Vector (Vector)
  27. import qualified Data.Vector as Vector
  28. import System.Random (Random)
  29. import qualified System.Random as Rand
  30. import Text.Printf
  31. import Data.List.Split
  32. import Skat.AI.Base
  33. import qualified Skat as S
  34. import qualified Skat.Card as S
  35. import qualified Skat.Operations as S
  36. import qualified Skat.Pile as S
  37. import qualified Skat.Player as S hiding (trumpColour, turnColour)
  38. import qualified Skat.Render as S
  39. import Skat.Utils
  40. --import TestEnvs (env3, shuffledEnv2)
  41. type WinCount = Float
  42. type SimCount = Int
  43. data Tree t s = Leaf s Bool (WinCount, SimCount)
  44. | Node s Bool (WinCount, SimCount) [Tree t s]
  45. | Pending s t
  46. simruns :: Tree t s -> SimCount
  47. simruns (Leaf _ _ d) = snd d
  48. simruns (Node _ _ d _) = snd d
  49. simruns Pending{} = 0
  50. wins :: Tree t s -> WinCount
  51. wins (Leaf _ _ d) = fst d
  52. wins (Node _ _ d _) = fst d
  53. wins Pending{} = 0
  54. childrenwins :: Tree t s -> WinCount
  55. childrenwins (Node _ _ _ cs) = sum $ fmap wins cs
  56. childrenwins _ = 0
  57. treestate :: Tree t s -> s
  58. treestate (Leaf s _ _) = s
  59. treestate (Node s _ _ _) = s
  60. treestate (Pending s _) = s
  61. isterminal :: Tree t s -> Bool
  62. isterminal (Leaf _ b _) = b
  63. isterminal (Node _ b _ _) = b
  64. isterminal Pending{} = False
  65. class Draw s where
  66. draw :: s -> String
  67. instance Draw Int where
  68. draw = show
  69. indent :: Int -> String -> String
  70. indent n s = intercalate ("\n" ++ replicate n ' ') $ splitOn "\n" s
  71. visualise :: (HasGameState t p d s, Draw s, Draw t) => Tree t s -> String
  72. visualise (Node s _ d children) = printf "[%f/%d]: %s %s:\n%s" (fst d) (snd d) (show . maxing . current $ s) (indent 14 $ draw s) (intercalate "\n" $ fmap f children)
  73. where f c = printf "---%s" (indent 3 $ visualise c)
  74. visualise (Leaf s _ d) = printf "[%f/%d]: %s" (fst d) (snd d) (indent 9 $ draw s)
  75. visualise (Pending s t) = printf "[pend]: %s %s" (indent 9 $ draw s) (indent 9 $ draw t)
  76. emptytree :: s -> Tree t s
  77. emptytree s = Leaf s False (0, 0)
  78. valuation :: Tree t s -> (WinCount, SimCount)
  79. valuation (Leaf _ _ d) = d
  80. valuation (Node _ _ d _) = d
  81. valuation Pending{} = (0,0)
  82. deriving instance (Show s, Show t) => Show (Tree t s)
  83. class MonadRandom m where
  84. random :: Random a => m a
  85. chooser :: [a] -> m a
  86. instance MonadRandom IO where
  87. random = Rand.randomIO
  88. chooser os = (os!!) <$> Rand.randomRIO (0, length os -1)
  89. instance MonadRandom (State Rand.StdGen) where
  90. random = do
  91. gen <- get
  92. let (a, gen') = Rand.random gen
  93. put gen'
  94. return a
  95. chooser os = do
  96. gen <- get
  97. let (a, gen') = Rand.randomR (0, length os -1) gen
  98. put gen'
  99. return (os !! a)
  100. {-
  101. valuetonum :: (Fractional a, Value v) => v -> a
  102. valuetonum v
  103. | v == win = 1
  104. | v == loss = 0
  105. | v == tie = 0.5
  106. -}
  107. restoint :: (Player p, Value v) => p -> v -> Float
  108. restoint p v = tonum $ if maxing p then v else invert v
  109. {-
  110. updateval :: (Player p, Value d) => p -> [d] -> (WinCount, SimCount) -> (WinCount, SimCount)
  111. updateval team xs d =
  112. let newSimCount = snd d + fromIntegral (length xs)
  113. newWinCount = fst d + sum (fmap (tonum . cvt) xs)
  114. cvt = if maxing team then id else invert
  115. in (newWinCount, newSimCount)
  116. -}
  117. class (Player p, Value d) => HasGameState t p d s | s -> d, s -> p, s -> t where
  118. moves :: s -> [t]
  119. execute :: t -> s -> s
  120. monteevaluate :: s -> d
  121. current :: s -> p
  122. montecarlo :: (Show s, Show t, Eq p, Show d, Monad m, HasGameState t p d s, MonadRandom m)
  123. => Tree t s
  124. -> m (Tree t s)
  125. montecarlo (Pending state turn) = do
  126. let currentTeam = current state
  127. state' = execute turn state
  128. -- objectively get a final score of random playout (independent of perspective)
  129. values <- replicateM 1 (montesimulate state')
  130. let tr = if maxing (current state') then id else invert
  131. vs = fmap (tonum . tr) values
  132. n = sum vs / 1
  133. --let v = if maxing (current state') then value else invert value
  134. let val = (n, 1)
  135. pure $ Leaf state' False val
  136. montecarlo (Leaf state terminal d)
  137. | terminal || length ms == 0 = pure $ Leaf state True d
  138. | otherwise = let children = map (Pending state) ms in pure $ Node state False d children
  139. where ms = moves state
  140. montecarlo (Node state _ d []) = pure $ Leaf state True d
  141. montecarlo n@(Node state True d children) = pure n
  142. montecarlo n@(Node state _ d children)
  143. | all isterminal children =
  144. let d' = reevaluateminmax n
  145. in pure $ Node state True d' children
  146. | otherwise = do
  147. let myruns = snd d
  148. cmp c = if isterminal c then -1 else selectcoeff myruns $ valuation c
  149. (idx, bestChild) =
  150. maximumBy (comparing $ cmp . snd) $ zipWith (,) [0..] children
  151. updated <- montecarlo bestChild
  152. let cs = updateAt idx children updated
  153. newSimRuns = simruns updated - simruns bestChild + snd d
  154. diff = wins updated - wins bestChild
  155. diff2 =
  156. if newSimRuns == snd d then 0
  157. else
  158. if current state == current (treestate updated)
  159. then diff
  160. else 1 - diff
  161. newWins = diff2 + fst d
  162. --return $ trace ("updating node " ++ show diff2 ++ "\n" ++ show updated ++ "\n" ++ show bestChild) (Node state False (newWins, newSimRuns) cs)
  163. return $ Node state False (newWins, newSimRuns) cs
  164. montesimulate :: (Monad m, MonadRandom m, HasGameState t p d s, Show d)
  165. => s
  166. -> m d
  167. montesimulate state = case moves state of
  168. [] -> pure $ monteevaluate state
  169. allowed -> do
  170. turn <- chooser allowed
  171. montesimulate $ execute turn state
  172. runmonte :: Int -> State Rand.StdGen (Tree t s) -> Tree t s
  173. runmonte n action = evalState action (Rand.mkStdGen n)
  174. {-
  175. bestmove :: Tree s -> s
  176. bestmove (Leaf s _ _) = s
  177. bestmove (Node s _ _ cs) = treestate $ selection (comparing $ rate . valuation) cs
  178. where rate (w, s) = w / fromIntegral s
  179. mxing = maxing . current $ s
  180. selection = if mxing then maximumBy else minimumBy
  181. -}
  182. bestmove :: Tree t s -> s
  183. bestmove (Leaf s _ _) = s
  184. bestmove (Node s _ _ cs) = treestate $ maximumBy (comparing $ rate . valuation) cs
  185. where rate (w, s) = w / fromIntegral s
  186. selectcoeff :: SimCount -> (WinCount, SimCount) -> Float
  187. selectcoeff _ (_, 0) = 10000000
  188. selectcoeff t (w, s) = w / fromIntegral s + explorationParam * sqrt (log (fromIntegral t) / fromIntegral s)
  189. where explorationParam = sqrt 2
  190. reevaluate :: Tree t s -> (WinCount, SimCount)
  191. reevaluate tree
  192. | isterminal tree = valuation tree
  193. | otherwise = case tree of
  194. (Pending{}) -> valuation tree
  195. (Leaf{}) -> valuation tree
  196. (Node _ _ _ children) -> let total = sum $ fmap simruns children
  197. wns = fromIntegral total - sum (fmap wins children)
  198. in (wns, total)
  199. reevaluateminmax :: HasGameState t p d s => Tree t s -> (WinCount, SimCount)
  200. reevaluateminmax tree
  201. | isterminal tree = valuation tree
  202. | otherwise = case tree of
  203. (Pending{}) -> valuation tree
  204. (Leaf{}) -> valuation tree
  205. (Node state _ _ children) ->
  206. let vals = fmap ((\(w, s) -> w / fromIntegral s) . valuation) children
  207. -- m = maxing . current $ state
  208. childrenMaxing = all (maxing . current . treestate) children
  209. selfMaxing = maxing . current $ state
  210. newval = if childrenMaxing /= selfMaxing then 1 - maximum vals else maximum vals
  211. in (newval, 1)
  212. --playCLI :: (MonadFail m, Read t, Choose t m, PlayableGame t l v p m) => m ()