Haskell artificial neural network library
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

328 lines
12KB

  1. {-# LANGUAGE ScopedTypeVariables, FlexibleContexts, BangPatterns #-}
  2. {-# OPTIONS -Wall #-}
  3. -- |
  4. -- Module : Network
  5. -- Copyright : (c) 2017 Christian Merten
  6. -- Maintainer : c.merten@gmx.net
  7. -- Stability : experimental
  8. -- Portability : GHC
  9. --
  10. -- An implementation of artifical feed-forward neural networks in pure Haskell.
  11. --
  12. -- An example is added in /XOR.hs/
  13. module Network (
  14. -- * Network
  15. Network(..),
  16. Layer(..),
  17. newNetwork,
  18. output,
  19. -- * Learning functions
  20. trainShuffled,
  21. trainNTimes,
  22. CostFunction(..),
  23. getDelta,
  24. LearningRate,
  25. Lambda,
  26. TrainingDataLength,
  27. Sample, Samples, (-->),
  28. -- * Activation functions
  29. ActivationFunction, ActivationFunctionDerivative,
  30. sigmoid,
  31. sigmoid',
  32. -- * Network serialization
  33. saveNetwork,
  34. loadNetwork
  35. ) where
import Control.Monad (zipWithM, forM)
import Data.Array.IO
import Data.Binary
import Data.List.Split (chunksOf)
import Data.Maybe (fromMaybe)
import Debug.Trace (trace)
import Numeric.LinearAlgebra
import System.Directory
import System.IO (hPutStrLn, stderr)
import System.Random
import Text.Read (readMaybe)
import Text.Regex.PCRE
  47. -- | The generic feedforward network type, a binary instance is implemented.
  48. -- It takes a list of layers
  49. -- with a minimum of one (output layer).
  50. -- It is usually constructed using the `newNetwork` function.
  51. data Network a = Network { layers :: [Layer a] }
  52. deriving (Show)
  53. -- | One layer of a network, storing the weights matrix and the biases vector
  54. -- of this layer.
  55. data Layer a = Layer { weights :: Matrix a, biases :: Vector a }
  56. deriving (Show)
  57. instance (Element a, Binary a) => Binary (Network a) where
  58. put (Network ls) = put ls
  59. get = Network `fmap` get
  60. instance (Element a, Binary a) => Binary (Layer a) where
  61. put (Layer ws bs) = do
  62. put (toLists ws)
  63. put (toList bs)
  64. get = do
  65. ws <- get
  66. bs <- get
  67. return $ Layer (fromLists ws) (fromList bs)
  68. -- | Cost Function Enum
  69. data CostFunction = QuadraticCost
  70. | CrossEntropyCost
  71. deriving (Show, Eq)
  72. -- | getDelta based on the raw input, the activated input and the desired output
  73. -- results in different values depending on the CostFunction type.
  74. getDelta :: Floating a => CostFunction -> a -> a -> a -> a
  75. getDelta QuadraticCost z a y = (a - y) * sigmoid'(z)
  76. getDelta CrossEntropyCost _ a y = a - y
  77. -- | Activation function used to calculate the actual output of a neuron.
  78. -- Usually the 'sigmoid' function.
  79. type ActivationFunction a = a -> a
  80. -- | The derivative of an activation function.
  81. type ActivationFunctionDerivative a = a -> a
  82. -- | Training sample that can be used for the training functions.
  83. --
  84. -- > trainingData :: Samples Double
  85. -- > trainingData = [ fromList [0, 0] --> fromList [0],
  86. -- > fromList [0, 1] --> fromList [1],
  87. -- > fromList [1, 0] --> fromList [1],
  88. -- > fromList [1, 1] --> fromList [0]]
  89. type Sample a = (Vector a, Vector a)
  90. -- | A list of 'Sample's
  91. type Samples a = [Sample a]
  92. -- | A simple synonym for the (,) operator, used to create samples very intuitively.
  93. (-->) :: Vector a -> Vector a -> Sample a
  94. (-->) = (,)
  95. -- | The learning rate, affects the learning speed, lower learning rate results
  96. -- in slower learning, but usually better results after more epochs.
  97. type LearningRate = Double
  98. -- | Lambda value affecting the regularization while learning.
  99. type Lambda = Double
  100. -- | Wrapper around the training data length.
  101. type TrainingDataLength = Int
  102. -- | Initializes a new network with random values for weights and biases
  103. -- in all layers.
  104. --
  105. -- > net <- newNetwork [2, 3, 4]
  106. newNetwork :: [Int] -> IO (Network Double)
  107. newNetwork layerSizes
  108. | length layerSizes < 2 = error "Network too small!"
  109. | otherwise = do
  110. lays <- zipWithM go (init layerSizes) (tail layerSizes)
  111. return $ Network lays
  112. where go :: Int -> Int -> IO (Layer Double)
  113. go inputSize outputSize = do
  114. ws <- randn outputSize inputSize
  115. seed <- randomIO
  116. let bs = randomVector seed Gaussian outputSize
  117. return $ Layer ws bs
  118. -- | Calculate the output of the network based on the network, a given
  119. -- 'ActivationFunction' and the input vector.
  120. output :: (Numeric a, Num (Vector a))
  121. => Network a
  122. -> ActivationFunction a
  123. -> Vector a
  124. -> Vector a
  125. output net act input = foldl f input (layers net)
  126. where f vec layer = cmap act ((weights layer #> vec) + biases layer)
  127. rawOutputs :: (Numeric a, Num (Vector a))
  128. => Network a
  129. -> ActivationFunction a
  130. -> Vector a
  131. -> [(Vector a, Vector a)]
  132. rawOutputs net act input = scanl f (input, input) (layers net)
  133. where f (_, a) layer = let z' = (weights layer #> a) + biases layer in
  134. (z', cmap act z')
  135. -- | The most used training function, randomly shuffling the training set before
  136. -- every training epoch
  137. --
  138. -- > trainShuffled 30 (\n e -> "") net CrossEntropyCost 0.5 trainData 10 0.1
  139. trainShuffled :: Int
  140. -> (Network Double -> Int -> String)
  141. -> Network Double
  142. -> CostFunction
  143. -> Lambda
  144. -> Samples Double
  145. -> Int
  146. -> Double
  147. -> IO (Network Double)
  148. trainShuffled 0 _ net _ _ _ _ _ = return net
  149. trainShuffled epochs debug net costFunction lambda trainSamples miniBatchSize eta = do
  150. spls <- shuffle trainSamples
  151. let !net' = trainSGD net costFunction lambda spls miniBatchSize eta
  152. trace (debug net' epochs)
  153. (trainShuffled (epochs - 1) debug net' costFunction lambda trainSamples miniBatchSize eta)
  154. -- | Pure version of 'trainShuffled', training the network /n/ times without
  155. -- shuffling the training set, resulting in slightly worse results.
  156. trainNTimes :: Int
  157. -> (Network Double -> Int -> String)
  158. -> Network Double
  159. -> CostFunction
  160. -> Lambda
  161. -> Samples Double
  162. -> Int
  163. -> Double
  164. -> Network Double
  165. trainNTimes 0 _ net _ _ _ _ _ = net
  166. trainNTimes epochs debug net costFunction lambda trainSamples miniBatchSize eta =
  167. trace (debug net' epochs)
  168. (trainNTimes (epochs - 1) debug net' costFunction lambda trainSamples miniBatchSize eta)
  169. where !net' = trainSGD net costFunction lambda trainSamples miniBatchSize eta
  170. trainSGD :: (Numeric Double, Floating Double)
  171. => Network Double
  172. -> CostFunction
  173. -> Lambda
  174. -> Samples Double
  175. -> Int
  176. -> Double
  177. -> Network Double
  178. trainSGD net costFunction lambda trainSamples miniBatchSize eta =
  179. foldl updateMiniBatch net (chunksOf miniBatchSize trainSamples)
  180. where updateMiniBatch = update eta costFunction lambda (length trainSamples)
  181. update :: LearningRate
  182. -> CostFunction
  183. -> Lambda
  184. -> TrainingDataLength
  185. -> Network Double
  186. -> Samples Double
  187. -> Network Double
  188. update eta costFunction lambda n net spls = case newNablas of
  189. Nothing -> net
  190. Just x -> net { layers = layers' x }
  191. where newNablas :: Maybe [Layer Double]
  192. newNablas = foldl updateNablas Nothing spls
  193. updateNablas :: Maybe [Layer Double] -> Sample Double -> Maybe [Layer Double]
  194. updateNablas mayNablas sample =
  195. let nablasDelta = backprop net costFunction sample
  196. f nabla nablaDelta =
  197. nabla { weights = weights nabla + weights nablaDelta,
  198. biases = biases nabla + biases nablaDelta }
  199. in case mayNablas of
  200. Just nablas -> Just $ zipWith f nablas nablasDelta
  201. Nothing -> Just $ nablasDelta
  202. layers' :: [Layer Double] -> [Layer Double]
  203. layers' nablas = zipWith updateLayer (layers net) nablas
  204. updateLayer :: Layer Double -> Layer Double -> Layer Double
  205. updateLayer layer nabla =
  206. let w = weights layer -- weights matrix
  207. nw = weights nabla
  208. b = biases layer -- biases vector
  209. nb = biases nabla
  210. fac = 1 - eta * (lambda / fromIntegral n)
  211. w' = scale fac w - scale (eta / (fromIntegral $ length spls)) nw
  212. b' = b - scale (eta / (fromIntegral $ length spls)) nb
  213. in layer { weights = w', biases = b' }
  214. backprop :: Network Double -> CostFunction -> Sample Double -> [Layer Double]
  215. backprop net costFunction spl = finalNablas
  216. where rawFeedforward :: [(Vector Double, Vector Double)]
  217. rawFeedforward = reverse $ rawOutputs net sigmoid (fst spl)
  218. -- get starting activation and raw value
  219. headZ, headA :: Vector Double
  220. (headZ, headA) = head rawFeedforward
  221. -- get starting delta, based on the activation of the last layer
  222. startDelta = getDelta costFunction headZ headA (snd spl)
  223. -- calculate weighs of last layer in advance
  224. lastNablaB = startDelta
  225. lastNablaW = startDelta `outer` previousA
  226. where previousA
  227. | length rawFeedforward > 1 = snd $ rawFeedforward !! 1
  228. | otherwise = fst spl
  229. lastLayer = Layer { weights = lastNablaW, biases = lastNablaB }
  230. -- reverse layers, analogy to the reversed (z, a) list
  231. layersReversed = reverse $ layers net
  232. -- calculate nablas, beginning at the end of the network (startDelta)
  233. (finalNablas, _) = foldl calculate ([lastLayer], startDelta)
  234. [1..length layersReversed - 1]
  235. -- takes the index and updates nablas
  236. calculate (nablas, oldDelta) idx =
  237. let -- extract raw and activated value
  238. (z, _) = rawFeedforward !! idx
  239. -- apply prime derivative of sigmoid
  240. z' = cmap sigmoid' z
  241. -- calculate new delta
  242. w = weights $ layersReversed !! (idx - 1)
  243. delta = (tr w #> oldDelta) * z'
  244. -- nablaB is just the delta vector
  245. nablaB = delta
  246. -- activation in previous layer
  247. aPrevious = snd $ rawFeedforward !! (idx + 1)
  248. -- dot product of delta and the activation in the previous layer
  249. nablaW = delta `outer` aPrevious
  250. -- put nablas into a new layer
  251. in (Layer { weights = nablaW, biases = nablaB } : nablas, delta)
  252. -- | The sigmoid function
  253. sigmoid :: Floating a => ActivationFunction a
  254. sigmoid x = 1 / (1 + exp (-x))
  255. -- | The derivative of the sigmoid function.
  256. sigmoid' :: Floating a => ActivationFunctionDerivative a
  257. sigmoid' x = sigmoid x * (1 - sigmoid x)
  258. shuffle :: [a] -> IO [a]
  259. shuffle xs = do
  260. ar <- newArr n xs
  261. forM [1..n] $ \i -> do
  262. j <- randomRIO (i,n)
  263. vi <- readArray ar i
  264. vj <- readArray ar j
  265. writeArray ar j vi
  266. return vj
  267. where
  268. n = length xs
  269. newArr :: Int -> [a] -> IO (IOArray Int a)
  270. newArr len lst = newListArray (1,len) lst
  271. -- | Saves the network as the given filename. When the file already exists,
  272. -- it looks for another filename by increasing the version, e.g
  273. -- /mnist.net/ becomes /mnist1.net/.
  274. saveNetwork :: (Element a, Binary a) => FilePath -> Network a -> IO ()
  275. saveNetwork fp net = do
  276. ex <- doesFileExist fp
  277. case ex of
  278. True -> saveNetwork (newFileName fp) net
  279. False -> encodeFile fp net
  280. newFileName :: FilePath -> FilePath
  281. newFileName fp = case fp =~ "(.+[a-z]){0,1}([0-9]*)(\\..*)" :: [[String]] of
  282. [[_, p, v, s]] -> p ++ show (version v + 1) ++ s
  283. _ -> fp ++ "l"
  284. where version :: String -> Int
  285. version xs = fromMaybe 0 (readMaybe xs :: Maybe Int)
  286. -- | Load the network with the given filename.
  287. loadNetwork :: (Element a, Binary a) => FilePath -> IO (Network a)
  288. loadNetwork = decodeFile