|
- {-# LANGUAGE ScopedTypeVariables, FlexibleContexts, BangPatterns #-}
- {-# OPTIONS -Wall #-}
-
- module Network where
-
import Data.Binary
import Data.List (foldl')
import Data.List.Split (chunksOf)
import Data.Maybe (fromMaybe)
import Text.Read (readMaybe)

import System.Directory
import System.Random
import Control.Monad (zipWithM, forM)
import Data.Array.IO
import Debug.Trace (trace)
import Text.Regex.PCRE

import Numeric.LinearAlgebra
-
-- | The generic feedforward network type, a binary instance is implemented.
-- It wraps a list of 'Layer's, one per connection between consecutive
-- neuron layers, so a net built from the sizes @[2, 3, 4]@ holds two
-- 'Layer's (input size 2 is not itself a 'Layer').
-- It is usually constructed using the `newNetwork` function, initializing the matrices
-- with some default random values.
--
-- > net <- newNetwork [2, 3, 4]
data Network a = Network { layers :: [Layer a] }
               deriving (Show)
-
-- | One layer of a network, storing the weights matrix and the biases vector
-- of this layer.  'weights' has one row per neuron of this layer and one
-- column per input; 'biases' has one entry per neuron.
data Layer a = Layer { weights :: Matrix a, biases :: Vector a }
             deriving (Show)
-
-- | A 'Network' is serialized simply as its list of layers.
instance (Element a, Binary a) => Binary (Network a) where
  put = put . layers
  get = Network <$> get
-
-- | A 'Layer' is serialized by converting the hmatrix types to plain
-- lists first (weights before biases), since those already have 'Binary'
-- instances.
instance (Element a, Binary a) => Binary (Layer a) where
  put (Layer ws bs) = put (toLists ws) >> put (toList bs)
  get = Layer <$> (fromLists <$> get) <*> (fromList <$> get)
-
-- | Cost Function Enum: selects which cost drives the output-layer error
-- term computed by 'getDelta'.
data CostFunction = QuadraticCost
                  | CrossEntropyCost
                  deriving (Show, Eq)
-
-- | Output-layer error term (delta) for a given cost function, computed
-- from the raw weighted input @z@, its activation @a@ and the desired
-- output @y@.  With the cross-entropy cost the @sigmoid'@ factor cancels
-- out, so only @a - y@ remains.
getDelta :: Floating a => CostFunction -> a -> a -> a -> a
getDelta cost z a y = case cost of
  QuadraticCost    -> (a - y) * sigmoid' z
  CrossEntropyCost -> a - y
-
-- | An activation function maps a neuron's raw weighted input to its output.
type ActivationFunction a = a -> a
-- | The derivative of an activation function, used during backpropagation.
type ActivationFunctionDerivative a = a -> a

-- | One training sample: (input vector, expected output vector).
type Sample a = (Vector a, Vector a)
-- | A whole training set.
type Samples a = [Sample a]
-
-- | A simple synonym for the (,) operator, used to create samples very intuitively.
(-->) :: Vector a -> Vector a -> Sample a
input --> expected = (input, expected)
-
-- | Gradient-descent step size (eta).
type LearningRate = Double
-- | L2 regularization strength.
type Lambda = Double
-- | Size of the full training set, used to scale the regularization term.
type TrainingDataLength = Int
-
-- | Build a network with Gaussian-random weights and biases from a list
-- of layer sizes, the input size first.  At least two sizes are required
-- (one input, one output layer); fewer is a fatal error.
newNetwork :: [Int] -> IO (Network Double)
newNetwork layerSizes
  | length layerSizes < 2 = error "Network too small!"
  | otherwise = Network <$> zipWithM mkLayer (init layerSizes) (tail layerSizes)
  where -- one Layer per pair of adjacent sizes
        mkLayer :: Int -> Int -> IO (Layer Double)
        mkLayer inputSize outputSize = do
          ws <- randn outputSize inputSize
          seed <- randomIO
          return $ Layer ws (randomVector seed Gaussian outputSize)
-
-- | Feed an input vector through the whole network and return only the
-- final activation.
output :: (Numeric a, Num (Vector a))
       => Network a
       -> ActivationFunction a
       -> Vector a
       -> Vector a
output net act input = foldl propagate input (layers net)
  where -- one feedforward step: act (W . a + b)
        propagate acc layer = cmap act ((weights layer #> acc) + biases layer)
-
-- | Like 'output', but keep every intermediate activation: the first
-- element is the untouched input, the last is the network's final output.
outputs :: (Numeric a, Num (Vector a))
        => Network a
        -> ActivationFunction a
        -> Vector a
        -> [Vector a]
outputs net act input = scanl propagate input (layers net)
  where -- one feedforward step: act (W . a + b)
        propagate acc layer = cmap act ((weights layer #> acc) + biases layer)
-
-- | Feed the input forward, collecting for every layer the pair of the
-- raw weighted input @z@ and its activation @act z@.  The first pair
-- duplicates the plain input in both positions.
rawOutputs :: (Numeric a, Num (Vector a))
           => Network a
           -> ActivationFunction a
           -> Vector a
           -> [(Vector a, Vector a)]
rawOutputs net act input = scanl step (input, input) (layers net)
  where step (_, activation) layer =
          let z = (weights layer #> activation) + biases layer
          in (z, cmap act z)
-
-- | The most used training function, randomly shuffling the training set
-- before every training epoch.  The debug callback's result is emitted
-- via 'trace' (stderr) once per epoch.
--
-- > trainShuffled 30 (\n e -> "") net CrossEntropyCost 0.5 trainData 10 0.1
trainShuffled :: Int                               -- ^ epochs to run
              -> (Network Double -> Int -> String) -- ^ per-epoch debug message
              -> Network Double
              -> CostFunction
              -> Lambda
              -> Samples Double
              -> Int                               -- ^ mini-batch size
              -> Double                            -- ^ learning rate
              -> IO (Network Double)
trainShuffled 0 _ net _ _ _ _ _ = return net
trainShuffled epochs debug net costFunction lambda trainSamples miniBatchSize eta = do
  shuffled <- shuffle trainSamples
  -- force the new network now so thunks do not pile up across epochs
  let !trained = trainSGD net costFunction lambda shuffled miniBatchSize eta
  trace (debug trained epochs) $
    trainShuffled (epochs - 1) debug trained costFunction lambda trainSamples miniBatchSize eta
-
-
-- | Train for a fixed number of epochs without reshuffling; the pure
-- counterpart of 'trainShuffled'.  The debug callback's result is emitted
-- via 'trace' (stderr) once per epoch.
trainNTimes :: Int                               -- ^ epochs to run
            -> (Network Double -> Int -> String) -- ^ per-epoch debug message
            -> Network Double
            -> CostFunction
            -> Lambda
            -> Samples Double
            -> Int                               -- ^ mini-batch size
            -> Double                            -- ^ learning rate
            -> Network Double
trainNTimes 0 _ net _ _ _ _ _ = net
trainNTimes epochs debug net costFunction lambda trainSamples miniBatchSize eta =
    trace (debug trained epochs) remainingEpochs
  where
    -- force each epoch's result so thunks do not pile up
    !trained = trainSGD net costFunction lambda trainSamples miniBatchSize eta
    remainingEpochs =
      trainNTimes (epochs - 1) debug trained costFunction lambda trainSamples miniBatchSize eta
-
-
-- | Run one epoch of stochastic gradient descent: split the training set
-- into mini-batches and fold the network through one 'update' per batch.
--
-- Changes from the original: the constraints @(Numeric Double, Floating
-- Double)@ were dropped — they mention only the concrete type 'Double'
-- and are always satisfied (GHC -Wall flags them as simplifiable) — and
-- the lazy 'foldl' was replaced with 'foldl'' so the epoch does not build
-- a left-nested chain of update thunks over all mini-batches.
trainSGD :: Network Double  -- ^ network to train
         -> CostFunction    -- ^ cost driving the gradient
         -> Lambda          -- ^ L2 regularization factor
         -> Samples Double  -- ^ training set (in the order it will be used)
         -> Int             -- ^ mini-batch size
         -> Double          -- ^ learning rate (eta)
         -> Network Double
trainSGD net costFunction lambda trainSamples miniBatchSize eta =
  foldl' updateMiniBatch net (chunksOf miniBatchSize trainSamples)
  where updateMiniBatch = update eta costFunction lambda (length trainSamples)
-
-- | Apply one mini-batch update to the network: sum the per-sample
-- gradients from 'backprop', then move every layer's weights and biases
-- against the averaged gradient, applying L2 weight decay to the weights.
-- An empty batch leaves the network untouched.
--
-- Changes from the original: 'foldl'' replaces the lazy 'foldl' over the
-- samples (avoids a thunk chain per batch) and the two scale factors are
-- hoisted out of the per-layer update instead of being recomputed for
-- every layer.
update :: LearningRate        -- ^ eta
       -> CostFunction
       -> Lambda              -- ^ L2 regularization factor
       -> TrainingDataLength  -- ^ size n of the full training set
       -> Network Double
       -> Samples Double      -- ^ the mini-batch
       -> Network Double
update eta costFunction lambda n net spls = case summedNablas of
    Nothing -> net                         -- empty batch: nothing to do
    Just gs -> net { layers = layers' gs }
  where summedNablas :: Maybe [Layer Double]
        summedNablas = foldl' accumulate Nothing spls
        -- Fold one sample's gradient into the running sum ('Nothing'
        -- until the first sample has been seen).
        accumulate :: Maybe [Layer Double] -> Sample Double -> Maybe [Layer Double]
        accumulate acc sample =
          let gradient = backprop net costFunction sample
              add nabla delta =
                nabla { weights = weights nabla + weights delta,
                        biases = biases nabla + biases delta }
          in Just (maybe gradient (\ns -> zipWith add ns gradient) acc)
        -- averaging factor over the batch, hoisted out of updateLayer
        batchFactor = eta / fromIntegral (length spls)
        -- L2 weight-decay factor (biases are not decayed)
        decayFactor = 1 - eta * (lambda / fromIntegral n)
        layers' :: [Layer Double] -> [Layer Double]
        layers' = zipWith updateLayer (layers net)
        updateLayer :: Layer Double -> Layer Double -> Layer Double
        updateLayer layer nabla =
          let w' = scale decayFactor (weights layer) - scale batchFactor (weights nabla)
              b' = biases layer - scale batchFactor (biases nabla)
          in layer { weights = w', biases = b' }
-
-- | Backpropagation for a single training sample: the gradient of the cost
-- with respect to every weight matrix and bias vector, packaged as a list
-- of pseudo-'Layer's in the same front-to-back order as @layers net@
-- (the 'weights'/'biases' fields hold nabla_w / nabla_b, not parameters).
backprop :: Network Double -> CostFunction -> Sample Double -> [Layer Double]
backprop net costFunction spl = finalNablas
  where -- (z, activation) pairs from the forward pass, reversed so the
        -- OUTPUT layer comes first; the last element duplicates the input
        -- (see 'rawOutputs')
        rawFeedforward :: [(Vector Double, Vector Double)]
        rawFeedforward = reverse $ rawOutputs net sigmoid (fst spl)
        -- get starting activation and raw value (of the output layer);
        -- head is safe: rawOutputs always yields at least the input pair
        headZ, headA :: Vector Double
        (headZ, headA) = head rawFeedforward
        -- get starting delta, based on the activation of the last layer
        -- and the expected output (snd spl)
        startDelta = getDelta costFunction headZ headA (snd spl)
        -- calculate nablas of the last layer in advance
        lastNablaB = startDelta
        lastNablaW = startDelta `outer` previousA
          where previousA
                  -- activation of the second-to-last layer; for a
                  -- single-layer net this is the raw input itself
                  | length rawFeedforward > 1 = snd $ rawFeedforward !! 1
                  | otherwise = fst spl
        lastLayer = Layer { weights = lastNablaW, biases = lastNablaB }
        -- reverse layers, analogy to the reversed (z, a) list
        layersReversed = reverse $ layers net
        -- walk backwards through the remaining layers (indices count from
        -- the output layer); each step prepends its nabla, so the result
        -- comes out front-to-back again
        (finalNablas, _) = foldl calculate ([lastLayer], startDelta)
                                 [1..length layersReversed - 1]
        -- takes the index and updates nablas
        calculate (nablas, oldDelta) idx =
          let -- extract raw and activated value of this layer
              (z, _) = rawFeedforward !! idx
              -- apply prime derivative of sigmoid
              z' = cmap sigmoid' z
              -- propagate the error backwards through the weights of the
              -- layer in front (closer to the output)
              w = weights $ layersReversed !! (idx - 1)
              delta = (tr w #> oldDelta) * z'
              -- nablaB is just the delta vector
              nablaB = delta
              -- activation in previous layer; idx + 1 is in bounds because
              -- rawFeedforward has one more element than layersReversed
              aPrevious = snd $ rawFeedforward !! (idx + 1)
              -- outer product of delta and the previous layer's activation
              nablaW = delta `outer` aPrevious
              -- put nablas into a new layer
          in (Layer { weights = nablaW, biases = nablaB } : nablas, delta)
-
-
-- | The logistic activation function, mapping any real input into (0, 1).
sigmoid :: Floating a => ActivationFunction a
sigmoid z = recip (1 + exp (negate z))
-
-- | Derivative of 'sigmoid': @s x * (1 - s x)@.  The sigmoid value is
-- computed once and shared, instead of being evaluated twice as before.
sigmoid' :: Floating a => ActivationFunctionDerivative a
sigmoid' x = let s = sigmoid x in s * (1 - s)
-
-- | Return a uniformly random permutation of the list (a Fisher-Yates
-- variant on a mutable 'IOArray').  At step i a random slot j >= i is
-- chosen; the element at j becomes the i-th output element while the
-- element at i is written into slot j, so every element is emitted
-- exactly once and slots < i are never read again.
shuffle :: [a] -> IO [a]
shuffle xs = do
  ar <- newArr n xs
  forM [1..n] $ \i -> do
    j <- randomRIO (i,n)
    vi <- readArray ar i
    vj <- readArray ar j
    writeArray ar j vi
    return vj
  where
    n = length xs
    -- 1-based mutable array holding the working copy of the list
    newArr :: Int -> [a] -> IO (IOArray Int a)
    newArr len lst = newListArray (1,len) lst
-
-- | Serialize a network to disk via its 'Binary' instance.  If the target
-- file already exists, a fresh versioned name is derived with
-- 'newFileName' and the save retried, so existing files are never
-- overwritten.  (Idiom fix: @case bool of True/False@ replaced by
-- if/then/else.)
saveNetwork :: (Element a, Binary a) => FilePath -> Network a -> IO ()
saveNetwork fp net = do
  exists <- doesFileExist fp
  if exists
    then saveNetwork (newFileName fp) net  -- bump the version, try again
    else encodeFile fp net
-
-- | Derive a successor file name by bumping a trailing version number in
-- front of the extension, e.g. @net.bin -> net1.bin -> net2.bin@.
-- The regex captures (1) an optional prefix ending in a lowercase letter,
-- (2) an optional digit run (the version, 0 when absent) and (3) the
-- dot-plus-extension.  If the name does not match (e.g. no extension),
-- an @\"l\"@ is appended so the name still changes and 'saveNetwork'
-- cannot loop forever.
newFileName :: FilePath -> FilePath
newFileName fp = case fp =~ "(.+[a-z]){0,1}([0-9]*)(\\..*)" :: [[String]] of
  [[_, p, v, s]] -> p ++ show (version v + 1) ++ s
  _ -> fp ++ "l"
  where -- parse the captured digits, defaulting to version 0
        version :: String -> Int
        version xs = fromMaybe 0 (readMaybe xs :: Maybe Int)
-
-- | Read a network back from a file written by 'saveNetwork'.
-- NOTE(review): 'decodeFile' throws on malformed input rather than
-- returning an 'Either' — acceptable here, but callers should be aware.
loadNetwork :: (Element a, Binary a) => FilePath -> IO (Network a)
loadNetwork = decodeFile
|