{-# LANGUAGE ScopedTypeVariables, FlexibleContexts, BangPatterns #-}
{-# OPTIONS_GHC -Wall #-}

module Network where

import Data.List.Split (chunksOf)
import Data.Binary
import Data.Maybe (fromMaybe)
import Text.Read (readMaybe)
import System.Directory
import System.Random
import Control.Monad (zipWithM, forM)
import Data.Array.IO
import Debug.Trace (trace)
import Text.Regex.PCRE
import Numeric.LinearAlgebra

-- | The generic feedforward network type; a 'Binary' instance is implemented.
-- It takes a list of layers with a minimum of one (output layer).
-- It is usually constructed using the 'newNetwork' function, initializing the
-- matrices with some default random values.
--
-- > net <- newNetwork [2, 3, 4]
data Network a = Network
    { layers :: [Layer a]  -- ^ hidden and output layers, input to output order
    } deriving (Show)

-- | One layer of a network, storing the weights matrix and the biases vector
-- of this layer.
data Layer a = Layer
    { weights :: Matrix a  -- ^ rows = this layer's size, cols = previous layer's size
    , biases  :: Vector a  -- ^ one bias per neuron of this layer
    } deriving (Show)

-- | A network is serialized as its list of layers.
instance (Element a, Binary a) => Binary (Network a) where
    put (Network ls) = put ls
    get = Network `fmap` get

-- | Layers are serialized through plain lists, since the hmatrix types carry
-- no 'Binary' instances of their own.
instance (Element a, Binary a) => Binary (Layer a) where
    put (Layer ws bs) = do
        put (toLists ws)
        put (toList bs)
    get = do
        ws <- get
        bs <- get
        return $ Layer (fromLists ws) (fromList bs)

-- | Cost function enum.
data CostFunction = QuadraticCost | CrossEntropyCost deriving (Show, Eq)

-- | 'getDelta' computes the output-layer error from the raw input @z@, the
-- activated input @a@ and the desired output @y@; the result differs
-- depending on the 'CostFunction' used.
getDelta :: Floating a => CostFunction -> a -> a -> a -> a
getDelta QuadraticCost    z a y = (a - y) * sigmoid' z
getDelta CrossEntropyCost _ a y = a - y

type ActivationFunction a           = a -> a
type ActivationFunctionDerivative a = a -> a

-- | A sample pairs an input vector with its desired output vector.
type Sample  a = (Vector a, Vector a)
type Samples a = [Sample a]

-- | A simple synonym for the (,) operator, used to create samples very intuitively.
(-->) :: Vector a -> Vector a -> Sample a
(-->) = (,)

type LearningRate       = Double
type Lambda             = Double
type TrainingDataLength = Int

-- | Construct a network with Gaussian-initialized weights and biases.
-- The list gives the layer sizes including the input layer, so it must
-- contain at least two entries; otherwise 'error' is raised.
newNetwork :: [Int] -> IO (Network Double)
newNetwork layerSizes
    | length layerSizes < 2 = error "Network too small!"
    | otherwise = do
        lays <- zipWithM go (init layerSizes) (tail layerSizes)
        return $ Network lays
  where
    -- Build one layer from its input size and its own (output) size.
    go :: Int -> Int -> IO (Layer Double)
    go inputSize outputSize = do
        ws   <- randn outputSize inputSize
        seed <- randomIO
        let bs = randomVector seed Gaussian outputSize
        return $ Layer ws bs

-- | Feed an input vector through the whole network and return the final
-- activation.
output :: (Numeric a, Num (Vector a)) => Network a -> ActivationFunction a -> Vector a -> Vector a
output net act input = foldl f input (layers net)
  where
    f vec layer = cmap act ((weights layer #> vec) + biases layer)

-- | Like 'output', but returns the activation of every layer, starting with
-- the input vector itself.
outputs :: (Numeric a, Num (Vector a)) => Network a -> ActivationFunction a -> Vector a -> [Vector a]
outputs net act input = scanl f input (layers net)
  where
    f vec layer = cmap act ((weights layer #> vec) + biases layer)

-- | Like 'outputs', but each element pairs the raw (pre-activation) value
-- with the activated value; the first element repeats the input twice.
rawOutputs :: (Numeric a, Num (Vector a)) => Network a -> ActivationFunction a -> Vector a -> [(Vector a, Vector a)]
rawOutputs net act input = scanl f (input, input) (layers net)
  where
    f (_, a) layer =
        let z' = (weights layer #> a) + biases layer
        in (z', cmap act z')

-- | The most used training function, randomly shuffling the training set before
-- every training epoch.
--
-- > trainShuffled 30 (\n e -> "") net CrossEntropyCost 0.5 trainData 10 0.1
trainShuffled :: Int -> (Network Double -> Int -> String) -> Network Double -> CostFunction -> Lambda -> Samples Double -> Int -> Double -> IO (Network Double)
trainShuffled 0 _ net _ _ _ _ _ = return net
trainShuffled epochs debug net costFunction lambda trainSamples miniBatchSize eta = do
    spls <- shuffle trainSamples
    -- Force each epoch's network so thunks don't accumulate over epochs.
    let !net' = trainSGD net costFunction lambda spls miniBatchSize eta
    trace (debug net' epochs)
          (trainShuffled (epochs - 1) debug net' costFunction lambda trainSamples miniBatchSize eta)

-- | Train a fixed number of epochs without shuffling; the debug callback is
-- emitted through 'trace' after every epoch.
trainNTimes :: Int -> (Network Double -> Int -> String) -> Network Double -> CostFunction -> Lambda -> Samples Double -> Int -> Double -> Network Double
trainNTimes 0 _ net _ _ _ _ _ = net
trainNTimes epochs debug net costFunction lambda trainSamples miniBatchSize eta =
    trace (debug net' epochs)
          (trainNTimes (epochs - 1) debug net' costFunction lambda trainSamples miniBatchSize eta)
  where
    !net' = trainSGD net costFunction lambda trainSamples miniBatchSize eta

-- | One epoch of stochastic gradient descent: split the samples into
-- mini-batches and update the network once per batch.
trainSGD :: Network Double -> CostFunction -> Lambda -> Samples Double -> Int -> Double -> Network Double
trainSGD net costFunction lambda trainSamples miniBatchSize eta =
    go net (chunksOf miniBatchSize trainSamples)
  where
    -- Strict left fold: force the intermediate network per batch instead of
    -- building an epoch-sized chain of thunks (a lazy foldl did before).
    go :: Network Double -> [Samples Double] -> Network Double
    go !acc []       = acc
    go !acc (b : bs) = go (update eta costFunction lambda n acc b) bs
    -- Regularization is scaled by the FULL training-set length, not the batch.
    n = length trainSamples

-- | Apply one mini-batch: sum the per-sample backprop gradients, then step
-- the weights with L2 regularization. An empty batch leaves the net as-is.
update :: LearningRate -> CostFunction -> Lambda -> TrainingDataLength -> Network Double -> Samples Double -> Network Double
update eta costFunction lambda n net spls =
    case newNablas of
        Nothing -> net
        Just x  -> net { layers = layers' x }
  where
    -- Accumulated gradients over the batch; 'Nothing' when the batch is empty.
    newNablas :: Maybe [Layer Double]
    newNablas = foldl updateNablas Nothing spls

    updateNablas :: Maybe [Layer Double] -> Sample Double -> Maybe [Layer Double]
    updateNablas mayNablas sample =
        let nablasDelta = backprop net costFunction sample
            f nabla nablaDelta = nabla { weights = weights nabla + weights nablaDelta
                                       , biases  = biases nabla + biases nablaDelta }
        in case mayNablas of
            Just nablas -> Just $ zipWith f nablas nablasDelta
            Nothing     -> Just nablasDelta

    layers' :: [Layer Double] -> [Layer Double]
    layers' nablas = zipWith updateLayer (layers net) nablas

    updateLayer :: Layer Double -> Layer Double -> Layer Double
    updateLayer layer nabla =
        let w  = weights layer  -- weights matrix
            nw = weights nabla
            b  = biases layer   -- biases vector
            nb = biases nabla
            -- L2 weight decay factor: 1 - eta * lambda / n
            fac = 1 - eta * (lambda / fromIntegral n)
            w'  = scale fac w - scale (eta / fromIntegral (length spls)) nw
            b'  = b - scale (eta / fromIntegral (length spls)) nb
        in layer { weights = w', biases = b' }

-- | Backpropagation for a single sample, returning one gradient layer
-- (nabla-w matrix, nabla-b vector) per network layer, input to output order.
backprop :: Network Double -> CostFunction -> Sample Double -> [Layer Double]
backprop net costFunction spl = finalNablas
  where
    -- Feedforward (raw, activated) pairs, reversed so index 0 is the output layer.
    rawFeedforward :: [(Vector Double, Vector Double)]
    rawFeedforward = reverse $ rawOutputs net sigmoid (fst spl)

    -- Starting activation and raw value of the output layer.
    headZ, headA :: Vector Double
    (headZ, headA) = head rawFeedforward

    -- Starting delta, based on the activation of the last layer.
    startDelta = getDelta costFunction headZ headA (snd spl)

    -- Gradients of the last layer, calculated in advance.
    lastNablaB = startDelta
    lastNablaW = startDelta `outer` previousA
      where
        previousA
            | length rawFeedforward > 1 = snd $ rawFeedforward !! 1
            | otherwise                 = fst spl

    lastLayer = Layer { weights = lastNablaW, biases = lastNablaB }

    -- Reversed layers, in analogy to the reversed (z, a) list.
    layersReversed = reverse $ layers net

    -- Walk backwards through the remaining layers, starting from startDelta.
    (finalNablas, _) =
        foldl calculate ([lastLayer], startDelta) [1 .. length layersReversed - 1]

    -- Take the (reversed) layer index and prepend that layer's gradients.
    calculate (nablas, oldDelta) idx =
        let -- raw value of this layer
            (z, _) = rawFeedforward !! idx
            -- derivative of the activation at the raw value
            z' = cmap sigmoid' z
            -- propagate the delta backwards through the next layer's weights
            w     = weights $ layersReversed !! (idx - 1)
            delta = (tr w #> oldDelta) * z'
            -- nablaB is just the delta vector
            nablaB = delta
            -- activation in the previous layer
            aPrevious = snd $ rawFeedforward !! (idx + 1)
            -- outer product of delta and the previous layer's activation
            nablaW = delta `outer` aPrevious
        in (Layer { weights = nablaW, biases = nablaB } : nablas, delta)

-- | Logistic sigmoid activation.
sigmoid :: Floating a => ActivationFunction a
sigmoid x = 1 / (1 + exp (-x))

-- | Derivative of 'sigmoid'.
sigmoid' :: Floating a => ActivationFunctionDerivative a
sigmoid' x = sigmoid x * (1 - sigmoid x)

-- | Uniformly shuffle a list (Fisher-Yates over a mutable array).
shuffle :: [a] -> IO [a]
shuffle xs = do
    ar <- newArr n xs
    forM [1 .. n] $ \i -> do
        j  <- randomRIO (i, n)
        vi <- readArray ar i
        vj <- readArray ar j
        writeArray ar j vi
        return vj
  where
    n = length xs
    newArr :: Int -> [a] -> IO (IOArray Int a)
    newArr len lst = newListArray (1, len) lst

-- | Serialize a network to a file. If the file already exists, a fresh
-- versioned name is derived with 'newFileName' instead of overwriting.
saveNetwork :: (Element a, Binary a) => FilePath -> Network a -> IO ()
saveNetwork fp net = do
    ex <- doesFileExist fp
    if ex
        then saveNetwork (newFileName fp) net
        else encodeFile fp net

-- | Derive a new file name by incrementing the numeric suffix before the
-- extension (e.g. @net3.bin -> net4.bin@); when the name does not match the
-- pattern, an @l@ is appended instead.
newFileName :: FilePath -> FilePath
newFileName fp =
    case fp =~ "(.+[a-z]){0,1}([0-9]*)(\\..*)" :: [[String]] of
        [[_, p, v, s]] -> p ++ show (version v + 1) ++ s
        _              -> fp ++ "l"
  where
    -- An absent/unreadable version counts as 0.
    version :: String -> Int
    version xs = fromMaybe 0 (readMaybe xs :: Maybe Int)

-- | Load a network previously written with 'saveNetwork'.
loadNetwork :: (Element a, Binary a) => FilePath -> IO (Network a)
loadNetwork = decodeFile