Kaynağa Gözat

improve documentation

master
erichhasl 8 yıl önce
ebeveyn
işleme
bf9569c9ab
3 değiştirilmiş dosya ile 92 ekleme ve 51 silme
  1. +5
    -0
      Backend.hs
  2. +1
    -1
      MNIST.hs
  3. +86
    -50
      Network.hs

+ 5
- 0
Backend.hs Dosyayı Görüntüle

@@ -0,0 +1,5 @@
-- | Experimental backend: evaluate reimplementing the network computations with Repa arrays.

import Data.Array.Repa (Z(..), (:.)(..), DIM0(..), DIM1(..), DIM2(..), U, D, Array(..),
(!))
import qualified Data.Array.Repa as R

+ 1
- 1
MNIST.hs Dosyayı Görüntüle

@@ -37,7 +37,7 @@ data Arguments = Arguments { eta :: Double, lambda :: Double,
arguments = Arguments { eta = 0.5 &= help "Learning rate",
lambda = 5 &= help "Lambda of regularization",
filePath = "" &= help "Load network from file",
costFunction = Quadratic &= help "Cost function",
costFunction = CrossEntropy &= help "Cost function",
epochs = 30 &= help "Number of training epochs",
miniBatchSize = 10 &= help "Mini batch size",
hiddenNeurons = 30 &= help "Number of neurons in hidden layer" }


+ 86
- 50
Network.hs Dosyayı Görüntüle

@@ -115,14 +115,18 @@ type Samples a = [Sample a]

-- | The learning rate, affects the learning speed, lower learning rate results
-- in slower learning, but usually better results after more epochs.
type LearningRate = Double
type LearningRate a = a

-- | Lambda value affecting the regularization while learning.
type Lambda = Double
type Lambda a = a

-- | Wrapper around the training data length.
type TrainingDataLength = Int

-- | Size of one mini batch, subset of training set used to update the weights
-- averaged over this batch.
type MiniBatchSize = Int

-- | Initializes a new network with random values for weights and biases
-- in all layers.
--
@@ -135,7 +139,7 @@ newNetwork layerSizes
return $ Network lays
where go :: Int -> Int -> IO (Layer Double)
go inputSize outputSize = do
ws <- randn outputSize inputSize
ws <- fmap (/ (sqrt $ fromIntegral inputSize)) (randn outputSize inputSize)
seed <- randomIO
let bs = randomVector seed Gaussian outputSize
return $ Layer ws bs
@@ -163,15 +167,16 @@ rawOutputs net act input = scanl f (input, input) (layers net)
-- every training epoch
--
-- > trainShuffled 30 (\n e -> "") net CrossEntropyCost 0.5 trainData 10 0.1
trainShuffled :: Int
-> (Network Double -> Int -> String)
-> Network Double
trainShuffled :: forall a. (Numeric a, Floating a, Floating (Vector a))
=> Int
-> (Network a -> Int -> String)
-> Network a
-> CostFunction
-> Lambda
-> Samples Double
-> Int
-> Double
-> IO (Network Double)
-> Lambda a
-> Samples a
-> MiniBatchSize
-> LearningRate a
-> IO (Network a)
trainShuffled 0 _ net _ _ _ _ _ = return net
trainShuffled epochs debug net costFunction lambda trainSamples miniBatchSize eta = do
spls <- shuffle trainSamples
@@ -182,15 +187,16 @@ trainShuffled epochs debug net costFunction lambda trainSamples miniBatchSize et

-- | Pure version of 'trainShuffled', training the network /n/ times without
-- shuffling the training set, resulting in slightly worse results.
trainNTimes :: Int
-> (Network Double -> Int -> String)
-> Network Double
trainNTimes :: forall a. (Numeric a, Floating a, Floating (Vector a))
=> Int
-> (Network a -> Int -> String)
-> Network a
-> CostFunction
-> Lambda
-> Samples Double
-> Int
-> Double
-> Network Double
-> Lambda a
-> Samples a
-> MiniBatchSize
-> LearningRate a
-> Network a
trainNTimes 0 _ net _ _ _ _ _ = net
trainNTimes epochs debug net costFunction lambda trainSamples miniBatchSize eta =
trace (debug net' epochs)
@@ -198,58 +204,86 @@ trainNTimes epochs debug net costFunction lambda trainSamples miniBatchSize eta
where !net' = trainSGD net costFunction lambda trainSamples miniBatchSize eta


trainSGD :: (Numeric Double, Floating Double)
=> Network Double
-- | Train the network using Stochastic Gradient Descent.
-- This is an improved version of Gradient Descent splitting the training data into
-- subsets to update the networks weights more often resulting in better accuracy.
--
-- On each mini batch, 'update' is called to calculate the improved network.
trainSGD :: forall a. (Numeric a, Floating a, Floating (Vector a))
=> Network a
-> CostFunction
-> Lambda
-> Samples Double
-> Int
-> Double
-> Network Double
-> Lambda a
-> Samples a
-> MiniBatchSize
-> LearningRate a
-> Network a
trainSGD net costFunction lambda trainSamples miniBatchSize eta =
foldl' updateMiniBatch net (chunksOf miniBatchSize trainSamples)
where updateMiniBatch = update eta costFunction lambda (length trainSamples)

update :: LearningRate
where -- update network based on given mini batch using Gradient Descent
updateMiniBatch :: Network a -> Samples a -> Network a
updateMiniBatch = update eta costFunction lambda (length trainSamples)

-- | Update the network using a set of samples and Gradient Descent.
-- This takes one mini batch to perform GD.
update :: forall a. (Numeric a, Floating a, Floating (Vector a))
=> LearningRate a
-> CostFunction
-> Lambda
-> Lambda a
-> TrainingDataLength
-> Network Double
-> Samples Double
-> Network Double
update eta costFunction lambda n net spls = case newNablas of
-> Network a
-> Samples a
-> Network a
update eta costFunction lambda n net spls = case mayNewNablas of
Nothing -> net
Just x -> net { layers = layers' x }
where newNablas :: Maybe [Layer Double]
newNablas = foldl' updateNablas Nothing spls
updateNablas :: Maybe [Layer Double] -> Sample Double -> Maybe [Layer Double]
Just newNablas -> net { layers = applyNablas newNablas }
where -- calculate new nablas based on samples
mayNewNablas :: Maybe [Layer a]
mayNewNablas = foldl' updateNablas Nothing spls
-- update nablas by calculating new nablas and adding them to the
-- existing ones
updateNablas :: Maybe [Layer a] -> Sample a -> Maybe [Layer a]
updateNablas mayNablas sample =
let !nablasDelta = backprop net costFunction sample
f nabla nablaDelta =
let -- calculate new nablas for this training sample
!nablasDelta = backprop net costFunction sample
-- takes an existing nabla layer and adds the delta
addDelta nabla nablaDelta =
nabla { weights = weights nabla + weights nablaDelta,
biases = biases nabla + biases nablaDelta }
in case mayNablas of
Just nablas -> Just $ zipWith f nablas nablasDelta
-- if there are already nablas, add the new ones
Just nablas -> Just $ zipWith addDelta nablas nablasDelta
-- otherwise return the new ones
Nothing -> Just $ nablasDelta
layers' :: [Layer Double] -> [Layer Double]
layers' nablas = zipWith updateLayer (layers net) nablas
updateLayer :: Layer Double -> Layer Double -> Layer Double
updateLayer layer nabla =
-- apply nablas to layers of the network
applyNablas :: [Layer a] -> [Layer a]
applyNablas nablas = zipWith applyNabla (layers net) nablas
-- apply nabla to one layer
applyNabla :: Layer a -> Layer a -> Layer a
applyNabla layer nabla =
let w = weights layer -- weights matrix
nw = weights nabla
nw = weights nabla -- weights nablas matrix
b = biases layer -- biases vector
nb = biases nabla
nb = biases nabla -- biases nablas vector
fac = 1 - eta * (lambda / fromIntegral n)
-- subtract nablas from weights
w' = scale fac w - scale (eta / (fromIntegral $ length spls)) nw
-- subtract nablas from biases
b' = b - scale (eta / (fromIntegral $ length spls)) nb
in layer { weights = w', biases = b' }

backprop :: Network Double -> CostFunction -> Sample Double -> [Layer Double]
-- | Backpropagate the error and calculate the partial derivatives for the
-- weights and biases in each layer.
-- Returns a list of layers holding the nablas accordingly.
backprop :: forall a. (Numeric a, Floating a, Floating (Vector a))
=> Network a
-> CostFunction
-> Sample a
-> [Layer a]
backprop net costFunction spl = finalNablas
where rawFeedforward :: [(Vector Double, Vector Double)]
where rawFeedforward :: [(Vector a, Vector a)]
rawFeedforward = reverse $ rawOutputs net sigmoid (fst spl)
-- get starting activation and raw value
headZ, headA :: Vector Double
headZ, headA :: Vector a
(headZ, headA) = head rawFeedforward
-- get starting delta, based on the activation of the last layer
startDelta = getDelta costFunction headZ headA (snd spl)
@@ -293,6 +327,7 @@ sigmoid x = 1 / (1 + exp (-x))
sigmoid' :: Floating a => ActivationFunctionDerivative a
sigmoid' x = sigmoid x * (1 - sigmoid x)

-- | Shuffle a list randomly.
shuffle :: [a] -> IO [a]
shuffle xs = do
ar <- newArr n xs
@@ -317,6 +352,7 @@ saveNetwork fp net = do
True -> saveNetwork (newFileName fp) net
False -> encodeFile fp net

-- | Find a new filename by replacing the old version with the next higher one.
newFileName :: FilePath -> FilePath
newFileName fp = case fp =~ "(.+[a-z]){0,1}([0-9]*)(\\..*)" :: [[String]] of
[[_, p, v, s]] -> p ++ show (version v + 1) ++ s


Yükleniyor…
İptal
Kaydet