Kaynağa Gözat

improve documentation

master
erichhasl 8 yıl önce
ebeveyn
işleme
bf9569c9ab
3 değiştirilmiş dosya ile 92 ekleme ve 51 silme
  1. +5
    -0
      Backend.hs
  2. +1
    -1
      MNIST.hs
  3. +86
    -50
      Network.hs

+ 5
- 0
Backend.hs Dosyayı Görüntüle

@@ -0,0 +1,5 @@
-- | Experimental backend: evaluate reimplementing the network computations with Repa arrays.

import Data.Array.Repa (Z(..), (:.)(..), DIM0(..), DIM1(..), DIM2(..), U, D, Array(..),
(!))
import qualified Data.Array.Repa as R

+ 1
- 1
MNIST.hs Dosyayı Görüntüle

@@ -37,7 +37,7 @@ data Arguments = Arguments { eta :: Double, lambda :: Double,
arguments = Arguments { eta = 0.5 &= help "Learning rate",
lambda = 5 &= help "Lambda of regularization",
filePath = "" &= help "Load network from file",
costFunction = Quadratic &= help "Cost function",
costFunction = CrossEntropy &= help "Cost function",
epochs = 30 &= help "Number of training epochs",
miniBatchSize = 10 &= help "Mini batch size",
hiddenNeurons = 30 &= help "Number of neurons in hidden layer" }


+ 86
- 50
Network.hs Dosyayı Görüntüle

@@ -115,14 +115,18 @@ type Samples a = [Sample a]

-- | The learning rate, affects the learning speed, lower learning rate results
-- in slower learning, but usually better results after more epochs.
type LearningRate = Double
type LearningRate a = a

-- | Lambda value affecting the regularization while learning.
type Lambda = Double
type Lambda a = a

-- | Wrapper around the training data length.
type TrainingDataLength = Int

-- | Size of one mini batch, subset of training set used to update the weights
-- averaged over this batch.
type MiniBatchSize = Int

-- | Initializes a new network with random values for weights and biases
-- in all layers.
--
@@ -135,7 +139,7 @@ newNetwork layerSizes
return $ Network lays
where go :: Int -> Int -> IO (Layer Double)
go inputSize outputSize = do
ws <- randn outputSize inputSize
ws <- fmap (/ (sqrt $ fromIntegral inputSize)) (randn outputSize inputSize)
seed <- randomIO
let bs = randomVector seed Gaussian outputSize
return $ Layer ws bs
@@ -163,15 +167,16 @@ rawOutputs net act input = scanl f (input, input) (layers net)
-- every training epoch
--
-- > trainShuffled 30 (\n e -> "") net CrossEntropyCost 0.5 trainData 10 0.1
trainShuffled :: Int
-> (Network Double -> Int -> String)
-> Network Double
trainShuffled :: forall a. (Numeric a, Floating a, Floating (Vector a))
=> Int
-> (Network a -> Int -> String)
-> Network a
-> CostFunction
-> Lambda
-> Samples Double
-> Int
-> Double
-> IO (Network Double)
-> Lambda a
-> Samples a
-> MiniBatchSize
-> LearningRate a
-> IO (Network a)
trainShuffled 0 _ net _ _ _ _ _ = return net
trainShuffled epochs debug net costFunction lambda trainSamples miniBatchSize eta = do
spls <- shuffle trainSamples
@@ -182,15 +187,16 @@ trainShuffled epochs debug net costFunction lambda trainSamples miniBatchSize et

-- | Pure version of 'trainShuffled', training the network /n/ times without
-- shuffling the training set, resulting in slightly worse results.
trainNTimes :: Int
-> (Network Double -> Int -> String)
-> Network Double
trainNTimes :: forall a. (Numeric a, Floating a, Floating (Vector a))
=> Int
-> (Network a -> Int -> String)
-> Network a
-> CostFunction
-> Lambda
-> Samples Double
-> Int
-> Double
-> Network Double
-> Lambda a
-> Samples a
-> MiniBatchSize
-> LearningRate a
-> Network a
trainNTimes 0 _ net _ _ _ _ _ = net
trainNTimes epochs debug net costFunction lambda trainSamples miniBatchSize eta =
trace (debug net' epochs)
@@ -198,58 +204,86 @@ trainNTimes epochs debug net costFunction lambda trainSamples miniBatchSize eta
where !net' = trainSGD net costFunction lambda trainSamples miniBatchSize eta


trainSGD :: (Numeric Double, Floating Double)
=> Network Double
-- | Train the network using Stochastic Gradient Descent.
-- This is an improved version of Gradient Descent splitting the training data into
-- subsets to update the networks weights more often resulting in better accuracy.
--
-- On each mini batch, 'update' is called to calculate the improved network.
trainSGD :: forall a. (Numeric a, Floating a, Floating (Vector a))
=> Network a
-> CostFunction
-> Lambda
-> Samples Double
-> Int
-> Double
-> Network Double
-> Lambda a
-> Samples a
-> MiniBatchSize
-> LearningRate a
-> Network a
trainSGD net costFunction lambda trainSamples miniBatchSize eta =
foldl' updateMiniBatch net (chunksOf miniBatchSize trainSamples)
where updateMiniBatch = update eta costFunction lambda (length trainSamples)

update :: LearningRate
where -- update network based on given mini batch using Gradient Descent
updateMiniBatch :: Network a -> Samples a -> Network a
updateMiniBatch = update eta costFunction lambda (length trainSamples)

-- | Update the network using a set of samples and Gradient Descent.
-- This takes one mini batch to perform GD.
update :: forall a. (Numeric a, Floating a, Floating (Vector a))
=> LearningRate a
-> CostFunction
-> Lambda
-> Lambda a
-> TrainingDataLength
-> Network Double
-> Samples Double
-> Network Double
update eta costFunction lambda n net spls = case newNablas of
-> Network a
-> Samples a
-> Network a
update eta costFunction lambda n net spls = case mayNewNablas of
Nothing -> net
Just x -> net { layers = layers' x }
where newNablas :: Maybe [Layer Double]
newNablas = foldl' updateNablas Nothing spls
updateNablas :: Maybe [Layer Double] -> Sample Double -> Maybe [Layer Double]
Just newNablas -> net { layers = applyNablas newNablas }
where -- calculate new nablas based on samples
mayNewNablas :: Maybe [Layer a]
mayNewNablas = foldl' updateNablas Nothing spls
-- update nablas by calculating new nablas and adding them to the
-- existing ones
updateNablas :: Maybe [Layer a] -> Sample a -> Maybe [Layer a]
updateNablas mayNablas sample =
let !nablasDelta = backprop net costFunction sample
f nabla nablaDelta =
let -- calculate new nablas for this training sample
!nablasDelta = backprop net costFunction sample
-- takes an existing nabla layer and adds the delta
addDelta nabla nablaDelta =
nabla { weights = weights nabla + weights nablaDelta,
biases = biases nabla + biases nablaDelta }
in case mayNablas of
Just nablas -> Just $ zipWith f nablas nablasDelta
-- if there are already nablas, add the new ones
Just nablas -> Just $ zipWith addDelta nablas nablasDelta
-- otherwise return the new ones
Nothing -> Just $ nablasDelta
layers' :: [Layer Double] -> [Layer Double]
layers' nablas = zipWith updateLayer (layers net) nablas
updateLayer :: Layer Double -> Layer Double -> Layer Double
updateLayer layer nabla =
-- apply nablas to layers of the network
applyNablas :: [Layer a] -> [Layer a]
applyNablas nablas = zipWith applyNabla (layers net) nablas
-- apply nabla to one layer
applyNabla :: Layer a -> Layer a -> Layer a
applyNabla layer nabla =
let w = weights layer -- weights matrix
nw = weights nabla
nw = weights nabla -- weights nablas matrix
b = biases layer -- biases vector
nb = biases nabla
nb = biases nabla -- biases nablas vector
fac = 1 - eta * (lambda / fromIntegral n)
-- subtract nablas from weights
w' = scale fac w - scale (eta / (fromIntegral $ length spls)) nw
-- subtract nablas from biases
b' = b - scale (eta / (fromIntegral $ length spls)) nb
in layer { weights = w', biases = b' }

backprop :: Network Double -> CostFunction -> Sample Double -> [Layer Double]
-- | Backpropagate the error and calculate the partial derivatives for the
-- weights and biases in each layer.
-- Returns a list of layers holding the nablas accordingly.
backprop :: forall a. (Numeric a, Floating a, Floating (Vector a))
=> Network a
-> CostFunction
-> Sample a
-> [Layer a]
backprop net costFunction spl = finalNablas
where rawFeedforward :: [(Vector Double, Vector Double)]
where rawFeedforward :: [(Vector a, Vector a)]
rawFeedforward = reverse $ rawOutputs net sigmoid (fst spl)
-- get starting activation and raw value
headZ, headA :: Vector Double
headZ, headA :: Vector a
(headZ, headA) = head rawFeedforward
-- get starting delta, based on the activation of the last layer
startDelta = getDelta costFunction headZ headA (snd spl)
@@ -293,6 +327,7 @@ sigmoid x = 1 / (1 + exp (-x))
sigmoid' :: Floating a => ActivationFunctionDerivative a
sigmoid' x = sigmoid x * (1 - sigmoid x)

-- | Shuffle a list randomly.
shuffle :: [a] -> IO [a]
shuffle xs = do
ar <- newArr n xs
@@ -317,6 +352,7 @@ saveNetwork fp net = do
True -> saveNetwork (newFileName fp) net
False -> encodeFile fp net

-- | Find a new filename by replacing the old version with the next higher one.
newFileName :: FilePath -> FilePath
newFileName fp = case fp =~ "(.+[a-z]){0,1}([0-9]*)(\\..*)" :: [[String]] of
[[_, p, v, s]] -> p ++ show (version v + 1) ++ s


Yükleniyor…
İptal
Kaydet