|
|
|
@@ -0,0 +1,269 @@ |
|
|
|
{-# LANGUAGE ScopedTypeVariables, FlexibleContexts, BangPatterns #-} |
|
|
|
{-# OPTIONS -Wall #-} |
|
|
|
|
|
|
|
module Network where |
|
|
|
|
|
|
|
import Control.Monad (zipWithM, forM)
import Data.Array.IO
import Data.Binary
import Data.List (foldl')
import Data.List.Split (chunksOf)
import Data.Maybe (fromMaybe)
import Debug.Trace (trace)
import Numeric.LinearAlgebra
import System.Directory
import System.Random
import Text.Read (readMaybe)
import Text.Regex.PCRE
|
|
|
|
|
|
|
-- | The generic feedforward network type; a 'Binary' instance is provided
-- for (de)serialisation.  A network is a list of 'Layer's with a minimum
-- of one (the output layer).
--
-- It is usually constructed with 'newNetwork', which initialises the
-- weight matrices and bias vectors with random values:
--
-- > net <- newNetwork [2, 3, 4]
data Network a = Network { layers :: [Layer a] }
    deriving (Show)
|
|
|
|
|
|
|
-- | One layer of a network, storing the weights matrix and the biases
-- vector of this layer.  Rows of 'weights' correspond to this layer's
-- neurons, columns to the previous layer's outputs (see 'newNetwork').
data Layer a = Layer { weights :: Matrix a, biases :: Vector a }
    deriving (Show)
|
|
|
|
|
|
|
-- | Serialise a network as its plain list of layers.
instance (Element a, Binary a) => Binary (Network a) where
    put = put . layers
    get = fmap Network get
|
|
|
|
|
|
|
-- | Serialise a layer as the weights (row lists) followed by the biases
-- (plain list); deserialisation rebuilds the hmatrix structures.
instance (Element a, Binary a) => Binary (Layer a) where
    put (Layer ws bs) = put (toLists ws) >> put (toList bs)
    get = Layer <$> (fromLists <$> get) <*> (fromList <$> get)
|
|
|
|
|
|
|
-- | Enumeration of the supported cost functions; the choice selects the
-- output-layer delta formula in 'getDelta'.
data CostFunction = QuadraticCost
                  | CrossEntropyCost
                  deriving (Show, Eq)
|
|
|
|
|
|
|
-- | Output-layer error signal ("delta"), given the raw input @z@, the
-- activated output @a@ and the desired output @y@.  The formula depends
-- on the chosen 'CostFunction': the cross-entropy cost cancels the
-- sigmoid-derivative factor.
getDelta :: Floating a => CostFunction -> a -> a -> a -> a
getDelta cost z a y = case cost of
    QuadraticCost    -> (a - y) * sigmoid' z
    CrossEntropyCost -> a - y
|
|
|
|
|
|
|
-- | An activation function, applied element-wise to a layer's raw output.
type ActivationFunction a = a -> a

-- | The derivative of an 'ActivationFunction', used during backpropagation.
type ActivationFunctionDerivative a = a -> a

-- | One training sample: the input vector paired with the expected output.
type Sample a = (Vector a, Vector a)

-- | A list of training samples.
type Samples a = [Sample a]
|
|
|
|
|
|
|
-- | A simple synonym for the (,) operator, used to create samples very
-- intuitively: @input --> expected@.
(-->) :: Vector a -> Vector a -> Sample a
x --> y = (x, y)
|
|
|
|
|
|
|
-- | Gradient-descent step size (eta).
type LearningRate = Double

-- | L2 regularisation factor.
type Lambda = Double

-- | Total size of the training set; used in 'update' to scale the
-- regularisation decay factor.
type TrainingDataLength = Int
|
|
|
|
|
|
|
-- | Create a network with Gaussian-initialised weights and biases.  The
-- argument lists every layer's size, input layer first; at least two
-- sizes (input and output) are required, otherwise 'error' is raised.
newNetwork :: [Int] -> IO (Network Double)
newNetwork layerSizes
    | length layerSizes < 2 = error "Network too small!"
    | otherwise =
        Network <$> zipWithM mkLayer (init layerSizes) (tail layerSizes)
  where
    -- One layer mapping nIn inputs to nOut outputs.
    mkLayer :: Int -> Int -> IO (Layer Double)
    mkLayer nIn nOut = do
        ws   <- randn nOut nIn
        seed <- randomIO
        return $ Layer ws (randomVector seed Gaussian nOut)
|
|
|
|
|
|
|
-- | Feed an input vector through the whole network and return the final
-- activation vector.
output :: (Numeric a, Num (Vector a))
       => Network a            -- ^ the network
       -> ActivationFunction a -- ^ activation applied element-wise
       -> Vector a             -- ^ input vector
       -> Vector a
output net act input = foldl step input (layers net)
  where step v (Layer ws bs) = cmap act ((ws #> v) + bs)
|
|
|
|
|
|
|
-- | Like 'output', but returns every intermediate activation: the input
-- itself first, the final activation last.
outputs :: (Numeric a, Num (Vector a))
        => Network a
        -> ActivationFunction a
        -> Vector a
        -> [Vector a]
outputs net act input = scanl step input (layers net)
  where step v (Layer ws bs) = cmap act ((ws #> v) + bs)
|
|
|
|
|
|
|
-- | Feed an input through the network, collecting for every layer the
-- pair of raw (pre-activation) and activated output.  The list starts
-- with the pair @(input, input)@ for the input layer.
rawOutputs :: (Numeric a, Num (Vector a))
           => Network a
           -> ActivationFunction a
           -> Vector a
           -> [(Vector a, Vector a)]
rawOutputs net act input = scanl step (input, input) (layers net)
  where step (_, a) (Layer ws bs) =
            let z = (ws #> a) + bs
            in (z, cmap act z)
|
|
|
|
|
|
|
-- | The most used training function, randomly shuffling the training set
-- before every training epoch.
--
-- > trainShuffled 30 (\n e -> "") net CrossEntropyCost 0.5 trainData 10 0.1
trainShuffled :: Int                               -- ^ number of epochs left
              -> (Network Double -> Int -> String) -- ^ debug message from net and epoch
              -> Network Double                    -- ^ network to train
              -> CostFunction
              -> Lambda                            -- ^ L2 regularisation factor
              -> Samples Double                    -- ^ full training set
              -> Int                               -- ^ mini-batch size
              -> Double                            -- ^ learning rate (eta)
              -> IO (Network Double)
-- Base case: no epochs left, return the network as-is.
trainShuffled 0 _ net _ _ _ _ _ = return net
trainShuffled epochs debug net costFunction lambda trainSamples miniBatchSize eta = do
    -- Re-shuffle the complete training set for this epoch.
    spls <- shuffle trainSamples
    -- The bang pattern forces the updated network to WHNF before
    -- recursing, so unevaluated epochs do not pile up as thunks.
    let !net' = trainSGD net costFunction lambda spls miniBatchSize eta
    -- NOTE(review): progress is reported via Debug.Trace (emitted when
    -- the traced expression is demanded), not via ordered IO output.
    trace (debug net' epochs)
          (trainShuffled (epochs - 1) debug net' costFunction lambda trainSamples miniBatchSize eta)
|
|
|
|
|
|
|
|
|
|
|
-- | Train the network for a fixed number of epochs WITHOUT shuffling:
-- every epoch folds 'trainSGD' over the samples in their given order.
trainNTimes :: Int                               -- ^ number of epochs left
            -> (Network Double -> Int -> String) -- ^ debug message from net and epoch
            -> Network Double                    -- ^ network to train
            -> CostFunction
            -> Lambda                            -- ^ L2 regularisation factor
            -> Samples Double                    -- ^ training set (used as-is)
            -> Int                               -- ^ mini-batch size
            -> Double                            -- ^ learning rate (eta)
            -> Network Double
-- Base case: no epochs left.
trainNTimes 0 _ net _ _ _ _ _ = net
trainNTimes epochs debug net costFunction lambda trainSamples miniBatchSize eta =
    -- NOTE(review): the debug string is emitted via Debug.Trace as a side
    -- effect of evaluating the result, not through ordered IO.
    trace (debug net' epochs)
          (trainNTimes (epochs - 1) debug net' costFunction lambda trainSamples miniBatchSize eta)
    -- The bang pattern forces one epoch's result before the next recursion.
  where !net' = trainSGD net costFunction lambda trainSamples miniBatchSize eta
|
|
|
|
|
|
|
|
|
|
|
-- | One epoch of stochastic gradient descent: split the samples into
-- mini-batches of @miniBatchSize@ and fold 'update' over them.
trainSGD :: Network Double  -- ^ network to train
         -> CostFunction
         -> Lambda          -- ^ L2 regularisation factor
         -> Samples Double  -- ^ training data for this epoch
         -> Int             -- ^ mini-batch size
         -> Double          -- ^ learning rate (eta)
         -> Network Double
-- The original "(Numeric Double, Floating Double)" context was dropped:
-- constraints on a concrete type are vacuous and -Wall flags them.
trainSGD net costFunction lambda trainSamples miniBatchSize eta =
    -- foldl' (not foldl): force each intermediate network so the fold
    -- does not build a chain of unevaluated updates across mini-batches.
    foldl' updateMiniBatch net (chunksOf miniBatchSize trainSamples)
  where updateMiniBatch = update eta costFunction lambda (length trainSamples)
|
|
|
|
|
|
|
-- | Apply one mini-batch update: accumulate the backpropagation gradients
-- ("nablas") over every sample in the batch, then move each layer's
-- weights and biases one L2-regularised gradient-descent step.  An empty
-- mini-batch leaves the network unchanged.
update :: LearningRate        -- ^ step size eta
       -> CostFunction
       -> Lambda              -- ^ L2 regularisation factor
       -> TrainingDataLength  -- ^ size of the WHOLE training set (decay factor)
       -> Network Double      -- ^ current network
       -> Samples Double      -- ^ one mini-batch
       -> Network Double
update eta costFunction lambda n net spls = case newNablas of
    -- No samples in this batch: nothing to update.
    Nothing -> net
    Just x -> net { layers = layers' x }
  where -- Gradient sums over the batch; Nothing until the first sample.
        newNablas :: Maybe [Layer Double]
        newNablas = foldl updateNablas Nothing spls
        -- Add one sample's backprop gradients onto the running sums
        -- (layer-wise matrix/vector addition).
        updateNablas :: Maybe [Layer Double] -> Sample Double -> Maybe [Layer Double]
        updateNablas mayNablas sample =
            let nablasDelta = backprop net costFunction sample
                f nabla nablaDelta =
                    nabla { weights = weights nabla + weights nablaDelta,
                            biases = biases nabla + biases nablaDelta }
            in case mayNablas of
                Just nablas -> Just $ zipWith f nablas nablasDelta
                Nothing -> Just $ nablasDelta
        -- Apply the accumulated gradients to every layer.
        layers' :: [Layer Double] -> [Layer Double]
        layers' nablas = zipWith updateLayer (layers net) nablas
        -- One layer's gradient-descent step with L2 weight decay.
        updateLayer :: Layer Double -> Layer Double -> Layer Double
        updateLayer layer nabla =
            let w = weights layer -- weights matrix
                nw = weights nabla
                b = biases layer -- biases vector
                nb = biases nabla
                -- Weight-decay factor 1 - eta * lambda / n; biases are
                -- not regularised.
                fac = 1 - eta * (lambda / fromIntegral n)
                -- Gradients are averaged over the batch size.
                w' = scale fac w - scale (eta / (fromIntegral $ length spls)) nw
                b' = b - scale (eta / (fromIntegral $ length spls)) nb
            in layer { weights = w', biases = b' }
|
|
|
|
|
|
|
-- | Backpropagation for a single sample: returns, per layer, the gradient
-- of the cost w.r.t. that layer's weights and biases, packed into 'Layer'
-- values (same order as @layers net@).
--
-- NOTE(review): relies on list indexing (!!) and head over the reversed
-- feedforward trace; safe only because 'newNetwork' guarantees at least
-- one layer -- confirm before reusing elsewhere.
backprop :: Network Double -> CostFunction -> Sample Double -> [Layer Double]
backprop net costFunction spl = finalNablas
  where -- (z, a) pairs per layer, last layer first (reversed).
        rawFeedforward :: [(Vector Double, Vector Double)]
        rawFeedforward = reverse $ rawOutputs net sigmoid (fst spl)
        -- get starting activation and raw value of the output layer
        headZ, headA :: Vector Double
        (headZ, headA) = head rawFeedforward
        -- get starting delta, based on the activation of the last layer
        startDelta = getDelta costFunction headZ headA (snd spl)
        -- calculate nablas of the last layer in advance
        lastNablaB = startDelta
        lastNablaW = startDelta `outer` previousA
            -- Activation feeding the output layer; for a single-layer
            -- network that is the raw input itself.
            where previousA
                    | length rawFeedforward > 1 = snd $ rawFeedforward !! 1
                    | otherwise = fst spl
        lastLayer = Layer { weights = lastNablaW, biases = lastNablaB }
        -- reverse layers, analogy to the reversed (z, a) list
        layersReversed = reverse $ layers net
        -- Walk backwards through the remaining layers, prepending each
        -- layer's nablas so the final list is in forward order again.
        (finalNablas, _) = foldl calculate ([lastLayer], startDelta)
                                 [1..length layersReversed - 1]
        -- takes the index (into the reversed lists) and updates nablas
        calculate (nablas, oldDelta) idx =
            let -- extract raw value of this layer
                (z, _) = rawFeedforward !! idx
                -- apply the derivative of sigmoid
                z' = cmap sigmoid' z
                -- propagate the delta backwards through the next layer's
                -- (transposed) weights
                w = weights $ layersReversed !! (idx - 1)
                delta = (tr w #> oldDelta) * z'
                -- nablaB is just the delta vector
                nablaB = delta
                -- activation in the previous layer
                aPrevious = snd $ rawFeedforward !! (idx + 1)
                -- outer product of delta and the previous activation
                nablaW = delta `outer` aPrevious
                -- put nablas into a new layer
            in (Layer { weights = nablaW, biases = nablaB } : nablas, delta)
|
|
|
|
|
|
|
|
|
|
|
-- | The logistic activation function, 1 / (1 + e^(-x)).
sigmoid :: Floating a => ActivationFunction a
sigmoid x = recip (1 + exp (negate x))
|
|
|
|
|
|
|
-- | Derivative of 'sigmoid': s * (1 - s) where s = sigmoid x.
-- The shared 'where' binding evaluates 'sigmoid' only once, unlike the
-- original which computed it twice per call.
sigmoid' :: Floating a => ActivationFunctionDerivative a
sigmoid' x = s * (1 - s)
  where s = sigmoid x
|
|
|
|
|
|
|
-- | Random permutation of a list using a mutable array: position @i@
-- draws a partner @j@ from [i..n], writes the element at @i@ over @j@,
-- and emits the element previously at @j@.
shuffle :: [a] -> IO [a]
shuffle xs = do
    arr <- freshArray n xs
    forM [1 .. n] $ \pos -> do
        pick   <- randomRIO (pos, n)
        atPos  <- readArray arr pos
        atPick <- readArray arr pick
        writeArray arr pick atPos
        return atPick
  where
    n = length xs
    -- 1-based mutable array holding the list elements.
    freshArray :: Int -> [b] -> IO (IOArray Int b)
    freshArray len = newListArray (1, len)
|
|
|
|
|
|
|
-- | Serialise a network to the given path.  If the file already exists,
-- the name is bumped via 'newFileName' (recursively) so no existing file
-- is ever overwritten.
saveNetwork :: (Element a, Binary a) => FilePath -> Network a -> IO ()
saveNetwork fp net = do
    ex <- doesFileExist fp
    -- if/then/else instead of the original "case ex of True/False",
    -- the idiomatic form for a plain Bool scrutinee.
    if ex
        then saveNetwork (newFileName fp) net
        else encodeFile fp net
|
|
|
|
|
|
|
-- | Derive a fresh file name from an existing one.  The PCRE pattern
-- splits the path into an optional prefix ending in a letter, a trailing
-- run of digits (the "version"), and the extension; on a match the
-- version is incremented (missing/unparsable version counts as 0).
-- If the pattern does not match at all, an \'l\' is appended instead.
--
-- NOTE(review): paths without a dot never match the pattern (the third
-- group requires one) and always take the "++ \"l\"" fallback -- confirm
-- this is intended.
newFileName :: FilePath -> FilePath
newFileName fp = case fp =~ "(.+[a-z]){0,1}([0-9]*)(\\..*)" :: [[String]] of
    [[_, p, v, s]] -> p ++ show (version v + 1) ++ s
    _ -> fp ++ "l"
  where -- Parse the captured digits; 0 when the capture is empty.
        version :: String -> Int
        version xs = fromMaybe 0 (readMaybe xs :: Maybe Int)
|
|
|
|
|
|
|
-- | Read a network back from a file previously written by 'saveNetwork'
-- (a thin wrapper around Data.Binary's 'decodeFile').
loadNetwork :: (Element a, Binary a) => FilePath -> IO (Network a)
loadNetwork = decodeFile