commit cfd609aa854ca728053a524e70107a73301987ac
Author: erichhasl
Date:   Sun Aug 27 11:39:56 2017 +0200

    initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b751b38
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+*
+!*.*
+
+*.net*
+*.o
+*.hi
+*.prof
+*.swp
diff --git a/MNIST.hs b/MNIST.hs
new file mode 100644
index 0000000..4de5b9d
--- /dev/null
+++ b/MNIST.hs
@@ -0,0 +1,133 @@
+{-# LANGUAGE DeriveDataTypeable, BangPatterns #-}
+
+import qualified Data.ByteString.Lazy as BS
+
+import Data.Int
+import Data.List (findIndex, maximumBy)
+import Data.List.Split (chunksOf)
+import Data.Ord (comparing)
+import Debug.Trace (trace)
+
+import Control.DeepSeq
+
+import Numeric.LinearAlgebra
+import Codec.Compression.GZip
+import System.Random
+import System.Environment (getArgs)
+import System.Console.CmdArgs.Implicit
+
+import Network
+
+data Cost = Quadratic | CrossEntropy
+  deriving (Show, Data)
+
+toCostFunction :: Cost -> CostFunction
+toCostFunction Quadratic = QuadraticCost
+toCostFunction CrossEntropy = CrossEntropyCost
+
+data Arguments = Arguments { eta :: Double, lambda :: Double,
+                             filePath :: FilePath, costFunction :: Cost,
+                             epochs :: Int, miniBatchSize :: Int,
+                             hiddenNeurons :: Int }
+  deriving (Show, Data, Typeable)
+
+arguments = Arguments { eta = 0.5 &= help "Learning rate",
+                        lambda = 5 &= help "Lambda of regularization",
+                        filePath = "" &= help "Load network from file",
+                        costFunction = Quadratic &= help "Cost function",
+                        epochs = 30 &= help "Number of training epochs",
+                        miniBatchSize = 10 &= help "Mini batch size",
+                        hiddenNeurons = 30 &= help "Number of neurons in hidden layer" }
+  &= summary "MNIST Image classifier in Haskell v1"
+
+-- Read n images and their labels from the gzipped IDX files (MNIST format).
+readImages :: FilePath -> FilePath -> Int64 -> IO ([(Int, Vector R)])
+readImages !imgPath !lblPath !n = do
+  imgBytes <- fmap decompress (BS.readFile imgPath)
+  lblBytes <- fmap decompress (BS.readFile lblPath)
+  let !imgs = map (readImage imgBytes) [0..n-1]
+      !lbs  = map (readLabel lblBytes) [0..n-1]
+  return $! zip lbs imgs
+
+-- 16-byte IDX header, then 28x28 pixels per image, scaled to [0, 1).
+readImage :: BS.ByteString -> Int64 -> Vector R
+readImage !bytes !n = vector $! map ((/256) . fromIntegral . BS.index bytes . (n*28^2 + 16 +)) [0..783]
+
+-- 8-byte IDX header, then one label byte per image.
+readLabel :: BS.ByteString -> Int64 -> Int
+readLabel bytes n = fromIntegral $! BS.index bytes (n + 8)
+
+-- One-hot encode a digit 0..9.
+toLabel :: Int -> Vector R
+toLabel n = fromList [ if i == n then 1 else 0 | i <- [0..9]]
+
+-- Index of the strongest output activation.
+fromLabel :: Vector R -> Int
+fromLabel vec = snd $ maximumBy (comparing fst) (zip (toList vec) [0..])
+
+drawImage :: Vector R -> String
+drawImage vec = concatMap toLine (chunksOf 28 (toList vec))
+  where toLine ps = (map toChar ps) ++ "\n"
+        toChar v
+          | v > 0.5 = 'o'
+          | otherwise = '.'
+
+drawSample :: Sample Double -> String
+drawSample sample = drawImage (fst sample)
+                 ++ "Label: "
+                 ++ show (fromLabel $ snd sample)
+
+trainIms :: IO [(Int, Vector R)]
+trainIms = readImages "mnist-data/train-images-idx3-ubyte.gz"
+                      "mnist-data/train-labels-idx1-ubyte.gz"
+                      50000
+
+trainSamples :: IO (Samples Double)
+trainSamples = do
+  ims <- trainIms
+  return $! [ img --> toLabel lbl | (lbl, img) <- ims]
+
+testIms :: IO [(Int, Vector R)]
+testIms = readImages "mnist-data/t10k-images-idx3-ubyte.gz"
+                     "mnist-data/t10k-labels-idx1-ubyte.gz"
+                     10000
+
+testSamples :: IO (Samples Double)
+testSamples = do
+  ims <- testIms
+  return $! [ img --> toLabel lbl | (lbl, img) <- ims]
+
+classify :: Network Double -> Sample Double -> IO ()
+classify net spl = do
+  putStrLn (drawImage (fst spl)
+         ++ "Recognized as "
+         ++ show (fromLabel $ output net activation (fst spl)))
+
+-- Count correctly classified samples (uses `activation`, like classify).
+test :: Network Double -> Samples Double -> Int
+test net = let f (img, lbl) = (fromLabel $ output net activation img, fromLabel lbl) in
+           hits . map f
+  where hits :: [(Int, Int)] -> Int
+        hits result = sum $ map (\(a, b) -> if a == b then 1 else 0) result
+
+activation :: (Floating a) => ActivationFunction a
+activation = sigmoid
+
+activation' :: (Floating a) => ActivationFunctionDerivative a
+activation' = sigmoid'
+
+main = do
+  args <- cmdArgs arguments
+  net <- case filePath args of
+           "" -> newNetwork [784, hiddenNeurons args, 10]
+           fp -> loadNetwork fp :: IO (Network Double)
+  trSamples <- trainSamples
+  tstSamples <- testSamples
+  let bad = tstSamples `deepseq` test net tstSamples
+  putStrLn $ "Initial performance of network: recognized " ++ show bad ++ " of 10k"
+  let debug network epochs = "Left epochs: " ++ show epochs
+                          ++ " recognized: " ++ show (test network tstSamples) ++ " of 10k"
+  smartNet <- trSamples `deepseq` (trainShuffled
+                (epochs args) debug net
+                (toCostFunction $ costFunction args)
+                (lambda args) trSamples
+                (miniBatchSize args)
+                (eta args))
+  let res = test smartNet tstSamples
+  putStrLn $ "finished testing. recognized: " ++ show res ++ " of 10k"
+  putStrLn "saving network"
+  saveNetwork "mnist.net" smartNet
diff --git a/Network.hs b/Network.hs
new file mode 100644
index 0000000..c01e5a0
--- /dev/null
+++ b/Network.hs
@@ -0,0 +1,269 @@
+{-# LANGUAGE ScopedTypeVariables, FlexibleContexts, BangPatterns #-}
+{-# OPTIONS -Wall #-}
+
+module Network where
+
+import Data.List.Split (chunksOf)
+import Data.Binary
+import Data.Maybe (fromMaybe)
+import Text.Read (readMaybe)
+
+import System.Directory
+import System.Random
+import Control.Monad (zipWithM, forM)
+import Data.Array.IO
+import Debug.Trace (trace)
+import Text.Regex.PCRE
+
+import Numeric.LinearAlgebra
+
+-- | The generic feedforward network type, a binary instance is implemented.
+-- It takes a list of layers
+-- with a minimum of one (output layer).
+-- It is usually constructed using the `newNetwork` function, initializing the matrices
+-- with some default random values.
+--
+-- > net <- newNetwork [2, 3, 4]
+data Network a = Network { layers :: [Layer a] }
+  deriving (Show)
+
+-- | One layer of a network, storing the weights matrix and the biases vector
+-- of this layer.
+data Layer a = Layer { weights :: Matrix a, biases :: Vector a }
+  deriving (Show)
+
+instance (Element a, Binary a) => Binary (Network a) where
+  put (Network ls) = put ls
+  get = Network `fmap` get
+
+instance (Element a, Binary a) => Binary (Layer a) where
+  put (Layer ws bs) = do
+    put (toLists ws)
+    put (toList bs)
+  get = do
+    ws <- get
+    bs <- get
+    return $ Layer (fromLists ws) (fromList bs)
+
+-- | Cost Function Enum
+data CostFunction = QuadraticCost
+                  | CrossEntropyCost
+  deriving (Show, Eq)
+
+-- | getDelta based on the raw input, the activated input and the desired output
+-- results in different values depending on the CostFunction type.
+getDelta :: Floating a => CostFunction -> a -> a -> a -> a
+getDelta QuadraticCost z a y = (a - y) * sigmoid'(z)
+getDelta CrossEntropyCost _ a y = a - y
+
+type ActivationFunction a = a -> a
+type ActivationFunctionDerivative a = a -> a
+
+type Sample a = (Vector a, Vector a)
+type Samples a = [Sample a]
+
+-- | A simple synonym for the (,) operator, used to create samples very intuitively.
+(-->) :: Vector a -> Vector a -> Sample a
+(-->) = (,)
+
+type LearningRate = Double
+type Lambda = Double
+type TrainingDataLength = Int
+
+-- | Build a network with Gaussian-random weights and biases; the list gives
+-- the layer sizes (input layer first), so it needs at least two entries.
+newNetwork :: [Int] -> IO (Network Double)
+newNetwork layerSizes
+  | length layerSizes < 2 = error "Network too small!"
+  | otherwise = do
+      lays <- zipWithM go (init layerSizes) (tail layerSizes)
+      return $ Network lays
+  where go :: Int -> Int -> IO (Layer Double)
+        go inputSize outputSize = do
+          ws <- randn outputSize inputSize
+          seed <- randomIO
+          let bs = randomVector seed Gaussian outputSize
+          return $ Layer ws bs
+
+-- | Feed an input through the whole network.
+output :: (Numeric a, Num (Vector a))
+       => Network a
+       -> ActivationFunction a
+       -> Vector a
+       -> Vector a
+output net act input = foldl f input (layers net)
+  where f vec layer = cmap act ((weights layer #> vec) + biases layer)
+
+-- | Like 'output', but keeps every intermediate activation (input first).
+outputs :: (Numeric a, Num (Vector a))
+        => Network a
+        -> ActivationFunction a
+        -> Vector a
+        -> [Vector a]
+outputs net act input = scanl f input (layers net)
+  where f vec layer = cmap act ((weights layer #> vec) + biases layer)
+
+-- | Like 'outputs', but pairs each raw weighted input z with its activation.
+rawOutputs :: (Numeric a, Num (Vector a))
+           => Network a
+           -> ActivationFunction a
+           -> Vector a
+           -> [(Vector a, Vector a)]
+rawOutputs net act input = scanl f (input, input) (layers net)
+  where f (_, a) layer = let z' = (weights layer #> a) + biases layer in
+                         (z', cmap act z')
+
+-- | The most used training function, randomly shuffling the training set before
+-- every training epoch
+--
+-- > trainShuffled 30 (\n e -> "") net CrossEntropyCost 0.5 trainData 10 0.1
+trainShuffled :: Int
+              -> (Network Double -> Int -> String)
+              -> Network Double
+              -> CostFunction
+              -> Lambda
+              -> Samples Double
+              -> Int
+              -> Double
+              -> IO (Network Double)
+trainShuffled 0 _ net _ _ _ _ _ = return net
+trainShuffled epochs debug net costFunction lambda trainSamples miniBatchSize eta = do
+  spls <- shuffle trainSamples
+  let !net' = trainSGD net costFunction lambda spls miniBatchSize eta
+  trace (debug net' epochs)
+        (trainShuffled (epochs - 1) debug net' costFunction lambda trainSamples miniBatchSize eta)
+
+
+-- | Pure variant of 'trainShuffled': trains for a fixed number of epochs
+-- on the samples in their given order (no shuffling).
+trainNTimes :: Int
+            -> (Network Double -> Int -> String)
+            -> Network Double
+            -> CostFunction
+            -> Lambda
+            -> Samples Double
+            -> Int
+            -> Double
+            -> Network Double
+trainNTimes 0 _ net _ _ _ _ _ = net
+trainNTimes epochs debug net costFunction lambda trainSamples miniBatchSize eta =
+  trace (debug net' epochs)
+        (trainNTimes (epochs - 1) debug net' costFunction lambda trainSamples miniBatchSize eta)
+  where !net' = trainSGD net costFunction lambda trainSamples miniBatchSize eta
+
+
+-- | One epoch of stochastic gradient descent over consecutive mini batches.
+trainSGD :: (Numeric Double, Floating Double)
+         => Network Double
+         -> CostFunction
+         -> Lambda
+         -> Samples Double
+         -> Int
+         -> Double
+         -> Network Double
+trainSGD net costFunction lambda trainSamples miniBatchSize eta =
+  foldl updateMiniBatch net (chunksOf miniBatchSize trainSamples)
+  where updateMiniBatch = update eta costFunction lambda (length trainSamples)
+
+-- | Apply one mini-batch update: accumulate nablas via backprop over the
+-- batch, then shift weights/biases (with L2 regularization factor).
+update :: LearningRate
+       -> CostFunction
+       -> Lambda
+       -> TrainingDataLength
+       -> Network Double
+       -> Samples Double
+       -> Network Double
+update eta costFunction lambda n net spls = case newNablas of
+    Nothing -> net
+    Just x -> net { layers = layers' x }
+  where newNablas :: Maybe [Layer Double]
+        newNablas = foldl updateNablas Nothing spls
+        updateNablas :: Maybe [Layer Double] -> Sample Double -> Maybe [Layer Double]
+        updateNablas mayNablas sample =
+          let nablasDelta = backprop net costFunction sample
+              f nabla nablaDelta =
+                nabla { weights = weights nabla + weights nablaDelta,
+                        biases = biases nabla + biases nablaDelta }
+          in case mayNablas of
+               Just nablas -> Just $ zipWith f nablas nablasDelta
+               Nothing -> Just $ nablasDelta
+        layers' :: [Layer Double] -> [Layer Double]
+        layers' nablas = zipWith updateLayer (layers net) nablas
+        updateLayer :: Layer Double -> Layer Double -> Layer Double
+        updateLayer layer nabla =
+          let w = weights layer -- weights matrix
+              nw = weights nabla
+              b = biases layer -- biases vector
+              nb = biases nabla
+              fac = 1 - eta * (lambda / fromIntegral n)
+              w' = scale fac w - scale (eta / (fromIntegral $ length spls)) nw
+              b' = b - scale (eta / (fromIntegral $ length spls)) nb
+          in layer { weights = w', biases = b' }
+
+-- | Backpropagation for a single sample: returns one nabla layer per
+-- network layer (gradients of the cost w.r.t. weights and biases).
+backprop :: Network Double -> CostFunction -> Sample Double -> [Layer Double]
+backprop net costFunction spl = finalNablas
+  where rawFeedforward :: [(Vector Double, Vector Double)]
+        rawFeedforward = reverse $ rawOutputs net sigmoid (fst spl)
+        -- get starting activation and raw value
+        headZ, headA :: Vector Double
+        (headZ, headA) = head rawFeedforward
+        -- get starting delta, based on the activation of the last layer
+        startDelta = getDelta costFunction headZ headA (snd spl)
+        -- calculate weighs of last layer in advance
+        lastNablaB = startDelta
+        lastNablaW = startDelta `outer` previousA
+          where previousA
+                  | length rawFeedforward > 1 = snd $ rawFeedforward !! 1
+                  | otherwise = fst spl
+        lastLayer = Layer { weights = lastNablaW, biases = lastNablaB }
+        -- reverse layers, analogy to the reversed (z, a) list
+        layersReversed = reverse $ layers net
+        -- calculate nablas, beginning at the end of the network (startDelta)
+        (finalNablas, _) = foldl calculate ([lastLayer], startDelta)
+                                 [1..length layersReversed - 1]
+        -- takes the index and updates nablas
+        calculate (nablas, oldDelta) idx =
+          let -- extract raw and activated value
+              (z, _) = rawFeedforward !! idx
+              -- apply prime derivative of sigmoid
+              z' = cmap sigmoid' z
+              -- calculate new delta
+              w = weights $ layersReversed !! (idx - 1)
+              delta = (tr w #> oldDelta) * z'
+              -- nablaB is just the delta vector
+              nablaB = delta
+              -- activation in previous layer
+              aPrevious = snd $ rawFeedforward !! (idx + 1)
+              -- dot product of delta and the activation in the previous layer
+              nablaW = delta `outer` aPrevious
+              -- put nablas into a new layer
+          in (Layer { weights = nablaW, biases = nablaB } : nablas, delta)
+
+
+sigmoid :: Floating a => ActivationFunction a
+sigmoid x = 1 / (1 + exp (-x))
+
+sigmoid' :: Floating a => ActivationFunctionDerivative a
+sigmoid' x = sigmoid x * (1 - sigmoid x)
+
+-- | Fisher-Yates style shuffle using a mutable array.
+shuffle :: [a] -> IO [a]
+shuffle xs = do
+    ar <- newArr n xs
+    forM [1..n] $ \i -> do
+      j <- randomRIO (i,n)
+      vi <- readArray ar i
+      vj <- readArray ar j
+      writeArray ar j vi
+      return vj
+  where
+    n = length xs
+    newArr :: Int -> [a] -> IO (IOArray Int a)
+    newArr len lst = newListArray (1,len) lst
+
+-- | Serialize a network; never overwrites: bumps a version suffix instead.
+saveNetwork :: (Element a, Binary a) => FilePath -> Network a -> IO ()
+saveNetwork fp net = do
+  ex <- doesFileExist fp
+  case ex of
+    True -> saveNetwork (newFileName fp) net
+    False -> encodeFile fp net
+
+-- | Increment the numeric suffix before the extension ("mnist.net" -> "mnist1.net").
+newFileName :: FilePath -> FilePath
+newFileName fp = case fp =~ "(.+[a-z]){0,1}([0-9]*)(\\..*)" :: [[String]] of
+    [[_, p, v, s]] -> p ++ show (version v + 1) ++ s
+    _ -> fp ++ "l"
+  where version :: String -> Int
+        version xs = fromMaybe 0 (readMaybe xs :: Maybe Int)
+
+loadNetwork :: (Element a, Binary a) => FilePath -> IO (Network a)
+loadNetwork = decodeFile
diff --git a/XOR.hs b/XOR.hs
new file mode 100644
index 0000000..867c157
--- /dev/null
+++ b/XOR.hs
@@ -0,0 +1,24 @@
+import Network
+import Numeric.LinearAlgebra
+
+trainData :: Samples Double
+trainData = [(vector [0, 0], vector [0]),
+             (vector [0, 1], vector [1]),
+             (vector [1, 0], vector [1]),
+             (vector [1, 1], vector [0])] -- was a duplicate (0,0) sample; XOR needs (1,1) -> 0
+
+l1 = Layer { weights = (3><2) [1..], biases = vector [1..3] }
+l2 = Layer { weights = (1><3) [1..], biases = vector [1] }
+fixedNet = Network [l1, l2]
+
+debug _ _ = "Bla!"
+
+main :: IO ()
+main = do
+  net <- newNetwork [2, 3, 1]
+  {-let smartNet = update 0.06 net trainData-}
+  {-[>let smartNet = trainSGD net trainData 4 0.06<]-}
+  let debug _ _ = ""
+      smartNet = trainNTimes 100000 debug net QuadraticCost 0 trainData 4 100 -- cost fn and lambda were missing
+  putStrLn "Output after learning: "
+  mapM_ (print . Network.output smartNet sigmoid . fst) trainData
diff --git a/argstest.hs b/argstest.hs
new file mode 100644
index 0000000..5dff59e
--- /dev/null
+++ b/argstest.hs
@@ -0,0 +1,18 @@
+{-# LANGUAGE DeriveDataTypeable #-}
+
+import System.Console.CmdArgs.Implicit
+
+data CostFunction = Quadratic | CrossEntropy
+  deriving (Show, Data)
+
+data Sample = Sample { lambda :: Double, eta :: Double, fileName :: String,
+                       costFunction :: CostFunction }
+  deriving (Show, Data, Typeable)
+
+sample = Sample { lambda = 5 &= help "Lambda value for regularization",
+                  eta = 0.5 &= help "Learning rate",
+                  fileName = "" &= help "Load from file",
+                  costFunction = Quadratic &= help "Cost function" }
+  &= summary "Sample v2"
+
+main = print =<< cmdArgs sample