commit cfd609aa854ca728053a524e70107a73301987ac
Author: erichhasl
Date:   Sun Aug 27 11:39:56 2017 +0200

    initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b751b38
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+*
+!*.*
+
+*.net*
+*.o
+*.hi
+*.prof
+*.swp
diff --git a/MNIST.hs b/MNIST.hs
new file mode 100644
index 0000000..4de5b9d
--- /dev/null
+++ b/MNIST.hs
@@ -0,0 +1,133 @@
+{-# LANGUAGE DeriveDataTypeable, BangPatterns #-}
+
+import qualified Data.ByteString.Lazy as BS
+
+import Data.Int
+import Data.List (findIndex, maximumBy)
+import Data.List.Split (chunksOf)
+import Data.Ord (comparing)
+import Debug.Trace (trace)
+
+import Control.DeepSeq
+
+import Numeric.LinearAlgebra
+import Codec.Compression.GZip
+import System.Random
+import System.Environment (getArgs)
+import System.Console.CmdArgs.Implicit
+
+import Network
+
+data Cost = Quadratic | CrossEntropy
+  deriving (Show, Data)
+
+toCostFunction :: Cost -> CostFunction
+toCostFunction Quadratic = QuadraticCost
+toCostFunction CrossEntropy = CrossEntropyCost
+
+data Arguments = Arguments { eta :: Double, lambda :: Double,
+                             filePath :: FilePath, costFunction :: Cost,
+                             epochs :: Int, miniBatchSize :: Int,
+                             hiddenNeurons :: Int }
+  deriving (Show, Data, Typeable)
+
+arguments = Arguments { eta = 0.5 &= help "Learning rate",
+                        lambda = 5 &= help "Lambda of regularization",
+                        filePath = "" &= help "Load network from file",
+                        costFunction = Quadratic &= help "Cost function",
+                        epochs = 30 &= help "Number of training epochs",
+                        miniBatchSize = 10 &= help "Mini batch size",
+                        hiddenNeurons = 30 &= help "Number of neurons in hidden layer" }
+  &= summary "MNIST Image classifier in Haskell v1"
+
+-- Read n images and their labels from the gzipped IDX files (MNIST format).
+readImages :: FilePath -> FilePath -> Int64 -> IO ([(Int, Vector R)])
+readImages !imgPath !lblPath !n = do
+  imgBytes <- fmap decompress (BS.readFile imgPath)
+  lblBytes <- fmap decompress (BS.readFile lblPath)
+  let !imgs = map (readImage imgBytes) [0..n-1]
+      !lbs  = map (readLabel lblBytes) [0..n-1]
+  return $! zip lbs imgs
+
+-- 16-byte IDX header, then 28x28 pixels per image, scaled to [0, 1).
+readImage :: BS.ByteString -> Int64 -> Vector R
+readImage !bytes !n = vector $! map ((/256) . fromIntegral . BS.index bytes . (n*28^2 + 16 +)) [0..783]
+
+-- 8-byte IDX header, then one label byte per image.
+readLabel :: BS.ByteString -> Int64 -> Int
+readLabel bytes n = fromIntegral $! BS.index bytes (n + 8)
+
+-- One-hot encode a digit 0..9.
+toLabel :: Int -> Vector R
+toLabel n = fromList [ if i == n then 1 else 0 | i <- [0..9]]
+
+-- Index of the strongest output activation.
+fromLabel :: Vector R -> Int
+fromLabel vec = snd $ maximumBy (comparing fst) (zip (toList vec) [0..])
+
+drawImage :: Vector R -> String
+drawImage vec = concatMap toLine (chunksOf 28 (toList vec))
+  where toLine ps = (map toChar ps) ++ "\n"
+        toChar v
+          | v > 0.5 = 'o'
+          | otherwise = '.'
+
+drawSample :: Sample Double -> String
+drawSample sample = drawImage (fst sample)
+                 ++ "Label: "
+                 ++ show (fromLabel $ snd sample)
+
+trainIms :: IO [(Int, Vector R)]
+trainIms = readImages "mnist-data/train-images-idx3-ubyte.gz"
+                      "mnist-data/train-labels-idx1-ubyte.gz"
+                      50000
+
+trainSamples :: IO (Samples Double)
+trainSamples = do
+  ims <- trainIms
+  return $! [ img --> toLabel lbl | (lbl, img) <- ims]
+
+testIms :: IO [(Int, Vector R)]
+testIms = readImages "mnist-data/t10k-images-idx3-ubyte.gz"
+                     "mnist-data/t10k-labels-idx1-ubyte.gz"
+                     10000
+
+testSamples :: IO (Samples Double)
+testSamples = do
+  ims <- testIms
+  return $! [ img --> toLabel lbl | (lbl, img) <- ims]
+
+classify :: Network Double -> Sample Double -> IO ()
+classify net spl = do
+  putStrLn (drawImage (fst spl)
+         ++ "Recognized as "
+         ++ show (fromLabel $ output net activation (fst spl)))
+
+-- Count correctly classified samples (uses `activation`, like classify).
+test :: Network Double -> Samples Double -> Int
+test net = let f (img, lbl) = (fromLabel $ output net activation img, fromLabel lbl) in
+           hits . map f
+  where hits :: [(Int, Int)] -> Int
+        hits result = sum $ map (\(a, b) -> if a == b then 1 else 0) result
+
+activation :: (Floating a) => ActivationFunction a
+activation = sigmoid
+
+activation' :: (Floating a) => ActivationFunctionDerivative a
+activation' = sigmoid'
+
+main = do
+  args <- cmdArgs arguments
+  net <- case filePath args of
+           "" -> newNetwork [784, hiddenNeurons args, 10]
+           fp -> loadNetwork fp :: IO (Network Double)
+  trSamples <- trainSamples
+  tstSamples <- testSamples
+  let bad = tstSamples `deepseq` test net tstSamples
+  putStrLn $ "Initial performance of network: recognized " ++ show bad ++ " of 10k"
+  let debug network epochs = "Left epochs: " ++ show epochs
+                          ++ " recognized: " ++ show (test network tstSamples) ++ " of 10k"
+  smartNet <- trSamples `deepseq` (trainShuffled
+                (epochs args) debug net
+                (toCostFunction $ costFunction args)
+                (lambda args) trSamples
+                (miniBatchSize args)
+                (eta args))
+  let res = test smartNet tstSamples
+  putStrLn $ "finished testing. recognized: " ++ show res ++ " of 10k"
+  putStrLn "saving network"
+  saveNetwork "mnist.net" smartNet
diff --git a/Network.hs b/Network.hs
new file mode 100644
index 0000000..c01e5a0
--- /dev/null
+++ b/Network.hs
@@ -0,0 +1,269 @@
+{-# LANGUAGE ScopedTypeVariables, FlexibleContexts, BangPatterns #-}
+{-# OPTIONS -Wall #-}
+
+module Network where
+
+import Data.List.Split (chunksOf)
+import Data.Binary
+import Data.Maybe (fromMaybe)
+import Text.Read (readMaybe)
+
+import System.Directory
+import System.Random
+import Control.Monad (zipWithM, forM)
+import Data.Array.IO
+import Debug.Trace (trace)
+import Text.Regex.PCRE
+
+import Numeric.LinearAlgebra
+
+-- | The generic feedforward network type, a binary instance is implemented.
+-- It takes a list of layers
+-- with a minimum of one (output layer).
+-- It is usually constructed using the `newNetwork` function, initializing the matrices
+-- with some default random values.
+--
+-- > net <- newNetwork [2, 3, 4]
+data Network a = Network { layers :: [Layer a] }
+  deriving (Show)
+
+-- | One layer of a network, storing the weights matrix and the biases vector
+-- of this layer.
+data Layer a = Layer { weights :: Matrix a, biases :: Vector a }
+  deriving (Show)
+
+instance (Element a, Binary a) => Binary (Network a) where
+  put (Network ls) = put ls
+  get = Network `fmap` get
+
+instance (Element a, Binary a) => Binary (Layer a) where
+  put (Layer ws bs) = do
+    put (toLists ws)
+    put (toList bs)
+  get = do
+    ws <- get
+    bs <- get
+    return $ Layer (fromLists ws) (fromList bs)
+
+-- | Cost Function Enum
+data CostFunction = QuadraticCost
+                  | CrossEntropyCost
+  deriving (Show, Eq)
+
+-- | getDelta based on the raw input, the activated input and the desired output
+-- results in different values depending on the CostFunction type.
+getDelta :: Floating a => CostFunction -> a -> a -> a -> a
+getDelta QuadraticCost z a y = (a - y) * sigmoid'(z)
+getDelta CrossEntropyCost _ a y = a - y
+
+type ActivationFunction a = a -> a
+type ActivationFunctionDerivative a = a -> a
+
+type Sample a = (Vector a, Vector a)
+type Samples a = [Sample a]
+
+-- | A simple synonym for the (,) operator, used to create samples very intuitively.
+(-->) :: Vector a -> Vector a -> Sample a
+(-->) = (,)
+
+type LearningRate = Double
+type Lambda = Double
+type TrainingDataLength = Int
+
+-- | Build a network with Gaussian-random weights and biases; the list gives
+-- the layer sizes (input layer first), so it needs at least two entries.
+newNetwork :: [Int] -> IO (Network Double)
+newNetwork layerSizes
+  | length layerSizes < 2 = error "Network too small!"
+  | otherwise = do
+      lays <- zipWithM go (init layerSizes) (tail layerSizes)
+      return $ Network lays
+  where go :: Int -> Int -> IO (Layer Double)
+        go inputSize outputSize = do
+          ws <- randn outputSize inputSize
+          seed <- randomIO
+          let bs = randomVector seed Gaussian outputSize
+          return $ Layer ws bs
+
+-- | Feed an input through the whole network.
+output :: (Numeric a, Num (Vector a))
+       => Network a
+       -> ActivationFunction a
+       -> Vector a
+       -> Vector a
+output net act input = foldl f input (layers net)
+  where f vec layer = cmap act ((weights layer #> vec) + biases layer)
+
+-- | Like 'output', but keeps every intermediate activation (input first).
+outputs :: (Numeric a, Num (Vector a))
+        => Network a
+        -> ActivationFunction a
+        -> Vector a
+        -> [Vector a]
+outputs net act input = scanl f input (layers net)
+  where f vec layer = cmap act ((weights layer #> vec) + biases layer)
+
+-- | Like 'outputs', but pairs each raw weighted input z with its activation.
+rawOutputs :: (Numeric a, Num (Vector a))
+           => Network a
+           -> ActivationFunction a
+           -> Vector a
+           -> [(Vector a, Vector a)]
+rawOutputs net act input = scanl f (input, input) (layers net)
+  where f (_, a) layer = let z' = (weights layer #> a) + biases layer in
+                         (z', cmap act z')
+
+-- | The most used training function, randomly shuffling the training set before
+-- every training epoch
+--
+-- > trainShuffled 30 (\n e -> "") net CrossEntropyCost 0.5 trainData 10 0.1
+trainShuffled :: Int
+              -> (Network Double -> Int -> String)
+              -> Network Double
+              -> CostFunction
+              -> Lambda
+              -> Samples Double
+              -> Int
+              -> Double
+              -> IO (Network Double)
+trainShuffled 0 _ net _ _ _ _ _ = return net
+trainShuffled epochs debug net costFunction lambda trainSamples miniBatchSize eta = do
+  spls <- shuffle trainSamples
+  let !net' = trainSGD net costFunction lambda spls miniBatchSize eta
+  trace (debug net' epochs)
+        (trainShuffled (epochs - 1) debug net' costFunction lambda trainSamples miniBatchSize eta)
+
+
+-- | Pure variant of 'trainShuffled': trains for a fixed number of epochs
+-- on the samples in their given order (no shuffling).
+trainNTimes :: Int
+            -> (Network Double -> Int -> String)
+            -> Network Double
+            -> CostFunction
+            -> Lambda
+            -> Samples Double
+            -> Int
+            -> Double
+            -> Network Double
+trainNTimes 0 _ net _ _ _ _ _ = net
+trainNTimes epochs debug net costFunction lambda trainSamples miniBatchSize eta =
+  trace (debug net' epochs)
+        (trainNTimes (epochs - 1) debug net' costFunction lambda trainSamples miniBatchSize eta)
+  where !net' = trainSGD net costFunction lambda trainSamples miniBatchSize eta
+
+
+-- | One epoch of stochastic gradient descent over consecutive mini batches.
+trainSGD :: (Numeric Double, Floating Double)
+         => Network Double
+         -> CostFunction
+         -> Lambda
+         -> Samples Double
+         -> Int
+         -> Double
+         -> Network Double
+trainSGD net costFunction lambda trainSamples miniBatchSize eta =
+  foldl updateMiniBatch net (chunksOf miniBatchSize trainSamples)
+  where updateMiniBatch = update eta costFunction lambda (length trainSamples)
+
+-- | Apply one mini-batch update: accumulate nablas via backprop over the
+-- batch, then shift weights/biases (with L2 regularization factor).
+update :: LearningRate
+       -> CostFunction
+       -> Lambda
+       -> TrainingDataLength
+       -> Network Double
+       -> Samples Double
+       -> Network Double
+update eta costFunction lambda n net spls = case newNablas of
+    Nothing -> net
+    Just x -> net { layers = layers' x }
+  where newNablas :: Maybe [Layer Double]
+        newNablas = foldl updateNablas Nothing spls
+        updateNablas :: Maybe [Layer Double] -> Sample Double -> Maybe [Layer Double]
+        updateNablas mayNablas sample =
+          let nablasDelta = backprop net costFunction sample
+              f nabla nablaDelta =
+                nabla { weights = weights nabla + weights nablaDelta,
+                        biases = biases nabla + biases nablaDelta }
+          in case mayNablas of
+               Just nablas -> Just $ zipWith f nablas nablasDelta
+               Nothing -> Just $ nablasDelta
+        layers' :: [Layer Double] -> [Layer Double]
+        layers' nablas = zipWith updateLayer (layers net) nablas
+        updateLayer :: Layer Double -> Layer Double -> Layer Double
+        updateLayer layer nabla =
+          let w = weights layer -- weights matrix
+              nw = weights nabla
+              b = biases layer -- biases vector
+              nb = biases nabla
+              fac = 1 - eta * (lambda / fromIntegral n)
+              w' = scale fac w - scale (eta / (fromIntegral $ length spls)) nw
+              b' = b - scale (eta / (fromIntegral $ length spls)) nb
+          in layer { weights = w', biases = b' }
+
+-- | Backpropagation for a single sample: returns one nabla layer per
+-- network layer (gradients of the cost w.r.t. weights and biases).
+backprop :: Network Double -> CostFunction -> Sample Double -> [Layer Double]
+backprop net costFunction spl = finalNablas
+  where rawFeedforward :: [(Vector Double, Vector Double)]
+        rawFeedforward = reverse $ rawOutputs net sigmoid (fst spl)
+        -- get starting activation and raw value
+        headZ, headA :: Vector Double
+        (headZ, headA) = head rawFeedforward
+        -- get starting delta, based on the activation of the last layer
+        startDelta = getDelta costFunction headZ headA (snd spl)
+        -- calculate weighs of last layer in advance
+        lastNablaB = startDelta
+        lastNablaW = startDelta `outer` previousA
+          where previousA
+                  | length rawFeedforward > 1 = snd $ rawFeedforward !! 1
+                  | otherwise = fst spl
+        lastLayer = Layer { weights = lastNablaW, biases = lastNablaB }
+        -- reverse layers, analogy to the reversed (z, a) list
+        layersReversed = reverse $ layers net
+        -- calculate nablas, beginning at the end of the network (startDelta)
+        (finalNablas, _) = foldl calculate ([lastLayer], startDelta)
+                                 [1..length layersReversed - 1]
+        -- takes the index and updates nablas
+        calculate (nablas, oldDelta) idx =
+          let -- extract raw and activated value
+              (z, _) = rawFeedforward !! idx
+              -- apply prime derivative of sigmoid
+              z' = cmap sigmoid' z
+              -- calculate new delta
+              w = weights $ layersReversed !! (idx - 1)
+              delta = (tr w #> oldDelta) * z'
+              -- nablaB is just the delta vector
+              nablaB = delta
+              -- activation in previous layer
+              aPrevious = snd $ rawFeedforward !! (idx + 1)
+              -- dot product of delta and the activation in the previous layer
+              nablaW = delta `outer` aPrevious
+              -- put nablas into a new layer
+          in (Layer { weights = nablaW, biases = nablaB } : nablas, delta)
+
+
+sigmoid :: Floating a => ActivationFunction a
+sigmoid x = 1 / (1 + exp (-x))
+
+sigmoid' :: Floating a => ActivationFunctionDerivative a
+sigmoid' x = sigmoid x * (1 - sigmoid x)
+
+-- | Fisher-Yates style shuffle using a mutable array.
+shuffle :: [a] -> IO [a]
+shuffle xs = do
+    ar <- newArr n xs
+    forM [1..n] $ \i -> do
+      j <- randomRIO (i,n)
+      vi <- readArray ar i
+      vj <- readArray ar j
+      writeArray ar j vi
+      return vj
+  where
+    n = length xs
+    newArr :: Int -> [a] -> IO (IOArray Int a)
+    newArr len lst = newListArray (1,len) lst
+
+-- | Serialize a network; never overwrites: bumps a version suffix instead.
+saveNetwork :: (Element a, Binary a) => FilePath -> Network a -> IO ()
+saveNetwork fp net = do
+  ex <- doesFileExist fp
+  case ex of
+    True -> saveNetwork (newFileName fp) net
+    False -> encodeFile fp net
+
+-- | Increment the numeric suffix before the extension ("mnist.net" -> "mnist1.net").
+newFileName :: FilePath -> FilePath
+newFileName fp = case fp =~ "(.+[a-z]){0,1}([0-9]*)(\\..*)" :: [[String]] of
+    [[_, p, v, s]] -> p ++ show (version v + 1) ++ s
+    _ -> fp ++ "l"
+  where version :: String -> Int
+        version xs = fromMaybe 0 (readMaybe xs :: Maybe Int)
+
+loadNetwork :: (Element a, Binary a) => FilePath -> IO (Network a)
+loadNetwork = decodeFile
diff --git a/XOR.hs b/XOR.hs
new file mode 100644
index 0000000..867c157
--- /dev/null
+++ b/XOR.hs
@@ -0,0 +1,24 @@
+import Network
+import Numeric.LinearAlgebra
+
+trainData :: Samples Double
+trainData = [(vector [0, 0], vector [0]),
+             (vector [0, 1], vector [1]),
+             (vector [1, 0], vector [1]),
+             (vector [1, 1], vector [0])] -- was a duplicate (0,0) sample; XOR needs (1,1) -> 0
+
+l1 = Layer { weights = (3><2) [1..], biases = vector [1..3] }
+l2 = Layer { weights = (1><3) [1..], biases = vector [1] }
+fixedNet = Network [l1, l2]
+
+debug _ _ = "Bla!"
+
+main :: IO ()
+main = do
+  net <- newNetwork [2, 3, 1]
+  {-let smartNet = update 0.06 net trainData-}
+  {-[>let smartNet = trainSGD net trainData 4 0.06<]-}
+  let debug _ _ = ""
+      smartNet = trainNTimes 100000 debug net QuadraticCost 0 trainData 4 100 -- cost fn and lambda were missing
+  putStrLn "Output after learning: "
+  mapM_ (print . Network.output smartNet sigmoid . fst) trainData
diff --git a/argstest.hs b/argstest.hs
new file mode 100644
index 0000000..5dff59e
--- /dev/null
+++ b/argstest.hs
@@ -0,0 +1,18 @@
+{-# LANGUAGE DeriveDataTypeable #-}
+
+import System.Console.CmdArgs.Implicit
+
+data CostFunction = Quadratic | CrossEntropy
+  deriving (Show, Data)
+
+data Sample = Sample { lambda :: Double, eta :: Double, fileName :: String,
+                       costFunction :: CostFunction }
+  deriving (Show, Data, Typeable)
+
+sample = Sample { lambda = 5 &= help "Lambda value for regularization",
+                  eta = 0.5 &= help "Learning rate",
+                  fileName = "" &= help "Load from file",
+                  costFunction = Quadratic &= help "Cost function" }
+  &= summary "Sample v2"
+
+main = print =<< cmdArgs sample