浏览代码

initial commit

master
erichhasl 8 年前
当前提交
cfd609aa85
共有 5 个文件被更改,包括 452 次插入0 次删除
  1. +8
    -0
      .gitignore
  2. +133
    -0
      MNIST.hs
  3. +269
    -0
      Network.hs
  4. +24
    -0
      XOR.hs
  5. +18
    -0
      argstest.hs

+ 8
- 0
.gitignore 查看文件

@@ -0,0 +1,8 @@
*
!*.*

*.net*
*.o
*.hi
*.prof
*.swp

+ 133
- 0
MNIST.hs 查看文件

@@ -0,0 +1,133 @@
{-# LANGUAGE DeriveDataTypeable, BangPatterns #-}

import qualified Data.ByteString.Lazy as BS

import Data.Int
import Data.List (findIndex, maximumBy)
import Data.List.Split (chunksOf)
import Data.Ord (comparing)
import Debug.Trace (trace)

import Control.DeepSeq

import Numeric.LinearAlgebra
import Codec.Compression.GZip
import System.Random
import System.Environment (getArgs)
import System.Console.CmdArgs.Implicit

import Network

-- | Command-line-selectable cost function. Mirrors Network's 'CostFunction'
-- but derives 'Data' so cmdargs can parse it as a flag value.
data Cost = Quadratic | CrossEntropy
deriving (Show, Data)

-- | Translate the cmdargs-facing 'Cost' flag into the network-level
-- 'CostFunction' used by the training code.
toCostFunction :: Cost -> CostFunction
toCostFunction c = case c of
    Quadratic    -> QuadraticCost
    CrossEntropy -> CrossEntropyCost

-- | All tunable hyper-parameters and paths, parsed from the command line.
-- 'Data'/'Typeable' are required by the cmdargs Implicit API.
-- An empty 'filePath' means "start from a fresh random network" (see main).
data Arguments = Arguments { eta :: Double, lambda :: Double,
filePath :: FilePath, costFunction :: Cost,
epochs :: Int, miniBatchSize :: Int,
hiddenNeurons :: Int }
deriving (Show, Data, Typeable)

-- | Default values plus per-flag help texts; consumed by 'cmdArgs' in main.
arguments = Arguments { eta = 0.5 &= help "Learning rate",
lambda = 5 &= help "Lambda of regularization",
filePath = "" &= help "Load network from file",
costFunction = Quadratic &= help "Cost function",
epochs = 30 &= help "Number of training epochs",
miniBatchSize = 10 &= help "Mini batch size",
hiddenNeurons = 30 &= help "Number of neurons in hidden layer" }
&= summary "MNIST Image classifier in Haskell v1"

-- | Load @n@ labelled images from a gzipped IDX image file and its
-- matching gzipped IDX label file, pairing label i with image i.
readImages :: FilePath -> FilePath -> Int64 -> IO ([(Int, Vector R)])
readImages !imgPath !lblPath !n = do
    imgData <- decompress <$> BS.readFile imgPath
    lblData <- decompress <$> BS.readFile lblPath
    let !pics   = [ readImage imgData i | i <- [0 .. n - 1] ]
        !labels = [ readLabel lblData i | i <- [0 .. n - 1] ]
    return $! zip labels pics

-- | Decode image @n@ from raw IDX bytes: 784 pixels starting at byte
-- offset 16 + n*784 (16-byte header), each scaled from [0,255] to [0,1).
readImage :: BS.ByteString -> Int64 -> Vector R
readImage !bytes !n = vector $! [ pixel (base + i) | i <- [0 .. 783] ]
  where base      = n * 28 ^ 2 + 16
        pixel idx = fromIntegral (BS.index bytes idx) / 256

-- | Decode label @n@ from raw IDX label bytes; the 8-byte header precedes
-- the label data, hence the offset.
readLabel :: BS.ByteString -> Int64 -> Int
readLabel bytes n = fromIntegral (BS.index bytes (n + 8))

-- | One-hot encode a digit 0-9 as a 10-element vector.
toLabel :: Int -> Vector R
toLabel n = fromList (map indicator [0 .. 9])
  where indicator i
            | i == n    = 1
            | otherwise = 0

-- | Inverse of 'toLabel': index of the largest component (argmax).
-- On ties, maximumBy keeps the last maximal entry, as before.
fromLabel :: Vector R -> Int
fromLabel vec = snd best
  where indexed = zip (toList vec) [0 ..]
        best    = maximumBy (comparing fst) indexed

-- | Render a 784-pixel image as 28 newline-terminated rows of ASCII art
-- ('o' for a bright pixel, '.' otherwise).
drawImage :: Vector R -> String
drawImage vec = concatMap row (chunksOf 28 (toList vec))
  where row pixels = map glyph pixels ++ "\n"
        glyph v    = if v > 0.5 then 'o' else '.'

-- | Render a training sample: the ASCII image followed by its decoded label.
drawSample :: Sample Double -> String
drawSample (img, lbl) =
    drawImage img ++ "Label: " ++ show (fromLabel lbl)

-- | The 50k-image MNIST training set, read from the gzipped IDX files
-- expected under mnist-data/.
trainIms :: IO [(Int, Vector R)]
trainIms = readImages "mnist-data/train-images-idx3-ubyte.gz"
"mnist-data/train-labels-idx1-ubyte.gz"
50000

-- | Training set as (input, one-hot target) samples for Network.
trainSamples :: IO (Samples Double)
trainSamples = do
    labelled <- trainIms
    return $! map (\(lbl, img) -> img --> toLabel lbl) labelled

-- | The 10k-image MNIST test set, read from the gzipped IDX files
-- expected under mnist-data/.
testIms :: IO [(Int, Vector R)]
testIms = readImages "mnist-data/t10k-images-idx3-ubyte.gz"
"mnist-data/t10k-labels-idx1-ubyte.gz"
10000

-- | Test set as (input, one-hot target) samples for Network.
testSamples :: IO (Samples Double)
testSamples = do
    labelled <- testIms
    return $! map (\(lbl, img) -> img --> toLabel lbl) labelled

-- | Print a sample's ASCII rendering together with the digit the
-- network recognizes it as (the sample's true label is ignored).
classify :: Network Double -> Sample Double -> IO ()
classify net (img, _) =
    putStrLn $ drawImage img
            ++ "Recognized as "
            ++ show (fromLabel (output net activation img))

-- | Count how many test samples the network classifies correctly,
-- i.e. where the network's argmax equals the label's argmax.
-- Uses 'activation' (the program-wide activation alias) for consistency
-- with 'classify'; behavior is unchanged since activation = sigmoid.
test :: Network Double -> Samples Double -> Int
test net = length . filter correct
  where
    -- A sample is a hit when predicted digit == labelled digit.
    correct (img, lbl) =
        fromLabel (output net activation img) == fromLabel lbl

-- | The activation function used throughout this program.
activation :: (Floating a) => ActivationFunction a
activation = sigmoid

-- | Derivative of 'activation'.
activation' :: (Floating a) => ActivationFunctionDerivative a
activation' = sigmoid'

-- Entry point: parse flags, build or load a network, report its initial
-- accuracy on the 10k test set, train for the requested number of
-- shuffled epochs, then report the final accuracy and save the result.
main = do
args <- cmdArgs arguments
-- An empty --filepath means "start from a fresh random 784-h-10 network".
net <- case filePath args of
"" -> newNetwork [784, hiddenNeurons args, 10]
fp -> loadNetwork fp :: IO (Network Double)
trSamples <- trainSamples
tstSamples <- testSamples
-- deepseq fully evaluates the test set before the first accuracy check.
let bad = tstSamples `deepseq` test net tstSamples
putStrLn $ "Initial performance of network: recognized " ++ show bad ++ " of 10k"
-- Per-epoch progress line, printed via Debug.Trace inside trainShuffled.
let debug network epochs = "Left epochs: " ++ show epochs
++ " recognized: " ++ show (test network tstSamples) ++ " of 10k"
-- Force the training set into memory before training starts.
smartNet <- trSamples `deepseq` (trainShuffled
(epochs args) debug net
(toCostFunction $ costFunction args)
(lambda args) trSamples
(miniBatchSize args)
(eta args))
let res = test smartNet tstSamples
putStrLn $ "finished testing. recognized: " ++ show res ++ " of 10k"
putStrLn "saving network"
saveNetwork "mnist.net" smartNet

+ 269
- 0
Network.hs 查看文件

@@ -0,0 +1,269 @@
{-# LANGUAGE ScopedTypeVariables, FlexibleContexts, BangPatterns #-}
{-# OPTIONS -Wall #-}

module Network where

import Data.List.Split (chunksOf)
import Data.Binary
import Data.Maybe (fromMaybe)
import Text.Read (readMaybe)

import System.Directory
import System.Random
import Control.Monad (zipWithM, forM)
import Data.Array.IO
import Debug.Trace (trace)
import Text.Regex.PCRE

import Numeric.LinearAlgebra

-- | The generic feedforward network type, a binary instance is implemented.
-- It takes a list of layers
-- with a minimum of one (output layer).
-- It is usually constructed using the `newNetwork` function, initializing the matrices
-- with some default random values.
--
-- > net <- newNetwork [2, 3, 4]
--
-- The input layer carries no weights, so a network built from n sizes
-- holds n-1 'Layer' values (see 'newNetwork').
data Network a = Network { layers :: [Layer a] }
deriving (Show)

-- | One layer of a network, storing the weights matrix and the biases vector
-- of this layer. The matrix has one row per neuron in this layer and one
-- column per input (see 'newNetwork', which uses @randn outputSize inputSize@).
data Layer a = Layer { weights :: Matrix a, biases :: Vector a }
deriving (Show)

-- | A network serializes as nothing more than its layer list.
instance (Element a, Binary a) => Binary (Network a) where
    put (Network ls) = put ls
    get = fmap Network get

-- | A layer serializes via plain nested lists, so the on-disk format does
-- not depend on any hmatrix-internal representation.
instance (Element a, Binary a) => Binary (Layer a) where
    put (Layer ws bs) = put (toLists ws) >> put (toList bs)
    get = do
        wss <- get
        bss <- get
        return (Layer (fromLists wss) (fromList bss))

-- | Cost Function Enum: selects how the output-layer error is computed
-- during backpropagation (see 'getDelta').
data CostFunction = QuadraticCost
| CrossEntropyCost
deriving (Show, Eq)

-- | Output-layer error ("delta") for the chosen cost function, given the
-- raw weighted input @z@, the activation @a@ and the desired output @y@.
-- For cross-entropy the sigmoid' factor cancels, leaving just @a - y@.
getDelta :: Floating a => CostFunction -> a -> a -> a -> a
getDelta cost z a y = case cost of
    QuadraticCost    -> (a - y) * sigmoid' z
    CrossEntropyCost -> a - y

-- | An activation function and its derivative are plain unary functions.
type ActivationFunction a = a -> a
type ActivationFunctionDerivative a = a -> a

-- | A training sample is an (input vector, desired output vector) pair.
type Sample a = (Vector a, Vector a)
type Samples a = [Sample a]

-- | A simple synonym for the (,) operator, used to create samples very intuitively.
(-->) :: Vector a -> Vector a -> Sample a
(-->) = (,)

-- | Documentation aliases used in training signatures below.
type LearningRate = Double
type Lambda = Double
type TrainingDataLength = Int

-- | Build a randomly initialized network from a list of layer sizes
-- (input size first). Weights come from 'randn'; biases are drawn from a
-- Gaussian via a fresh random seed per layer. At least two sizes
-- (input and output) are required.
newNetwork :: [Int] -> IO (Network Double)
newNetwork layerSizes
    | length layerSizes < 2 = error "Network too small!"
    | otherwise = Network <$> zipWithM mkLayer (init layerSizes) (tail layerSizes)
  where
    mkLayer :: Int -> Int -> IO (Layer Double)
    mkLayer nIn nOut = do
        ws   <- randn nOut nIn
        seed <- randomIO
        return (Layer ws (randomVector seed Gaussian nOut))

-- | Feed an input vector through the whole network, applying the given
-- activation function after every layer; returns the final activation only.
output :: (Numeric a, Num (Vector a))
       => Network a
       -> ActivationFunction a
       -> Vector a
       -> Vector a
output net act input = foldl step input (layers net)
  where step a (Layer w b) = cmap act ((w #> a) + b)

-- | Like 'output', but returns every intermediate activation; the head of
-- the result is the untouched input vector (scanl semantics).
outputs :: (Numeric a, Num (Vector a))
        => Network a
        -> ActivationFunction a
        -> Vector a
        -> [Vector a]
outputs net act input = scanl step input (layers net)
  where step a (Layer w b) = cmap act ((w #> a) + b)

-- | Like 'outputs', but keeps each layer's raw weighted input z alongside
-- its activation a, as (z, a) pairs. The head entry duplicates the
-- network input in both positions.
rawOutputs :: (Numeric a, Num (Vector a))
           => Network a
           -> ActivationFunction a
           -> Vector a
           -> [(Vector a, Vector a)]
rawOutputs net act input = scanl step (input, input) (layers net)
  where step (_, a) (Layer w b) =
            let z = (w #> a) + b
            in (z, cmap act z)

-- | The most used training function, randomly shuffling the training set before
-- every training epoch
--
-- > trainShuffled 30 (\n e -> "") net CrossEntropyCost 0.5 trainData 10 0.1
trainShuffled :: Int
-> (Network Double -> Int -> String)
-> Network Double
-> CostFunction
-> Lambda
-> Samples Double
-> Int
-> Double
-> IO (Network Double)
-- No epochs left: return the network as-is.
trainShuffled 0 _ net _ _ _ _ _ = return net
trainShuffled epochs debug net costFunction lambda trainSamples miniBatchSize eta = do
spls <- shuffle trainSamples
-- Bang pattern forces the updated network to WHNF each epoch so thunks
-- do not accumulate across the recursion.
let !net' = trainSGD net costFunction lambda spls miniBatchSize eta
-- trace prints the caller-supplied progress line once per epoch.
trace (debug net' epochs)
(trainShuffled (epochs - 1) debug net' costFunction lambda trainSamples miniBatchSize eta)


-- | Train for a fixed number of epochs WITHOUT reshuffling between epochs
-- (pure counterpart of 'trainShuffled'); the debug callback's result is
-- printed via Debug.Trace after each epoch.
trainNTimes :: Int
-> (Network Double -> Int -> String)
-> Network Double
-> CostFunction
-> Lambda
-> Samples Double
-> Int
-> Double
-> Network Double
-- No epochs left: return the network as-is.
trainNTimes 0 _ net _ _ _ _ _ = net
trainNTimes epochs debug net costFunction lambda trainSamples miniBatchSize eta =
trace (debug net' epochs)
(trainNTimes (epochs - 1) debug net' costFunction lambda trainSamples miniBatchSize eta)
-- Bang forces the updated network so thunks don't pile up across epochs.
where !net' = trainSGD net costFunction lambda trainSamples miniBatchSize eta


-- | One epoch of stochastic gradient descent: split the samples into mini
-- batches of the given size and apply one regularized 'update' per batch.
-- The length of the full training set is passed through to 'update' for
-- the L2 regularization factor.
--
-- The original signature carried a @(Numeric Double, Floating Double)@
-- context; constraints on a concrete type are vacuous and are flagged by
-- the module's own -Wall, so they are dropped (callers are unaffected).
trainSGD :: Network Double
         -> CostFunction
         -> Lambda
         -> Samples Double
         -> Int             -- ^ mini batch size
         -> Double          -- ^ learning rate (eta)
         -> Network Double
trainSGD net costFunction lambda trainSamples miniBatchSize eta =
    foldl updateMiniBatch net (chunksOf miniBatchSize trainSamples)
  where updateMiniBatch = update eta costFunction lambda (length trainSamples)

-- | Apply one mini batch: accumulate per-sample gradients ("nablas") via
-- 'backprop', then move every layer against its averaged gradient, with
-- L2 weight decay scaled by lambda and the full training-set size n.
update :: LearningRate
-> CostFunction
-> Lambda
-> TrainingDataLength
-> Network Double
-> Samples Double
-> Network Double
update eta costFunction lambda n net spls = case newNablas of
-- Empty batch: nothing was accumulated, so the network is unchanged.
Nothing -> net
Just x -> net { layers = layers' x }
where newNablas :: Maybe [Layer Double]
-- Element-wise sum of per-sample gradients; Nothing until the
-- first sample seeds the accumulator.
newNablas = foldl updateNablas Nothing spls
updateNablas :: Maybe [Layer Double] -> Sample Double -> Maybe [Layer Double]
updateNablas mayNablas sample =
let nablasDelta = backprop net costFunction sample
f nabla nablaDelta =
nabla { weights = weights nabla + weights nablaDelta,
biases = biases nabla + biases nablaDelta }
in case mayNablas of
Just nablas -> Just $ zipWith f nablas nablasDelta
Nothing -> Just $ nablasDelta
layers' :: [Layer Double] -> [Layer Double]
layers' nablas = zipWith updateLayer (layers net) nablas
updateLayer :: Layer Double -> Layer Double -> Layer Double
updateLayer layer nabla =
let w = weights layer -- weights matrix
nw = weights nabla
b = biases layer -- biases vector
nb = biases nabla
-- L2 regularization: shrink weights by eta*lambda/n per step;
-- gradients are divided by the batch size (length spls).
fac = 1 - eta * (lambda / fromIntegral n)
w' = scale fac w - scale (eta / (fromIntegral $ length spls)) nw
b' = b - scale (eta / (fromIntegral $ length spls)) nb
in layer { weights = w', biases = b' }

-- | Backpropagation for a single sample: produces one gradient "nabla"
-- Layer per network layer, in forward order. Works over the REVERSED
-- feedforward (z, a) list, starting from the output layer's delta.
-- Note: uses (!!) indexing inside a fold over layer indices, so it is
-- O(L^2) in the number of layers L (fine for shallow networks).
backprop :: Network Double -> CostFunction -> Sample Double -> [Layer Double]
backprop net costFunction spl = finalNablas
where rawFeedforward :: [(Vector Double, Vector Double)]
rawFeedforward = reverse $ rawOutputs net sigmoid (fst spl)
-- get starting activation and raw value
headZ, headA :: Vector Double
(headZ, headA) = head rawFeedforward
-- get starting delta, based on the activation of the last layer
startDelta = getDelta costFunction headZ headA (snd spl)
-- calculate weighs of last layer in advance
lastNablaB = startDelta
lastNablaW = startDelta `outer` previousA
-- single-layer networks fall back to the raw input activation
where previousA
| length rawFeedforward > 1 = snd $ rawFeedforward !! 1
| otherwise = fst spl
lastLayer = Layer { weights = lastNablaW, biases = lastNablaB }
-- reverse layers, analogy to the reversed (z, a) list
layersReversed = reverse $ layers net
-- calculate nablas, beginning at the end of the network (startDelta)
(finalNablas, _) = foldl calculate ([lastLayer], startDelta)
[1..length layersReversed - 1]
-- takes the index and updates nablas
calculate (nablas, oldDelta) idx =
let -- extract raw and activated value
(z, _) = rawFeedforward !! idx
-- apply prime derivative of sigmoid
z' = cmap sigmoid' z
-- calculate new delta
w = weights $ layersReversed !! (idx - 1)
delta = (tr w #> oldDelta) * z'
-- nablaB is just the delta vector
nablaB = delta
-- activation in previous layer
aPrevious = snd $ rawFeedforward !! (idx + 1)
-- dot product of delta and the activation in the previous layer
nablaW = delta `outer` aPrevious
-- put nablas into a new layer
in (Layer { weights = nablaW, biases = nablaB } : nablas, delta)


-- | The logistic function, mapping any real number into (0, 1).
sigmoid :: Floating a => ActivationFunction a
sigmoid x = recip (1 + exp (negate x))

-- | Derivative of 'sigmoid': s(x) * (1 - s(x)), sharing one evaluation
-- of the sigmoid instead of computing it twice.
sigmoid' :: Floating a => ActivationFunctionDerivative a
sigmoid' x = let s = sigmoid x in s * (1 - s)

-- | Uniform random shuffle (Fisher-Yates over a mutable IOArray, one
-- randomRIO call per element). The result list is built from the values
-- emitted at each step; the input list is not modified.
shuffle :: [a] -> IO [a]
shuffle xs = do
ar <- newArr n xs
forM [1..n] $ \i -> do
-- Pick a random slot from the not-yet-emitted suffix [i..n],
-- emit its value, and park slot i's old value there.
j <- randomRIO (i,n)
vi <- readArray ar i
vj <- readArray ar j
writeArray ar j vi
return vj
where
n = length xs
-- 1-based array holding the working copy of the list.
newArr :: Int -> [a] -> IO (IOArray Int a)
newArr len lst = newListArray (1,len) lst

-- | Write a network with Data.Binary, never overwriting an existing file:
-- if the target exists, retry under the version-bumped name produced by
-- 'newFileName'.
saveNetwork :: (Element a, Binary a) => FilePath -> Network a -> IO ()
saveNetwork fp net = do
    exists <- doesFileExist fp
    if exists
        then saveNetwork (newFileName fp) net
        else encodeFile fp net

-- | Derive a fresh file name by incrementing the numeric suffix before the
-- extension ("mnist.net" -> "mnist1.net" -> "mnist2.net"). Names that do
-- not match the pattern at all just get an "l" appended.
newFileName :: FilePath -> FilePath
newFileName fp = case fp =~ "(.+[a-z]){0,1}([0-9]*)(\\..*)" :: [[String]] of
[[_, p, v, s]] -> p ++ show (version v + 1) ++ s
_ -> fp ++ "l"
where version :: String -> Int
-- A missing version suffix counts as 0, so the first bump yields 1.
version xs = fromMaybe 0 (readMaybe xs :: Maybe Int)

-- | Read a network previously written by 'saveNetwork'. Errors from
-- Data.Binary propagate if the file is missing or malformed.
loadNetwork :: (Element a, Binary a) => FilePath -> IO (Network a)
loadNetwork = decodeFile

+ 24
- 0
XOR.hs 查看文件

@@ -0,0 +1,24 @@
import Network
import Numeric.LinearAlgebra

-- | The complete XOR truth table as training samples.
-- Fix: the fourth sample was a duplicate of (0,0) -> 0; the (1,1) -> 0
-- case was missing, so the network was never trained on it.
trainData :: Samples Double
trainData = [(vector [0, 0], vector [0]),
             (vector [0, 1], vector [1]),
             (vector [1, 0], vector [1]),
             (vector [1, 1], vector [0])]

-- | A small fixed 2-3-1 network with deterministic weights/biases, handy
-- for reproducing a single update step in GHCi (unused by 'main').
l1 = Layer { weights = (3><2) [1..], biases = vector [1..3] }
l2 = Layer { weights = (1><3) [1..], biases = vector [1] }
fixedNet = Network [l1, l2]

debug _ _ = "Bla!"

-- | Train a fresh 2-3-1 network on XOR and print its output for every
-- training input.
main :: IO ()
main = do
    net <- newNetwork [2, 3, 1]
    -- NOTE(review): trainNTimes in Network.hs also takes a cost function
    -- and a regularization lambda; the original call omitted both and did
    -- not type-check. QuadraticCost with lambda = 0 preserves the old,
    -- unregularized behavior.
    let debug _ _ = ""
        smartNet = trainNTimes 100000 debug net QuadraticCost 0 trainData 4 100
    putStrLn "Output after learning: "
    mapM_ (print . Network.output smartNet sigmoid . fst) trainData

+ 18
- 0
argstest.hs 查看文件

@@ -0,0 +1,18 @@
{-# LANGUAGE DeriveDataTypeable #-}

import System.Console.CmdArgs.Implicit

-- | Cost function choice, mirroring the real MNIST program's flag;
-- 'Data' lets cmdargs parse it from the command line.
data CostFunction = Quadratic | CrossEntropy
deriving (Show, Data)

-- | Record whose fields become command-line flags via cmdargs Implicit.
data Sample = Sample { lambda :: Double, eta :: Double, fileName :: String,
costFunction :: CostFunction }
deriving (Show, Data, Typeable)

-- | Default values plus per-flag help texts; cmdargs derives the CLI
-- from this annotated record.
sample = Sample { lambda = 5 &= help "Lambda value for regularization",
eta = 0.5 &= help "Learning rate",
fileName = "" &= help "Load from file",
costFunction = Quadratic &= help "Cost function" }
&= summary "Sample v2"

main = print =<< cmdArgs sample

正在加载...
取消
保存