Pārlūkot izejas kodu

update analysis in python, add some more hs

master
flavis pirms 3 gadiem
vecāks
revīzija
2abb417adf
Parakstījis: christian <christian@flavigny.de> GPG atslēgas ID: D953D69721B948B3
13 mainītis faili ar 493 papildinājumiem un 175 dzēšanām
  1. +3
    -0
      .gitignore
  2. +12
    -9
      analysis.py
  3. +210
    -58
      analysis_cont.py
  4. +89
    -0
      app/Diffs.hs
  5. +78
    -54
      app/Main.hs
  6. +5
    -0
      autocut.cabal
  7. +2
    -0
      package.yaml
  8. +41
    -14
      src/Driver.hs
  9. +8
    -22
      src/Lib.hs
  10. +13
    -18
      src/Parser.hs
  11. +10
    -0
      src/Plot.hs
  12. +20
    -0
      src/Types.hs
  13. +2
    -0
      stack.yaml

+ 3
- 0
.gitignore Parādīt failu

@@ -5,3 +5,6 @@
*.wav
*.lock
*_data
*.prof
labels.txt
*.png

+ 12
- 9
analysis.py Parādīt failu

@@ -133,7 +133,7 @@ def spl_on_silence():
def non_silent_chunks(song):
#song = AudioSegment.from_wav("recording.wav")

return detect_nonsilent(song, min_silence_len=400, silence_thresh=-50)
return detect_nonsilent(song, min_silence_len=10, silence_thresh=-50)


def audiosegment_to_librosawav(audiosegment):
@@ -175,7 +175,8 @@ def seg_is_speech(seg):
offset = offset + n
total += 1

return speeches / total
#return speeches / total
return 1.0


def make_widths_equal(fig, rect, ax1, ax2, ax3, pad):
@@ -193,7 +194,7 @@ if __name__ == '__main__':
vad = webrtcvad.Vad()

frame_duration_ms = 10
fp = "hard_pieces.wav"
fp = "hard_piece_7.wav"
y, sr = librosa.load(fp, mono=True, sr=32000)

#pcm_data = y.tobytes()
@@ -269,7 +270,8 @@ if __name__ == '__main__':
continue
max_j = i
for j in range(i, n_segs):
if diffs_penalised[i,j] < 80:
if diffs[i,j] < 80:
#if diffs_penalised[i,j] < 80:
max_j = j
delete_segs[i:max_j] = True
@@ -285,13 +287,14 @@ if __name__ == '__main__':
#print("{0}\t{1}\tvad {2}".format(s1/1000, e1/1000, vad_coeff))


fig, ax = plt.subplots(nrows=3, sharex=True)
ax[0].imshow(diffs)
ax[1].imshow(diffs_penalised)
#fig, ax = plt.subplots(nrows=3, sharex=True)
fig, ax = plt.subplots(nrows=1, sharex=True)
ax.imshow(diffs)
#ax[1].imshow(diffs_penalised)
#ax[1].imshow(np.reshape(vad_coeffs, (1, n_segs)))
ax[2].imshow(np.reshape(lengths, (1, n_segs)))
#ax[2].imshow(np.reshape(lengths, (1, n_segs)))

make_widths_equal(fig, 111, ax[0], ax[1], ax[2], pad=0.5)
#make_widths_equal(fig, 111, ax[0], ax[1], ax[2], pad=0.5)
plt.show()
#for n, seg in enumerate(segs):
# sf.write('part' + str(n) + '.wav', seg, sr)


+ 210
- 58
analysis_cont.py Parādīt failu

@@ -12,10 +12,12 @@ import random
from mpl_toolkits.axes_grid1.axes_divider import VBoxDivider
import mpl_toolkits.axes_grid1.axes_size as Size
import cv2
import sys

import webrtcvad

min_silence_len = 400
frame_duration_ms = 10


def calc_dtw_sim(y1, y2, sr1, sr2, plot_result=False):
@@ -159,9 +161,13 @@ def samples_to_millisecond(samples, sr):
return (samples / sr) * 1000


def samples_to_time(samples, sr):
    """Format a sample count as a time string.

    The sample count is first converted to milliseconds with
    ``samples_to_millisecond`` and the result is rendered by ``ms_to_time``.
    """
    milliseconds = samples_to_millisecond(samples, sr)
    return ms_to_time(milliseconds)


def ms_to_time(ms):
secs = ms / 1000
return "{0}:{1}".format(math.floor(secs / 60), secs % 60)
return "{0}:{1:.4f}".format(math.floor(secs / 60), secs % 60)


def seg_is_speech(seg):
@@ -182,21 +188,29 @@ def seg_is_speech(seg):
offset = offset + n
total += 1

#return speeches / total
return 1.0
return speeches / total


def calculate_best_offset(mfcc_ref, mfcc_seg, sr):
return librosa.segment.cross_similarity(mfcc_seg, mfcc_ref, mode='affinity', metric='cosine')


def detect_lines(img, duration_x, duration_y):
def detect_lines(img, duration_x, duration_y, plot_result=False):
#print(img.shape)
#print(np.min(img), np.max(img))
img = cv2.imread('affine_similarity.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = np.vectorize(int)((1-img) * 255).astype('uint8')
img = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
#print(img, type(img))
#img = cv2.imread('affine_similarity_2.png')
#gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#cv2.imshow("gray", gray)
#cv2.waitKey(0)
#print(gray, type(gray), gray.shape, gray.dtype)
#print(gray2, type(gray2), gray2.shape, gray2.dtype)
kernel_size = 5
blur_gray = cv2.GaussianBlur(gray, (kernel_size, kernel_size), 0)
#cv2.imshow("blur gray", blur_gray)
#cv2.waitKey(0)

low_threshold = 50
high_threshold = 150
@@ -207,7 +221,8 @@ def detect_lines(img, duration_x, duration_y):
threshold = 15 # minimum number of votes (intersections in Hough grid cell)
min_line_length = 50 # minimum number of pixels making up a line
max_line_gap = 20 # maximum gap in pixels between connectable line segments
line_image = np.copy(img) * 0 # creating a blank to draw lines on
if plot_result:
line_image = np.copy(img) * 0 # creating a blank to draw lines on

# Run Hough on edge detected image
# Output "lines" is an array containing endpoints of detected line segments
@@ -218,35 +233,54 @@ def detect_lines(img, duration_x, duration_y):

scale_x = duration_x / width
scale_y = duration_y / height
print(img.shape, scale_x, scale_y, duration_x, duration_y)
#print(img.shape, scale_x, scale_y, duration_x, duration_y)

#slope = duration_y / duration_x
slope = 1

expected_slope = scale_x / scale_y
#print(expected_slope)
#expected_slope = 1.0 # y is inverted by opencv
#expected_slope = 0.101694915

print(expected_slope)
ls = []
offsets = []
for line in lines:
for x1,y1,x2,y2 in line:
# swapped y1 and y2 since y is measured from the top
slope = (y1-y2)/(x2-x1)
if abs(slope - expected_slope) < 0.03:
cv2.line(line_image,(x1,y1),(x2,y2),(255,0,0),5)
cv2.putText(img, "{:.2f}".format(slope), (x1, y1), fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=0.5,color=(0, 0, 255))
if (x1 / width) < 0.15:
print(height-y1)
y = height - y1
y0 = y - x1 * slope
offsets.append(y0 * scale_y)
#actual_lines.append((x1 * scale_x, (height - y1) * scale_y, x2 * scale_x, (height - y2) * scale_y))
xs = []
if lines is not None:
for line in lines:
for x1,y1,x2,y2 in line:
# swapped y1 and y2 since y is measured from the top
slope = (y2-y1)/(x2-x1) if x2 != x1 else 42
if abs(slope - expected_slope) < 0.15:#and (x1 / width) < 0.15:
y = y1
y0 = (y - x1 * slope)
if plot_result:
#cv2.line(line_image,(0,int(y0)),(x2,y2),(0,255,0),5)
cv2.line(line_image,(x1, y1),(x2,y2),(255,0,0),5)
cv2.putText(img, "{:.2f}".format(slope), (x1, y1), fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=2,color=(0, 0, 255))
#if (x1 / width) < 0.15:
#print(height-y1)
#y = height - y1
#y = y1
#y0 = y - x1 * slope
#offsets.append(y0 * scale_y)
#xs.append(x1)
ls.append((x1, y1, slope))
#actual_lines.append((x1 * scale_x, (height - y1) * scale_y, x2 * scale_x, (height - y2) * scale_y))
#print(max(slopes))
x_min = min(ls, key=lambda a: a[0])[0] if len(ls) > 0 else 42 # just something > 10
offsets = [ (y1 + (x_min - x1)*slope) * scale_y for x1, y1, slope in ls ]
if plot_result:
for x1, y1, slope in ls:
y = y1 + (x_min -x1)*slope
#cv2.line(line_image,(x_min,int(y)),(x1,y1),(0,255,0),5)

lines_edges = cv2.addWeighted(img, 0.8, line_image, 1, 0)
#cv2.imshow("lines", lines_edges)
#cv2.waitKey(0)
return offsets
#cv2.line(line_image, (x_min, 0), (x_min, height-1), (0, 0, 255), 2)
lines_edges = cv2.addWeighted(img, 0.8, line_image, 1, 0)
lines_edges_resized = cv2.resize(lines_edges, (int(1024 * duration_x / duration_y ), 1024))
cv2.imshow("lines", lines_edges_resized)
cv2.waitKey(0)
return (x_min*scale_x, offsets)


def map2d(x, y, f):
@@ -259,6 +293,103 @@ def map2d(x, y, f):
return res


def find_repetition(mfcc_ref, seg, sr, hop_length, sentence_timestamps, plot_result=False):
    """Look for a repetition of ``seg`` inside the reference MFCC window.

    The MFCC of ``seg`` (first coefficient dropped) is compared with
    ``mfcc_ref`` via ``calculate_best_offset`` and the resulting similarity
    matrix is passed to ``detect_lines``.  Each offset returned by
    ``detect_lines`` is converted to milliseconds and compared against every
    entry of ``sentence_timestamps``.

    Returns a tuple ``(x_min_ms, latest)``: ``x_min_ms`` is the ``x_min``
    value from ``detect_lines`` converted to milliseconds, and ``latest`` is
    the last entry of ``sentence_timestamps`` that lies closer than
    ``min_silence_len / 2`` to one of the detected starts (``None`` if there
    is no such entry).
    """
    seg_features = librosa.feature.mfcc(y=seg, sr=sr, hop_length=hop_length, n_mfcc=n_mfcc)[1:,:]
    similarity = calculate_best_offset(mfcc_ref, seg_features, sr)
    ref_duration = mfcc_ref.shape[1] * hop_length
    x_min, offsets = detect_lines(similarity, len(seg), ref_duration, plot_result=plot_result)
    found_starts = sorted([samples_to_millisecond(line_offset, sr) for line_offset in offsets])

    def distance(ts, start):
        return abs(ts - start)

    closest = map2d(sentence_timestamps, found_starts, distance)
    if plot_result:
        plt.imshow(closest)
        plt.show()

    tolerance = min_silence_len / 2
    latest = None
    for idx, row in enumerate(closest):
        # rows are empty when no start was detected at all
        if len(row) != 0 and min(row) < tolerance:
            latest = sentence_timestamps[idx]
    return (samples_to_millisecond(x_min, sr), latest)


def samples_to_hops(samples, hop_length):
    """Convert a sample count to the nearest whole number of hops."""
    hops = samples / hop_length
    return round(hops)


def hops_to_samples(hops, hop_length):
    """Convert a hop count to the nearest whole number of samples."""
    total_samples = hop_length * hops
    return round(total_samples)


def cont_find_repetitions(y, sr, hop_length, sentence_timestamps):
    """Scan ``y`` with a sliding window and suggest spans that should be deleted.

    A 1500 ms window is moved over ``y`` in 200 ms steps.  Steps that fall
    between sentences (before the next sentence start and not before the end
    of the previously consumed one) are skipped.  For every other step the
    window is searched for a repetition within the following 20 s of MFCC
    frames via ``find_repetition``; each hit is printed and recorded as a
    deletion suggestion.  Suggestions whose starts lie less than 250 ms after
    the previous suggestion's start are merged into one deletion, and a
    deletion start within 150 ms of a sentence start is snapped to that
    sentence start.

    Parameters:
        y: the audio samples (indexed and sliced by sample offset).
        sr: sample rate of ``y``.
        hop_length: hop length used for the MFCC computation.
        sentence_timestamps: list of ``(start_ms, end_ms)`` entries; must
            already be sorted by start (checked with an assertion).

    Returns:
        A list of ``(start_ms, end_ms)`` tuples, one per merged deletion.
    """
    assert sorted(sentence_timestamps, key=lambda t: t[0]) == sentence_timestamps
    mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=n_mfcc)[1:,:]

    step_length_ms = 200
    step_length_samples = millisecond_to_samples(step_length_ms, sr)
    window_length_ms = 1500
    window_length_samples = millisecond_to_samples(window_length_ms, sr)
    ref_window_length_ms = 20*1000 # 20 seconds
    ref_window_length_samples = millisecond_to_samples(ref_window_length_ms, sr)
    ref_window_length_hops = samples_to_hops(ref_window_length_samples, hop_length)

    offset = 0
    available_ts = sentence_timestamps
    last_sentence_end = 0
    deletion_suggestions = []

    while offset + step_length_samples < len(y) and len(available_ts) > 0:
        offset_ms = samples_to_millisecond(offset, sr)
        # Skip positions that lie in the gap before the next sentence.
        if offset_ms < available_ts[0][0] and offset_ms >= last_sentence_end:
            offset += step_length_samples
            continue
        seg = y[offset : offset + window_length_samples]
        first_hop = samples_to_hops(offset, hop_length)
        mfcc_window = mfcc[:, first_hop : first_hop + ref_window_length_hops]
        # Sentence starts are passed relative to the current offset.
        x_offset_ms, ts_ms = find_repetition(mfcc_window,
                                             seg,
                                             sr,
                                             hop_length,
                                             [t[0] - offset_ms for t in available_ts])
        if ts_ms is not None and x_offset_ms < step_length_ms:
            print("delete from {0} to {1}".format(samples_to_time(offset + millisecond_to_samples(x_offset_ms, sr), sr), ms_to_time(offset_ms + ts_ms)))
            deletion_suggestions.append((offset_ms + x_offset_ms, offset_ms + ts_ms))
        offset += step_length_samples
        # Once the window start passes a sentence start, consume that sentence.
        if offset_ms + step_length_ms > available_ts[0][0]:
            last_sentence_end = available_ts[0][1]
            available_ts = available_ts[1:]

    # Group suggestions whose starts are less than 250 ms apart.
    grouped = []
    cur_deletion = None
    for sugg in deletion_suggestions:
        if cur_deletion is not None and sugg[0] - cur_deletion[-1][0] < 250:
            cur_deletion.append(sugg)
        else:
            if cur_deletion is not None:
                grouped.append(cur_deletion)
            cur_deletion = [sugg]
    # Flush the trailing group; without this the last deletion is lost.
    if cur_deletion is not None:
        grouped.append(cur_deletion)

    deletions = [(np.mean([d[0] for d in ds]), np.max([d[1] for d in ds])) for ds in grouped]
    for n, d in enumerate(deletions):
        offs = [abs(d[0]-ts[0]) for ts in sentence_timestamps]
        i = np.argmin(offs)
        if offs[i] < 150:
            # Snap the start to the nearby sentence start.
            deletions[n] = (sentence_timestamps[i][0], d[1])
        else:
            deletions[n] = (d[0], d[1])
    return deletions


def make_widths_equal(fig, rect, ax1, ax2, ax3, pad):
# pad in inches
divider = VBoxDivider(
@@ -271,20 +402,36 @@ def make_widths_equal(fig, rect, ax1, ax2, ax3, pad):


if __name__ == '__main__':
#vad = webrtcvad.Vad()
#hop_length = 128
#n_mfcc = 13

#frame_duration_ms = 10
fp = "hard_piece_7.wav"
y, sr = librosa.load(fp, mono=True)
vad = webrtcvad.Vad()
hop_length = 128
n_mfcc = 42

fp = "hard_pieces.wav"
print("loading file ...")
y, sr = librosa.load(fp, mono=True, sr=32000)
print("calculating mfcc ...")
mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=n_mfcc)[1:,:]
song = AudioSegment.from_wav(fp)
mf_w = mfcc.shape[1]
l = y.shape[0]
print(l / mf_w)

ts_non_sil_ms = non_silent_chunks(song)

#print(y.shape)
#mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=n_mfcc)[1:,:]
#autocorr = librosa.autocorrelate(y)
#fig, ax = plt.subplots()
#ax.plot(autocorr)
#plt.show()
#ts_non_sil_ms = [ t[0] for t in non_silent_chunks(song) ]
#print(mfcc.shape)
#print("finding reps ...")
dels = cont_find_repetitions(y, sr, hop_length, ts_non_sil_ms)

for d in dels:
print("{0}\t{1}\tdelete".format(d[0]/1000, d[1]/1000))
#window_length_ms = 1000
#window_length_samples = millisecond_to_samples(window_length_ms, sr)
#seg = y[25280 : 25280 + window_length_samples]

#seg_duration_ms = 100
#seg_duration_samples = millisecond_to_samples(seg_duration_ms, sr)
@@ -305,16 +452,36 @@ if __name__ == '__main__':
##(seg, offset) = segs[0]

fp_segment = "segment.wav"
seg, sr_seg = librosa.load(fp_segment, mono=True)

assert sr==sr_seg

##for seg in segs:
#mfcc_seg = librosa.feature.mfcc(y=seg, sr=sr_seg, hop_length=hop_length, n_mfcc=n_mfcc)[1:,:]
#xsim = calculate_best_offset(mfcc, mfcc_seg, sr)

#seg = y
#sr_seg = sr
seg, sr_seg = librosa.load(fp_segment, mono=True, sr=32000)

#assert sr==sr_seg
#mfcc_window = mfcc[:,1000:]

#x_offset, ts_ms = find_repetition(mfcc_window, seg, sr, hop_length, [ t[0] for t in ts_non_sil_ms], plot_result=True)
#if ts_ms is not None:
# print("starting from {0} the seg is repeated at {1}".format(ms_to_time(x_offset), ms_to_time(ts_ms)))
#else:
# print("no rep found")

#cutoff = int(0.2*len(seg))
#print(samples_to_millisecond(cutoff, sr))

#print("calculating xcross ...")
#xsim = librosa.segment.cross_similarity(mfcc, mfcc, mode='affinity', metric='cosine')
#chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=hop_length)
#mfcc_stack = librosa.feature.stack_memory(mfcc, n_steps=10, delay=3)
#xsim = librosa.segment.recurrence_matrix(mfcc, mode='affinity', metric='cosine')
#lag = librosa.segment.recurrence_to_lag(xsim, pad=False)

#xsim = librosa.segment.recurrence_matrix(mfcc, mode='affinity', metric='cosine',
# width=50)
#fig, ax = plt.subplots(nrows=1, sharex=True)
#img = librosa.display.specshow(xsim, x_axis='s', y_axis='s', hop_length=hop_length, ax=ax, cmap='magma_r')
#plt.show()
print("detecting lines ...")
#detect_lines(np.flip(xsim, 0), len(y), len(y), plot_result=True)
#print(detect_lines(xsim))
#ax.imshow(np.transpose(xsim), aspect='auto')
#ax[1].imshow(diffs_penalised)
@@ -323,21 +490,6 @@ if __name__ == '__main__':

#make_widths_equal(fig, 111, ax[0], ax[1], ax[2], pad=0.5)
#plt.show()
found_starts = sorted([ samples_to_millisecond(y0, sr) for y0 in detect_lines(None, len(seg), len(y))])

def f(ts, start):
return abs(ts[0] - start)

closest = map2d(ts_non_sil_ms, found_starts, f)
plt.imshow(closest)
plt.show()
latest = -1
for i, row in enumerate(closest):
# min silence len = 400
if min(row) < min_silence_len / 2:
latest = ts_non_sil_ms[i]
print("delete until:", ms_to_time(latest[0]))

#print("possible starts:", [ ms_to_time(t) for t in found_starts])
#for n, seg in enumerate(segs):
# sf.write('part' + str(n) + '.wav', seg, sr)


+ 89
- 0
app/Diffs.hs Parādīt failu

@@ -0,0 +1,89 @@
module Diffs where

import Data.Int (Int32, )
import Control.Arrow (arr, (<<<), (^<<), )
import qualified Synthesizer.Causal.Process as Causal
import qualified Sound.SoxLib as SoxLib
import qualified Data.StorableVector.Lazy as SVL
import qualified Synthesizer.Storable.Signal as SigSt
import qualified Synthesizer.Generic.Signal as Sig
import qualified Synthesizer.Basic.Binary as Bin
import qualified Synthesizer.Generic.Analysis as Ana
import qualified Synthesizer.Generic.Cut as Cut
import qualified Synthesizer.Generic.Fourier as Four
import qualified Foreign.Storable as Stor

import qualified Number.Complex as C
import qualified Algebra.Additive as A
import qualified Algebra.Transcendental as T
import Graphics.Matplotlib

import Driver
import Types

-- Experimental correlation of two recordings.
--
-- Reads "out002.wav" and "out004.wav", runs both through 'prepare', keeps the
-- first fifth of the shorter length from each, pads them with zeros and applies
-- 'fourTrafo'. The results are forward-transformed again, the first is
-- multiplied element-wise with the complex conjugate of the second, and the
-- product is transformed back. The lengths of both excerpts and of the result
-- are printed, and the real part of the result is plotted on screen.
--
-- NOTE(review): 'fsa'/'fsb' are already outputs of 'fourTrafo' and are passed
-- through 'Four.transformForward' a second time below -- confirm this is
-- intended.
calcDiff :: IO ()
calcDiff = withAudio "out002.wav" $ \a' -> withAudio "out004.wav" $ \b' -> do
let a = prepare a'
b = prepare b'
-- length of the shorter of the two prepared signals
maxLen = min (Cut.length a) (Cut.length b)
l = maxLen `div` 5 -- take first 20%
sa = Cut.take l a
sb = Cut.take l b
fsa = fourTrafo $ padWithZeros sa
fsb = fourTrafo $ padWithZeros sb
--negb = Causal.apply (arr (*(-1))) sb
--conjb = Causal.apply (arr conjugate) sb
-- product of one spectrum with the conjugate of the other, transformed back
let corr = Four.transformBackward
(Sig.zipWith (*) (Four.transformForward fsa) (Causal.apply (arr C.conjugate) $ Four.transformForward fsb))
print $ Cut.length sa
print $ Cut.length sb
print $ Cut.length corr
let reals = (Causal.apply (arr $ C.real) corr) :: SVL.Vector Float
-- imgs is currently unused: its only consumer (zs) is commented out
imgs = (Causal.apply (arr $ C.imag) corr) :: SVL.Vector Float
ys = SVL.unpack reals :: [Float]
--zs = SVL.unpack imgs :: [Float]
xs = [1..length ys]
onscreen $ line xs ys
--onscreen $ line xs zs


-- | Convert the first channel of a raw interleaved signal with
-- 'Bin.toCanonical'.
--
-- The input is split with @SVL.deinterleave 2@ and only the first resulting
-- vector is kept.
prepare :: SVL.Vector Int32 -> SVL.Vector (C.T Float)
prepare sig = Causal.apply (arr Bin.toCanonical) firstChannel
  where
    firstChannel = head (SVL.deinterleave 2 sig)

-- | Return the first 100 samples of "out003.wav".
--
-- NOTE(review): 'withAudio' reads the file lazily; confirm that the returned
-- prefix is forced before the file is closed.
readFirst :: IO (SVL.Vector Int32)
readFirst = withAudio "out003.wav" (return . Cut.take 100)

-- Surround a signal with zeros: half of the signal's length is prepended and
-- the same amount appended.
--padWithZeros :: SVL.Vector (C.T Float) -> SVL.Vector (C.T Float)
padWithZeros x =
  let halfLen = SVL.length x `div` 2
      padding = SVL.take halfLen (SVL.repeat SVL.defaultChunkSize 0)
  in padding <> x <> padding

-- | Infinite lazy vector of @sin@ evaluated at 0, 0.1, 0.2, ...
sine :: SVL.Vector Float
sine = SVL.pack SVL.defaultChunkSize (map sin arguments)
  where
    arguments = [0 :: Float, 0.1 ..]

-- | Plot the values of a vector on screen against their 1-based position.
plotVec :: SVL.Vector Float -> IO ()
plotVec v = onscreen (line positions values)
  where
    values = SVL.unpack v
    positions = [1 .. length values]

-- | Real part of the forward Fourier transform of a real-valued signal.
--
-- Each sample is lifted with 'C.fromReal', the signal is passed through
-- 'Four.transformForward', and 'C.real' is taken of every result element.
fourTrafo :: (A.C a, T.C a, Stor.Storable a) => SVL.Vector a -> SVL.Vector a
fourTrafo sig = Causal.apply (arr C.real) spectrum
  where
    lifted = Causal.apply (arr C.fromReal) sig
    spectrum = Four.transformForward lifted

-- | Plot the real part of the forward Fourier transform of an audio file.
--
-- The file is read with 'withAudio' and converted by 'prepare'; the length of
-- the transformed signal is printed before the plot is shown.
four :: FilePath -> IO ()
four input = withAudio input $ \raw -> do
  let spectrum = Four.transformForward (prepare raw)
      realParts = Causal.apply (arr C.real) spectrum :: SVL.Vector Float
      ys = SVL.unpack realParts :: [Float]
      xs = [1 .. length ys]
  print (SVL.length spectrum)
  onscreen (line xs ys)

+ 78
- 54
app/Main.hs Parādīt failu

@@ -5,46 +5,63 @@ import Parser
import Driver
import Types

runDehum :: Flags -> FilePath -> FilePath -> IO ()
runDehum flags input output =
withSound flags input $ \fmtIn params sig ->
SoxLib.withWrite
(writerInfoFromFormat fmtIn params)
output $ \fmtOut ->
SoxLib.writeStorableVectorLazy fmtOut $
SVL.interleaveFirstPattern $
map
(Causal.apply
(arr (Bin.fromCanonicalWith Real.roundSimple)
<<<
dehum params
<<<
arr Bin.toCanonical)) $
SVL.deinterleave (numChannels params) sig
import qualified System.Console.GetOpt as Opt
import qualified Algebra.RealRing as Real
import qualified Data.StorableVector.Lazy as SVL
import qualified Data.List.HT as ListHT
import qualified Synthesizer.Basic.Binary as Bin
import qualified Synthesizer.Causal.Process as Causal
import Shell.Utility.Exit (exitFailureMsg)
import System.Environment (getArgs, getProgName, )
import Control.Monad (when, )
import Text.Printf (printf, )
import Data.Foldable (forM_, )
import Control.Arrow (arr, (<<<), (^<<), )
import System.Console.GetOpt
(getOpt, usageInfo, ArgDescr(NoArg, ReqArg), )
import qualified Sound.SoxLib as SoxLib
import Data.Int (Int32, )

runEnvelope :: Flags -> FilePath -> FilePath -> IO ()
runEnvelope flags input output =
withSound flags input $ \fmtIn params sig ->
SoxLib.withWrite
(monoInfoFromFormat fmtIn params)
output $ \fmtOut ->
SoxLib.writeStorableVectorLazy fmtOut $
Causal.apply
(arr (Bin.fromCanonicalWith Real.roundSimple)) $
trackEnvelope params $
map
(Causal.apply
(arr (^2)
<<<
dehum params
<<<
arr Bin.toCanonical)) $
SVL.deinterleave (numChannels params) sig
--runDehum :: Flags -> FilePath -> FilePath -> IO ()
--runDehum flags input output =
-- withSound flags input $ \fmtIn params sig ->
-- SoxLib.withWrite
-- (writerInfoFromFormat fmtIn params)
-- output $ \fmtOut ->
-- SoxLib.writeStorableVectorLazy fmtOut $
-- SVL.interleaveFirstPattern $
-- map
-- (Causal.apply
-- (arr (Bin.fromCanonicalWith Real.roundSimple)
-- <<<
-- dehum params
-- <<<
-- arr Bin.toCanonical)) $
-- SVL.deinterleave (numChannels params) sig

runSizes :: Flags -> FilePath -> IO ()
runSizes flags input =
withSound flags input $ \_fmt params sig ->
mapM_ print $ pieceDurations params sig
--runEnvelope :: Flags -> FilePath -> FilePath -> IO ()
--runEnvelope flags input output =
-- withSound flags input $ \fmtIn params sig ->
-- SoxLib.withWrite
-- (monoInfoFromFormat fmtIn params)
-- output $ \fmtOut ->
-- SoxLib.writeStorableVectorLazy fmtOut $
-- Causal.apply
-- (arr (Bin.fromCanonicalWith Real.roundSimple)) $
-- trackEnvelope params $
-- map
-- (Causal.apply
-- (arr (^2)
-- <<<
-- dehum params
-- <<<
-- arr Bin.toCanonical)) $
-- SVL.deinterleave (numChannels params) sig

--runSizes :: Flags -> FilePath -> IO ()
--runSizes flags input =
-- withSound flags input $ \_fmt params sig ->
-- mapM_ print $ pieceDurations params sig

runLabels :: Flags -> FilePath -> IO ()
runLabels flags input =
@@ -61,6 +78,12 @@ runLabels flags input =
prefetch (preStart params) $
pieceDurations params sig

-- | Chop an audio file into pieces with 'chopLazy' and return them.
--
-- Before returning, the last character of @show@ applied to the pieces is
-- printed. NOTE(review): presumably this is there to force the lazily read
-- data while the input is still open -- confirm.
getChops :: Flags -> FilePath -> IO [SVL.Vector Int32]
getChops flags input =
  withSound flags input $ \_ params sig -> do
    let pieces = chopLazy params sig
    putStrLn [last (show pieces)]
    return $! pieces
{- |
> runChop flags "in.wav" "%03d.wav"
-}
@@ -68,10 +91,11 @@ runChop :: Flags -> FilePath -> FilePath -> IO ()
runChop flags input output =
withSound flags input $ \fmtIn params sig ->
forM_ (zip [(0::Int)..] $ chopLazy params sig) $ \(n,piece) ->
SoxLib.withWrite
(writerInfoFromFormat fmtIn params)
(printf output n) $ \fmtOut ->
SoxLib.writeStorableVectorLazy fmtOut piece
print piece
--SoxLib.withWrite
-- (writerInfoFromFormat fmtIn params)
-- (printf output n) $ \fmtOut ->
-- SoxLib.writeStorableVectorLazy fmtOut piece

main :: IO ()
main = SoxLib.formatWith $ do
@@ -83,15 +107,15 @@ main = SoxLib.formatWith $ do

flags <- foldl (>>=) (return defltFlags) opts

if flagComputeEnvelope flags
then
case files of
[input,output] -> runEnvelope flags input output
[] -> exitFailureMsg "need input and output file envelope computation"
_ -> exitFailureMsg "more than two file names given"
else
case files of
[input,output] -> runChop flags input output
[input] -> runLabels flags input
[] -> exitFailureMsg "no input or output given"
_ -> exitFailureMsg "more than two file names given"
--if flagComputeEnvelope flags
-- then
-- case files of
-- [input,output] -> runEnvelope flags input output
-- [] -> exitFailureMsg "need input and output file envelope computation"
-- _ -> exitFailureMsg "more than two file names given"
-- else
case files of
--[input,output] -> runChop flags input output
[input] -> getChops flags input >>= print --runLabels flags input
[] -> exitFailureMsg "no input or output given"
_ -> exitFailureMsg "more than two file names given"

+ 5
- 0
autocut.cabal Parādīt failu

@@ -28,6 +28,7 @@ library
Driver
Lib
Parser
Plot
Types
other-modules:
Paths_autocut
@@ -36,6 +37,7 @@ library
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints
build-depends:
base >=4.7 && <5
, matplotlib
, numeric-prelude
, shell-utility
, soxlib
@@ -48,6 +50,7 @@ library
executable autocut-exe
main-is: Main.hs
other-modules:
Diffs
Paths_autocut
hs-source-dirs:
app
@@ -55,6 +58,7 @@ executable autocut-exe
build-depends:
autocut
, base >=4.7 && <5
, matplotlib
, numeric-prelude
, shell-utility
, soxlib
@@ -75,6 +79,7 @@ test-suite autocut-test
build-depends:
autocut
, base >=4.7 && <5
, matplotlib
, numeric-prelude
, shell-utility
, soxlib


+ 2
- 0
package.yaml Parādīt failu

@@ -28,6 +28,8 @@ dependencies:
- utility-ht
- storablevector
- shell-utility
- matplotlib
- webrtc-vad

ghc-options:
- -Wall


+ 41
- 14
src/Driver.hs Parādīt failu

@@ -1,6 +1,10 @@
module Driver where
module Driver (withSound, writerInfoFromFormat, monoInfoFromFormat, readAudio, withAudio, processAudio) where

import Data.Maybe (fromMaybe, )
import Foreign.Storable (peek, )
import qualified Data.StorableVector.Lazy as SVL
import qualified Sound.SoxLib as SoxLib
import Data.Int (Int32, )
import Types

withSound ::
@@ -11,15 +15,13 @@ withSound ::
withSound flags path act =
SoxLib.withRead SoxLib.defaultReaderInfo path $ \fmtPtr -> do
fmt <- peek fmtPtr
let numChan =
fromMaybe 1 $ SoxLib.channels $ SoxLib.signalInfo fmt
rate =
case flagSampleRate flags of
Just r -> r
Nothing ->
case SoxLib.rate $ SoxLib.signalInfo fmt of
Just r -> r
Nothing -> defaultSampleRate
let numChan = fromMaybe 1 $ SoxLib.channels $ SoxLib.signalInfo fmt
rate = case flagSampleRate flags of
Just r -> r
Nothing ->
case SoxLib.rate $ SoxLib.signalInfo fmt of
Just r -> r
Nothing -> defaultSampleRate
params =
Params {
sampleRate = rate,
@@ -35,8 +37,7 @@ withSound flags path act =
(case flagBlocksize flags of
SVL.ChunkSize size -> SVL.ChunkSize $ numChan * size)

monoInfoFromFormat ::
SoxLib.Format mode -> Params -> SoxLib.WriterInfo
monoInfoFromFormat :: SoxLib.Format mode -> Params -> SoxLib.WriterInfo
monoInfoFromFormat fmtIn params =
SoxLib.defaultWriterInfo {
SoxLib.writerSignalInfo = Just $
@@ -47,8 +48,7 @@ monoInfoFromFormat fmtIn params =
SoxLib.writerEncodingInfo = Just $ SoxLib.encodingInfo fmtIn
}

writerInfoFromFormat ::
SoxLib.Format mode -> Params -> SoxLib.WriterInfo
writerInfoFromFormat :: SoxLib.Format mode -> Params -> SoxLib.WriterInfo
writerInfoFromFormat fmtIn params =
SoxLib.defaultWriterInfo {
SoxLib.writerSignalInfo = Just $
@@ -57,3 +57,30 @@ writerInfoFromFormat fmtIn params =
},
SoxLib.writerEncodingInfo = Just $ SoxLib.encodingInfo fmtIn
}

-- | Read an audio file into a lazy storable vector of raw samples.
--
-- The channel count from the file's signal info (1 if absent) is printed and
-- used to size the read chunks (@65536 * numChan@ samples per chunk).
--
-- 'SoxLib.readStorableVectorLazy' delivers its chunks on demand, so the
-- vector is traversed completely (via 'SVL.length') before the handle is
-- closed. Otherwise the caller would receive a vector whose unread chunks
-- refer to an already closed file.
readAudio :: FilePath -> IO (SVL.Vector Int32)
readAudio path = do
  fmtPtr <- SoxLib.openRead SoxLib.defaultReaderInfo path
  fmt <- peek fmtPtr
  let numChan = fromMaybe 1 $ SoxLib.channels $ SoxLib.signalInfo fmt
  print numChan

  v <- SoxLib.readStorableVectorLazy fmtPtr (SVL.chunkSize $ 65536 * numChan)
  -- Force every chunk while the file is still open, then close it.
  SVL.length v `seq` SoxLib.close fmtPtr
  return v

-- | Open an audio file, read it lazily and hand the samples to an action.
--
-- The file is opened with 'SoxLib.withRead'; the read chunk size is
-- @65536 * channels@ samples, where the channel count comes from the file's
-- signal info (1 if absent). The action's result is returned.
withAudio :: FilePath -> (SVL.Vector Int32 -> IO a) -> IO a
withAudio path action =
  SoxLib.withRead SoxLib.defaultReaderInfo path $ \fmtPtr -> do
    fmt <- peek fmtPtr
    let channelCount = fromMaybe 1 (SoxLib.channels (SoxLib.signalInfo fmt))
        blockSize = SVL.chunkSize (65536 * channelCount)
    SoxLib.readStorableVectorLazy fmtPtr blockSize >>= action

-- | Read @input@ with the default flags, transform its samples with the given
-- action and write the result to @output@.
--
-- The writer info is derived from the input format and parameters via
-- 'writerInfoFromFormat'.
processAudio :: FilePath -> FilePath -> (SVL.Vector Int32 -> IO (SVL.Vector Int32)) -> IO ()
processAudio input output action =
  withSound defltFlags input $ \fmtIn params sig ->
    action sig >>= \processed ->
      SoxLib.withWrite (writerInfoFromFormat fmtIn params) output $ \fmtOut ->
        SoxLib.writeStorableVectorLazy fmtOut processed

+ 8
- 22
src/Lib.hs Parādīt failu

@@ -1,4 +1,4 @@
module Lib (someFunc) where
module Lib (prefetch, pieceDurations, chopLazy, chop) where

import qualified Synthesizer.Storable.Signal as SigSt
import qualified Synthesizer.ChunkySize.Cut as CutCS
@@ -9,40 +9,22 @@ import qualified Synthesizer.State.Cut as Cut
import qualified Synthesizer.State.Signal as Sig
import qualified Synthesizer.Basic.Binary as Bin

import qualified Sound.SoxLib as SoxLib

import qualified Data.StorableVector.Lazy as SVL
import Foreign.Storable (peek, )

import qualified Control.Monad.Trans.State as MS
import Control.Monad (when, )
import Control.Arrow (arr, (<<<), (^<<), )

import qualified Data.List.HT as ListHT
import qualified Data.List as List
import Data.Tuple.HT (swap, )
import Data.Foldable (forM_, )
import Data.Maybe (fromMaybe, )

import qualified System.Console.GetOpt as Opt
import System.Console.GetOpt
(getOpt, usageInfo, ArgDescr(NoArg, ReqArg), )
import System.Environment (getArgs, getProgName, )
import Text.Printf (printf, )

import qualified System.Exit as Exit
import Shell.Utility.Exit (exitFailureMsg)

import qualified Algebra.RealRing as Real
import NumericPrelude.Numeric
import NumericPrelude.Base

import Data.Int (Int32, )

import Types
import Driver

import Prelude ()
import Prelude (, )

dehum :: Params -> Causal.T Float Float
dehum params =
@@ -63,9 +45,11 @@ trackEnvelope params =
.
foldl SigSt.mix SVL.empty

-- Float -> Bool
threshold :: Params -> Causal.T Float Bool
threshold params = Causal.map (< pauseVolume params)

-- Bool -> Bool
findStarts :: Params -> Causal.T Bool Bool
findStarts params =
flip Causal.fromState 0 $ \b ->
@@ -73,6 +57,7 @@ findStarts params =
then MS.modify succ >> evalReturn False
else do n <- MS.get; MS.put 0; return (n >= minPause params)

-- Bool -> Maybe Int
measurePauses :: Causal.T Bool (Maybe Int)
measurePauses =
flip Causal.fromState 0 $ \b ->
@@ -92,7 +77,9 @@ pieceDurations params =
(measurePauses <<< findStarts params <<< threshold params) .
Sig.fromStorableSignal .
trackEnvelope params .
-- on every channel:
map (Causal.apply (arr (^2) <<< dehum params <<< arr Bin.toCanonical)) .
-- separate channels ?
SVL.deinterleave (numChannels params)

pieceDurationsPrefetchLazy :: Params -> SVL.Vector Int32 -> [ChunkySize.T]
@@ -114,8 +101,7 @@ prefetch n (s:ss) =
then prefetch (n-s) ss
else (s-n) : ss

chop, chopLazy ::
Params -> SVL.Vector Int32 -> [SVL.Vector Int32]
chop, chopLazy :: Params -> SVL.Vector Int32 -> [SVL.Vector Int32]
chop params sig0 =
snd $
List.mapAccumL (\sig n -> swap $ SVL.splitAt n sig) sig0 $


+ 13
- 18
src/Parser.hs Parādīt failu

@@ -1,10 +1,23 @@
module Parser where

import qualified Sound.SoxLib as SoxLib
import qualified System.Console.GetOpt as Opt
import qualified Data.StorableVector.Lazy as SVL
import qualified System.Exit as Exit
import qualified Algebra.RealRing as Real
import Shell.Utility.Exit (exitFailureMsg)
import System.Console.GetOpt
(getOpt, usageInfo, ArgDescr(NoArg, ReqArg), )
import System.Environment (getArgs, getProgName, )
import Text.Printf (printf, )
import NumericPrelude.Numeric
import NumericPrelude.Base

import Types
import Driver

import Prelude ()

parseCard :: (Read a, Real.C a) => String -> String -> IO a
parseCard name str =
case reads str of
@@ -67,21 +80,3 @@ description =
return $ flags{flagComputeEnvelope = True})
"compute envelope for assistance in finding appropriate parameters" :
[]

defaultSampleRate :: SoxLib.Rate
defaultSampleRate = 44100

freq :: SoxLib.Rate -> (Flags -> Freq) -> (Flags -> Float)
freq sr acc flags =
(case acc flags of Freq f -> f) / realToFrac sr

time :: SoxLib.Rate -> (Flags -> Time) -> (Flags -> Int)
time sr acc flags =
round ((case acc flags of Time t -> t) * realToFrac sr)

formatFreq :: Freq -> String
formatFreq (Freq t) = show t -- ++ "Hz"

formatTime :: Time -> String
formatTime (Time t) = show t -- ++ "s"


+ 10
- 0
src/Plot.hs Parādīt failu

@@ -0,0 +1,10 @@
module Plot where

import Graphics.Matplotlib

-- | Sample test signal: a sine of period 10 whose amplitude is modulated by a
-- slower sine of period 90 that has been shifted and scaled into [0, 1].
signal :: [Double] -> [Double]
signal = map sample
  where
    sample x = (sin (x*3.14159/45) + 1) / 2 * (sin (x*3.14159/5))

-- | Show 'signal' evaluated at 1..1000 as a line plot on screen.
plot :: IO ()
plot =
  let positions = [1..1000]
  in onscreen (line positions (signal positions))

+ 20
- 0
src/Types.hs Parādīt failu

@@ -1,5 +1,8 @@
module Types where

import qualified Data.StorableVector.Lazy as SVL
import qualified Sound.SoxLib as SoxLib

newtype Time = Time Float
deriving (Eq, Show)

@@ -43,3 +46,20 @@ data Params =
pauseVolume :: Float,
minPause, preStart :: Int
}

-- | Render the number wrapped in a 'Freq' with 'show' (no unit suffix).
formatFreq :: Freq -> String
formatFreq wrapped = case wrapped of
  Freq value -> show value -- ++ "Hz"

-- | Render the number wrapped in a 'Time' with 'show' (no unit suffix).
formatTime :: Time -> String
formatTime wrapped = case wrapped of
  Time value -> show value -- ++ "s"

-- | Fallback sample rate (44100).
defaultSampleRate :: SoxLib.Rate
defaultSampleRate = 44100

-- | Select a frequency from the flags and divide it by the sample rate.
freq :: SoxLib.Rate -> (Flags -> Freq) -> (Flags -> Float)
freq sr acc flags = selected / realToFrac sr
  where
    Freq selected = acc flags

-- | Select a time from the flags, multiply it by the sample rate and round
-- the product to an 'Int'.
time :: SoxLib.Rate -> (Flags -> Time) -> (Flags -> Int)
time sr acc flags = round (selected * realToFrac sr)
  where
    Time selected = acc flags

+ 2
- 0
stack.yaml Parādīt failu

@@ -65,3 +65,5 @@ packages:
#
# Allow a newer minor version of GHC than the snapshot specifies
# compiler-check: newer-minor
extra-deps:
# - Chart-diagrams-1.9.3@sha256:63668daff044a79827b7edb265265a4a8237424abb8f808ad1fcbdb3d47e753d,1801

Notiek ielāde…
Atcelt
Saglabāt