diff --git a/.gitignore b/.gitignore index 3b66b74..9401372 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,6 @@ *.wav *.lock *_data +*.prof +labels.txt +*.png diff --git a/analysis.py b/analysis.py index fbe3a65..f211701 100644 --- a/analysis.py +++ b/analysis.py @@ -133,7 +133,7 @@ def spl_on_silence(): def non_silent_chunks(song): #song = AudioSegment.from_wav("recording.wav") - return detect_nonsilent(song, min_silence_len=400, silence_thresh=-50) + return detect_nonsilent(song, min_silence_len=10, silence_thresh=-50) def audiosegment_to_librosawav(audiosegment): @@ -175,7 +175,8 @@ def seg_is_speech(seg): offset = offset + n total += 1 - return speeches / total + #return speeches / total + return 1.0 def make_widths_equal(fig, rect, ax1, ax2, ax3, pad): @@ -193,7 +194,7 @@ if __name__ == '__main__': vad = webrtcvad.Vad() frame_duration_ms = 10 - fp = "hard_pieces.wav" + fp = "hard_piece_7.wav" y, sr = librosa.load(fp, mono=True, sr=32000) #pcm_data = y.tobytes() @@ -269,7 +270,8 @@ if __name__ == '__main__': continue max_j = i for j in range(i, n_segs): - if diffs_penalised[i,j] < 80: + if diffs[i,j] < 80: + #if diffs_penalised[i,j] < 80: max_j = j delete_segs[i:max_j] = True @@ -285,13 +287,14 @@ if __name__ == '__main__': #print("{0}\t{1}\tvad {2}".format(s1/1000, e1/1000, vad_coeff)) - fig, ax = plt.subplots(nrows=3, sharex=True) - ax[0].imshow(diffs) - ax[1].imshow(diffs_penalised) + #fig, ax = plt.subplots(nrows=3, sharex=True) + fig, ax = plt.subplots(nrows=1, sharex=True) + ax.imshow(diffs) + #ax[1].imshow(diffs_penalised) #ax[1].imshow(np.reshape(vad_coeffs, (1, n_segs))) - ax[2].imshow(np.reshape(lengths, (1, n_segs))) + #ax[2].imshow(np.reshape(lengths, (1, n_segs))) - make_widths_equal(fig, 111, ax[0], ax[1], ax[2], pad=0.5) + #make_widths_equal(fig, 111, ax[0], ax[1], ax[2], pad=0.5) plt.show() #for n, seg in enumerate(segs): # sf.write('part' + str(n) + '.wav', seg, sr) diff --git a/analysis_cont.py b/analysis_cont.py index fd5791d..f3a0bb5 100644 --- a/analysis_cont.py +++ b/analysis_cont.py @@ -12,10 +12,12 @@ import random from mpl_toolkits.axes_grid1.axes_divider import VBoxDivider import mpl_toolkits.axes_grid1.axes_size as Size import cv2 +import sys import webrtcvad min_silence_len = 400 +frame_duration_ms = 10 def calc_dtw_sim(y1, y2, sr1, sr2, plot_result=False): @@ -159,9 +161,13 @@ def samples_to_millisecond(samples, sr): return (samples / sr) * 1000 +def samples_to_time(samples, sr): + return ms_to_time(samples_to_millisecond(samples, sr)) + + def ms_to_time(ms): secs = ms / 1000 - return "{0}:{1}".format(math.floor(secs / 60), secs % 60) + return "{0}:{1:.4f}".format(math.floor(secs / 60), secs % 60) def seg_is_speech(seg): @@ -182,21 +188,29 @@ def seg_is_speech(seg): offset = offset + n total += 1 - #return speeches / total - return 1.0 + return speeches / total def calculate_best_offset(mfcc_ref, mfcc_seg, sr): return librosa.segment.cross_similarity(mfcc_seg, mfcc_ref, mode='affinity', metric='cosine') -def detect_lines(img, duration_x, duration_y): +def detect_lines(img, duration_x, duration_y, plot_result=False): #print(img.shape) #print(np.min(img), np.max(img)) - img = cv2.imread('affine_similarity.png') - gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + gray = np.vectorize(int)((1-img) * 255).astype('uint8') + img = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR) + #print(img, type(img)) + #img = cv2.imread('affine_similarity_2.png') + #gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + #cv2.imshow("gray", gray) + #cv2.waitKey(0) + #print(gray, type(gray), gray.shape, gray.dtype) + #print(gray2, type(gray2), gray2.shape, gray2.dtype) kernel_size = 5 blur_gray = cv2.GaussianBlur(gray, (kernel_size, kernel_size), 0) + #cv2.imshow("blur gray", blur_gray) + #cv2.waitKey(0) low_threshold = 50 high_threshold = 150 @@ -207,7 +221,8 @@ def detect_lines(img, duration_x, duration_y): threshold = 15 # minimum number of votes (intersections in Hough grid cell) min_line_length = 50 # minimum number of pixels making up a line max_line_gap = 20 # maximum gap in pixels between connectable line segments - line_image = np.copy(img) * 0 # creating a blank to draw lines on + if plot_result: + line_image = np.copy(img) * 0 # creating a blank to draw lines on # Run Hough on edge detected image # Output "lines" is an array containing endpoints of detected line segments @@ -218,35 +233,54 @@ def detect_lines(img, duration_x, duration_y): scale_x = duration_x / width scale_y = duration_y / height - print(img.shape, scale_x, scale_y, duration_x, duration_y) + #print(img.shape, scale_x, scale_y, duration_x, duration_y) #slope = duration_y / duration_x slope = 1 expected_slope = scale_x / scale_y + #print(expected_slope) + #expected_slope = 1.0 # y is inverted by opencv #expected_slope = 0.101694915 - print(expected_slope) + ls = [] offsets = [] - for line in lines: - for x1,y1,x2,y2 in line: - # swapped y1 and y2 since y is measured from the top - slope = (y1-y2)/(x2-x1) - if abs(slope - expected_slope) < 0.03: - cv2.line(line_image,(x1,y1),(x2,y2),(255,0,0),5) - cv2.putText(img, "{:.2f}".format(slope), (x1, y1), fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=0.5,color=(0, 0, 255)) - if (x1 / width) < 0.15: - print(height-y1) - y = height - y1 - y0 = y - x1 * slope - offsets.append(y0 * scale_y) - #actual_lines.append((x1 * scale_x, (height - y1) * scale_y, x2 * scale_x, (height - y2) * scale_y)) + xs = [] + if lines is not None: + for line in lines: + for x1,y1,x2,y2 in line: + # swapped y1 and y2 since y is measured from the top + slope = (y2-y1)/(x2-x1) if x2 != x1 else 42 + if abs(slope - expected_slope) < 0.15:#and (x1 / width) < 0.15: + y = y1 + y0 = (y - x1 * slope) + if plot_result: + #cv2.line(line_image,(0,int(y0)),(x2,y2),(0,255,0),5) + cv2.line(line_image,(x1, y1),(x2,y2),(255,0,0),5) + cv2.putText(img, "{:.2f}".format(slope), (x1, y1), fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=2,color=(0, 0, 255)) + #if (x1 / width) < 0.15: + #print(height-y1) + #y = height - y1 + #y = y1 + #y0 = y - x1 * slope + #offsets.append(y0 * scale_y) + #xs.append(x1) + ls.append((x1, y1, slope)) + #actual_lines.append((x1 * scale_x, (height - y1) * scale_y, x2 * scale_x, (height - y2) * scale_y)) #print(max(slopes)) + x_min = min(ls, key=lambda a: a[0])[0] if len(ls) > 0 else 42 # just something > 10 + offsets = [ (y1 + (x_min - x1)*slope) * scale_y for x1, y1, slope in ls ] + if plot_result: + for x1, y1, slope in ls: + y = y1 + (x_min -x1)*slope + #cv2.line(line_image,(x_min,int(y)),(x1,y1),(0,255,0),5) - lines_edges = cv2.addWeighted(img, 0.8, line_image, 1, 0) - #cv2.imshow("lines", lines_edges) - #cv2.waitKey(0) - return offsets + #cv2.line(line_image, (x_min, 0), (x_min, height-1), (0, 0, 255), 2) + lines_edges = cv2.addWeighted(img, 0.8, line_image, 1, 0) + lines_edges_resized = cv2.resize(lines_edges, (int(1024 * duration_x / duration_y ), 1024)) + cv2.imshow("lines", lines_edges_resized) + cv2.waitKey(0) + return (x_min*scale_x, offsets) def map2d(x, y, f): @@ -259,6 +293,103 @@ def map2d(x, y, f): return res +def find_repetition(mfcc_ref, seg, sr, hop_length, sentence_timestamps, plot_result=False): + mfcc_seg = librosa.feature.mfcc(y=seg, sr=sr, hop_length=hop_length, n_mfcc=n_mfcc)[1:,:] + xsim = calculate_best_offset(mfcc_ref, mfcc_seg, sr) + x_min, offsets = detect_lines(xsim, len(seg), mfcc_ref.shape[1] * hop_length, plot_result=plot_result) + found_starts = sorted([ samples_to_millisecond(y0, sr) for y0 in offsets ]) + + def f(ts, start): + return abs(ts - start) + + closest = map2d(sentence_timestamps, found_starts, f) + if plot_result: + plt.imshow(closest) + plt.show() + latest = None + for i, row in enumerate(closest): + if len(row) == 0: + continue + if min(row) < min_silence_len / 2: + latest = sentence_timestamps[i] + return (samples_to_millisecond(x_min, sr), latest) + + +def samples_to_hops(samples, hop_length): + return round(samples / hop_length) + + +def hops_to_samples(hops, hop_length): + return round(hop_length * hops) + + +def cont_find_repetitions(y, sr, hop_length, sentence_timestamps): + assert sorted(sentence_timestamps, key=lambda t: t[0]) == sentence_timestamps + #print(y.shape) + mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=n_mfcc)[1:,:] + + step_length_ms = 200 + step_length_samples = millisecond_to_samples(step_length_ms, sr) + window_length_ms = 1500 + window_length_samples = millisecond_to_samples(window_length_ms, sr) + ref_window_length_ms = 20*1000 # 10 sekunden + ref_window_length_samples = millisecond_to_samples(ref_window_length_ms, sr) + ref_window_length_hops = samples_to_hops(ref_window_length_samples, hop_length) + + offset = 0 + available_ts = sentence_timestamps + last_sentence_end = 0 + deletion_suggestions = [] + + while offset + step_length_samples < len(y) and len(available_ts) > 0: + offset_ms = samples_to_millisecond(offset, sr) + #print(ms_to_time(offset_ms), file=sys.stderr) + if offset_ms < available_ts[0][0] and offset_ms >= last_sentence_end: + offset += step_length_samples + continue + seg = y[ offset : offset + window_length_samples ] + # no longer needed since skipping based on sentence timestamps? + #if seg_is_speech(seg) < 0.5: + # offset += step_length_samples + # continue + relevant_start = offset_ms + mfcc_window = mfcc[:,samples_to_hops(offset, hop_length) : samples_to_hops(offset, hop_length) + ref_window_length_hops] + x_offset_ms, ts_ms = find_repetition(mfcc_window, + seg, + sr, + hop_length, + [ t[0] - offset_ms for t in available_ts ]) + if ts_ms is not None and x_offset_ms < step_length_ms: + print("delete from {0} to {1}".format(samples_to_time(offset + millisecond_to_samples(x_offset_ms, sr), sr), ms_to_time(offset_ms + ts_ms))) + deletion_suggestions.append((offset_ms + x_offset_ms, offset_ms + ts_ms)) + #print("window {0} - {1} is repeated at: {2}".format(samples_to_time(offset, sr), samples_to_time(offset + window_length_samples, sr), ms_to_time(ts_ms))) + offset += step_length_samples + if offset_ms + step_length_ms > available_ts[0][0]: + last_sentence_end = available_ts[0][1] + available_ts = available_ts[1:] + #available_ts = [t for t in ts_non_sil_ms if t[0] > offset_ms ] + deletions = [] + cur_deletion = None + for sugg in deletion_suggestions: + if cur_deletion is None: + cur_deletion = [sugg] + else: + if sugg[0] - cur_deletion[-1][0] < 250: + cur_deletion.append(sugg) + else: + deletions.append(cur_deletion) + cur_deletion = [sugg] + deletions = [(np.mean([d[0] for d in ds]), np.max([d[1] for d in ds])) for ds in deletions] + for n, d in enumerate(deletions): + offs = [abs(d[0]-ts[0]) for ts in sentence_timestamps] + i = np.argmin(offs) + if offs[i] < 150: + deletions[n] = (sentence_timestamps[i][0], d[1]) + else: + deletions[n] = (d[0], d[1]) + return deletions + + def make_widths_equal(fig, rect, ax1, ax2, ax3, pad): # pad in inches divider = VBoxDivider( @@ -271,20 +402,36 @@ def make_widths_equal(fig, rect, ax1, ax2, ax3, pad): if __name__ == '__main__': - #vad = webrtcvad.Vad() - #hop_length = 128 - #n_mfcc = 13 - - #frame_duration_ms = 10 - fp = "hard_piece_7.wav" - y, sr = librosa.load(fp, mono=True) + vad = webrtcvad.Vad() + hop_length = 128 + n_mfcc = 42 + + fp = "hard_pieces.wav" + print("loading file ...") + y, sr = librosa.load(fp, mono=True, sr=32000) + print("calculating mfcc ...") + mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=n_mfcc)[1:,:] song = AudioSegment.from_wav(fp) + mf_w = mfcc.shape[1] + l = y.shape[0] + print(l / mf_w) ts_non_sil_ms = non_silent_chunks(song) - #print(y.shape) - #mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=n_mfcc)[1:,:] + #autocorr = librosa.autocorrelate(y) + #fig, ax = plt.subplots() + #ax.plot(autocorr) + #plt.show() + #ts_non_sil_ms = [ t[0] for t in non_silent_chunks(song) ] #print(mfcc.shape) + #print("finding reps ...") + dels = cont_find_repetitions(y, sr, hop_length, ts_non_sil_ms) + + for d in dels: + print("{0}\t{1}\tdelete".format(d[0]/1000, d[1]/1000)) + #window_length_ms = 1000 + #window_length_samples = millisecond_to_samples(window_length_ms, sr) + #seg = y[25280 : 25280 + window_length_samples] #seg_duration_ms = 100 #seg_duration_samples = millisecond_to_samples(seg_duration_ms, sr) @@ -305,16 +452,36 @@ if __name__ == '__main__': ##(seg, offset) = segs[0] fp_segment = "segment.wav" - seg, sr_seg = librosa.load(fp_segment, mono=True) - - assert sr==sr_seg - - ##for seg in segs: - #mfcc_seg = librosa.feature.mfcc(y=seg, sr=sr_seg, hop_length=hop_length, n_mfcc=n_mfcc)[1:,:] - #xsim = calculate_best_offset(mfcc, mfcc_seg, sr) - + #seg = y + #sr_seg = sr + seg, sr_seg = librosa.load(fp_segment, mono=True, sr=32000) + + #assert sr==sr_seg + #mfcc_window = mfcc[:,1000:] + + #x_offset, ts_ms = find_repetition(mfcc_window, seg, sr, hop_length, [ t[0] for t in ts_non_sil_ms], plot_result=True) + #if ts_ms is not None: + # print("starting from {0} the seg is repeated at {1}".format(ms_to_time(x_offset), ms_to_time(ts_ms))) + #else: + # print("no rep found") + + #cutoff = int(0.2*len(seg)) + #print(samples_to_millisecond(cutoff, sr)) + + #print("calculating xcross ...") + #xsim = librosa.segment.cross_similarity(mfcc, mfcc, mode='affinity', metric='cosine') + #chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=hop_length) + #mfcc_stack = librosa.feature.stack_memory(mfcc, n_steps=10, delay=3) + #xsim = librosa.segment.recurrence_matrix(mfcc, mode='affinity', metric='cosine') + #lag = librosa.segment.recurrence_to_lag(xsim, pad=False) + + #xsim = librosa.segment.recurrence_matrix(mfcc, mode='affinity', metric='cosine', + # width=50) #fig, ax = plt.subplots(nrows=1, sharex=True) #img = librosa.display.specshow(xsim, x_axis='s', y_axis='s', hop_length=hop_length, ax=ax, cmap='magma_r') + #plt.show() + print("detecting lines ...") + #detect_lines(np.flip(xsim, 0), len(y), len(y), plot_result=True) #print(detect_lines(xsim)) #ax.imshow(np.transpose(xsim), aspect='auto') #ax[1].imshow(diffs_penalised) @@ -323,21 +490,6 @@ if __name__ == '__main__': #make_widths_equal(fig, 111, ax[0], ax[1], ax[2], pad=0.5) #plt.show() - found_starts = sorted([ samples_to_millisecond(y0, sr) for y0 in detect_lines(None, len(seg), len(y))]) - - def f(ts, start): - return abs(ts[0] - start) - - closest = map2d(ts_non_sil_ms, found_starts, f) - plt.imshow(closest) - plt.show() - latest = -1 - for i, row in enumerate(closest): - # min silence len = 400 - if min(row) < min_silence_len / 2: - latest = ts_non_sil_ms[i] - print("delete until:", ms_to_time(latest[0])) - #print("possible starts:", [ ms_to_time(t) for t in found_starts]) #for n, seg in enumerate(segs): # sf.write('part' + str(n) + '.wav', seg, sr) diff --git a/app/Diffs.hs b/app/Diffs.hs new file mode 100644 index 0000000..178cd66 --- /dev/null +++ b/app/Diffs.hs @@ -0,0 +1,89 @@ +module Diffs where + +import Data.Int (Int32, ) +import Control.Arrow (arr, (<<<), (^<<), ) +import qualified Synthesizer.Causal.Process as Causal +import qualified Sound.SoxLib as SoxLib +import qualified Data.StorableVector.Lazy as SVL +import qualified Synthesizer.Storable.Signal as SigSt +import qualified Synthesizer.Generic.Signal as Sig +import qualified Synthesizer.Basic.Binary as Bin +import qualified Synthesizer.Generic.Analysis as Ana +import qualified Synthesizer.Generic.Cut as Cut +import qualified Synthesizer.Generic.Fourier as Four +import qualified Foreign.Storable as Stor + +import qualified Number.Complex as C +import qualified Algebra.Additive as A +import qualified Algebra.Transcendental as T +import Graphics.Matplotlib + +import Driver +import Types + +calcDiff :: IO () +calcDiff = withAudio "out002.wav" $ \a' -> withAudio "out004.wav" $ \b' -> do + let a = prepare a' + b = prepare b' + maxLen = min (Cut.length a) (Cut.length b) + l = maxLen `div` 5 -- take first 20% + sa = Cut.take l a + sb = Cut.take l b + fsa = fourTrafo $ padWithZeros sa + fsb = fourTrafo $ padWithZeros sb + --negb = Causal.apply (arr (*(-1))) sb + --conjb = Causal.apply (arr conjugate) sb + let corr = Four.transformBackward + (Sig.zipWith (*) (Four.transformForward fsa) (Causal.apply (arr C.conjugate) $ Four.transformForward fsb)) + print $ Cut.length sa + print $ Cut.length sb + print $ Cut.length corr + let reals = (Causal.apply (arr $ C.real) corr) :: SVL.Vector Float + imgs = (Causal.apply (arr $ C.imag) corr) :: SVL.Vector Float + ys = SVL.unpack reals :: [Float] + --zs = SVL.unpack imgs :: [Float] + xs = [1..length ys] + onscreen $ line xs ys + --onscreen $ line xs zs + + +prepare :: SVL.Vector Int32 -> SVL.Vector (C.T Float) +prepare sig = + head . + map (Causal.apply (arr Bin.toCanonical)) . + SVL.deinterleave 2 $ sig + +readFirst :: IO (SVL.Vector Int32) +readFirst = withAudio "out003.wav" $ \sig -> do + let s = Cut.take 100 sig + return s + +--padWithZeros :: SVL.Vector (C.T Float) -> SVL.Vector (C.T Float) +padWithZeros x = pad <> x <> pad + where zeros = SVL.repeat SVL.defaultChunkSize 0 + l = SVL.length x + pad = SVL.take (l `div` 2) zeros + +sine :: SVL.Vector Float +sine = SVL.pack SVL.defaultChunkSize $ map sin [0::Float,0.1..] + +plotVec :: SVL.Vector Float -> IO () +plotVec v = let ys = SVL.unpack v + xs = [1..length ys] + in onscreen $ line xs ys + +fourTrafo :: (A.C a, T.C a, Stor.Storable a) => SVL.Vector a -> SVL.Vector a +fourTrafo = Causal.apply (arr $ C.real) . + Four.transformForward . + Causal.apply (arr C.fromReal) + +four :: FilePath -> IO () +four input = withAudio input $ \a' -> do + let a = prepare a' + sa = a + r = Four.transformForward sa + reals = (Causal.apply (arr $ C.real) r) :: SVL.Vector Float + ys = SVL.unpack reals :: [Float] + xs = [1..length ys] + print $ SVL.length r + onscreen $ line xs ys diff --git a/app/Main.hs b/app/Main.hs index ea60500..625d1d1 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -5,46 +5,63 @@ import Parser import Driver import Types -runDehum :: Flags -> FilePath -> FilePath -> IO () -runDehum flags input output = - withSound flags input $ \fmtIn params sig -> - SoxLib.withWrite - (writerInfoFromFormat fmtIn params) - output $ \fmtOut -> - SoxLib.writeStorableVectorLazy fmtOut $ - SVL.interleaveFirstPattern $ - map - (Causal.apply - (arr (Bin.fromCanonicalWith Real.roundSimple) - <<< - dehum params - <<< - arr Bin.toCanonical)) $ - SVL.deinterleave (numChannels params) sig +import qualified System.Console.GetOpt as Opt +import qualified Algebra.RealRing as Real +import qualified Data.StorableVector.Lazy as SVL +import qualified Data.List.HT as ListHT +import qualified Synthesizer.Basic.Binary as Bin +import qualified Synthesizer.Causal.Process as Causal +import Shell.Utility.Exit (exitFailureMsg) +import System.Environment (getArgs, getProgName, ) +import Control.Monad (when, ) +import Text.Printf (printf, ) +import Data.Foldable (forM_, ) +import Control.Arrow (arr, (<<<), (^<<), ) +import System.Console.GetOpt + (getOpt, usageInfo, ArgDescr(NoArg, ReqArg), ) +import qualified Sound.SoxLib as SoxLib +import Data.Int (Int32, ) -runEnvelope :: Flags -> FilePath -> FilePath -> IO () -runEnvelope flags input output = - withSound flags input $ \fmtIn params sig -> - SoxLib.withWrite - (monoInfoFromFormat fmtIn params) - output $ \fmtOut -> - SoxLib.writeStorableVectorLazy fmtOut $ - Causal.apply - (arr (Bin.fromCanonicalWith Real.roundSimple)) $ - trackEnvelope params $ - map - (Causal.apply - (arr (^2) - <<< - dehum params - <<< - arr Bin.toCanonical)) $ - SVL.deinterleave (numChannels params) sig +--runDehum :: Flags -> FilePath -> FilePath -> IO () +--runDehum flags input output = +-- withSound flags input $ \fmtIn params sig -> +-- SoxLib.withWrite +-- (writerInfoFromFormat fmtIn params) +-- output $ \fmtOut -> +-- SoxLib.writeStorableVectorLazy fmtOut $ +-- SVL.interleaveFirstPattern $ +-- map +-- (Causal.apply +-- (arr (Bin.fromCanonicalWith Real.roundSimple) +-- <<< +-- dehum params +-- <<< +-- arr Bin.toCanonical)) $ +-- SVL.deinterleave (numChannels params) sig -runSizes :: Flags -> FilePath -> IO () -runSizes flags input = - withSound flags input $ \_fmt params sig -> - mapM_ print $ pieceDurations params sig +--runEnvelope :: Flags -> FilePath -> FilePath -> IO () +--runEnvelope flags input output = +-- withSound flags input $ \fmtIn params sig -> +-- SoxLib.withWrite +-- (monoInfoFromFormat fmtIn params) +-- output $ \fmtOut -> +-- SoxLib.writeStorableVectorLazy fmtOut $ +-- Causal.apply +-- (arr (Bin.fromCanonicalWith Real.roundSimple)) $ +-- trackEnvelope params $ +-- map +-- (Causal.apply +-- (arr (^2) +-- <<< +-- dehum params +-- <<< +-- arr Bin.toCanonical)) $ +-- SVL.deinterleave (numChannels params) sig + +--runSizes :: Flags -> FilePath -> IO () +--runSizes flags input = +-- withSound flags input $ \_fmt params sig -> +-- mapM_ print $ pieceDurations params sig runLabels :: Flags -> FilePath -> IO () runLabels flags input = @@ -61,6 +78,12 @@ runLabels flags input = prefetch (preStart params) $ pieceDurations params sig +getChops :: Flags -> FilePath -> IO [SVL.Vector Int32] +getChops flags input = withSound flags input $ \_ params sig -> do + let ps = chopLazy params sig + a = show ps + putStrLn $ [last a] + return $! ps {- | > runChop flags "in.wav" "%03d.wav" -} @@ -68,10 +91,11 @@ runChop :: Flags -> FilePath -> FilePath -> IO () runChop flags input output = withSound flags input $ \fmtIn params sig -> forM_ (zip [(0::Int)..] $ chopLazy params sig) $ \(n,piece) -> - SoxLib.withWrite - (writerInfoFromFormat fmtIn params) - (printf output n) $ \fmtOut -> - SoxLib.writeStorableVectorLazy fmtOut piece + print piece + --SoxLib.withWrite + -- (writerInfoFromFormat fmtIn params) + -- (printf output n) $ \fmtOut -> + -- SoxLib.writeStorableVectorLazy fmtOut piece main :: IO () main = SoxLib.formatWith $ do @@ -83,15 +107,15 @@ main = SoxLib.formatWith $ do flags <- foldl (>>=) (return defltFlags) opts - if flagComputeEnvelope flags - then - case files of - [input,output] -> runEnvelope flags input output - [] -> exitFailureMsg "need input and output file envelope computation" - _ -> exitFailureMsg "more than two file names given" - else - case files of - [input,output] -> runChop flags input output - [input] -> runLabels flags input - [] -> exitFailureMsg "no input or output given" - _ -> exitFailureMsg "more than two file names given" + --if flagComputeEnvelope flags + -- then + -- case files of + -- [input,output] -> runEnvelope flags input output + -- [] -> exitFailureMsg "need input and output file envelope computation" + -- _ -> exitFailureMsg "more than two file names given" + -- else + case files of + --[input,output] -> runChop flags input output + [input] -> getChops flags input >>= print --runLabels flags input + [] -> exitFailureMsg "no input or output given" + _ -> exitFailureMsg "more than two file names given" diff --git a/autocut.cabal b/autocut.cabal index 6b824c7..dd7069b 100644 --- a/autocut.cabal +++ b/autocut.cabal @@ -28,6 +28,7 @@ library Driver Lib Parser + Plot Types other-modules: Paths_autocut @@ -36,6 +37,7 @@ library ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints build-depends: base >=4.7 && <5 + , matplotlib , numeric-prelude , shell-utility , soxlib @@ -48,6 +50,7 @@ library executable autocut-exe main-is: Main.hs other-modules: + Diffs Paths_autocut hs-source-dirs: app @@ -55,6 +58,7 @@ executable autocut-exe build-depends: autocut , base >=4.7 && <5 + , matplotlib , numeric-prelude , shell-utility , soxlib @@ -75,6 +79,7 @@ test-suite autocut-test build-depends: autocut , base >=4.7 && <5 + , matplotlib , numeric-prelude , shell-utility , soxlib diff --git a/package.yaml b/package.yaml index 33460bd..ca46e72 100644 --- a/package.yaml +++ b/package.yaml @@ -28,6 +28,8 @@ dependencies: - utility-ht - storablevector - shell-utility +- matplotlib +- webrtc-vad ghc-options: - -Wall diff --git a/src/Driver.hs b/src/Driver.hs index 8e475c8..17fb5a4 100644 --- a/src/Driver.hs +++ b/src/Driver.hs @@ -1,6 +1,10 @@ -module Driver where +module Driver (withSound, writerInfoFromFormat, monoInfoFromFormat, readAudio, withAudio, processAudio) where +import Data.Maybe (fromMaybe, ) +import Foreign.Storable (peek, ) +import qualified Data.StorableVector.Lazy as SVL import qualified Sound.SoxLib as SoxLib +import Data.Int (Int32, ) import Types withSound :: @@ -11,15 +15,13 @@ withSound :: withSound flags path act = SoxLib.withRead SoxLib.defaultReaderInfo path $ \fmtPtr -> do fmt <- peek fmtPtr - let numChan = - fromMaybe 1 $ SoxLib.channels $ SoxLib.signalInfo fmt - rate = - case flagSampleRate flags of - Just r -> r - Nothing -> - case SoxLib.rate $ SoxLib.signalInfo fmt of - Just r -> r - Nothing -> defaultSampleRate + let numChan = fromMaybe 1 $ SoxLib.channels $ SoxLib.signalInfo fmt + rate = case flagSampleRate flags of + Just r -> r + Nothing -> + case SoxLib.rate $ SoxLib.signalInfo fmt of + Just r -> r + Nothing -> defaultSampleRate params = Params { sampleRate = rate, @@ -35,8 +37,7 @@ withSound flags path act = (case flagBlocksize flags of SVL.ChunkSize size -> SVL.ChunkSize $ numChan * size) -monoInfoFromFormat :: - SoxLib.Format mode -> Params -> SoxLib.WriterInfo +monoInfoFromFormat :: SoxLib.Format mode -> Params -> SoxLib.WriterInfo monoInfoFromFormat fmtIn params = SoxLib.defaultWriterInfo { SoxLib.writerSignalInfo = Just $ @@ -47,8 +48,7 @@ monoInfoFromFormat fmtIn params = SoxLib.writerEncodingInfo = Just $ SoxLib.encodingInfo fmtIn } -writerInfoFromFormat :: - SoxLib.Format mode -> Params -> SoxLib.WriterInfo +writerInfoFromFormat :: SoxLib.Format mode -> Params -> SoxLib.WriterInfo writerInfoFromFormat fmtIn params = SoxLib.defaultWriterInfo { SoxLib.writerSignalInfo = Just $ @@ -57,3 +57,30 @@ writerInfoFromFormat fmtIn params = }, SoxLib.writerEncodingInfo = Just $ SoxLib.encodingInfo fmtIn } + +readAudio :: FilePath -> IO (SVL.Vector Int32) +readAudio path = do + fmtPtr <- SoxLib.openRead SoxLib.defaultReaderInfo path + fmt <- peek fmtPtr + let numChan = fromMaybe 1 $ SoxLib.channels $ SoxLib.signalInfo fmt + print numChan + + v <- SoxLib.readStorableVectorLazy fmtPtr (SVL.chunkSize $ 65536 * numChan) + SoxLib.close fmtPtr + return v + +withAudio :: FilePath -> (SVL.Vector Int32 -> IO a) -> IO a +withAudio path action = do + SoxLib.withRead SoxLib.defaultReaderInfo path $ \fmtPtr -> do + fmt <- peek fmtPtr + let numChan = fromMaybe 1 $ SoxLib.channels $ SoxLib.signalInfo fmt + v <- SoxLib.readStorableVectorLazy fmtPtr (SVL.chunkSize $ 65536 * numChan) + action v + +processAudio :: FilePath -> FilePath -> (SVL.Vector Int32 -> IO (SVL.Vector Int32)) -> IO () +processAudio input output action = + withSound defltFlags input $ \fmtIn params sig -> do + v <- action sig + SoxLib.withWrite + (writerInfoFromFormat fmtIn params) output + $ \fmtOut -> SoxLib.writeStorableVectorLazy fmtOut v diff --git a/src/Lib.hs b/src/Lib.hs index e810852..808fb40 100644 --- a/src/Lib.hs +++ b/src/Lib.hs @@ -1,4 +1,4 @@ -module Lib (someFunc) where +module Lib (prefetch, pieceDurations, chopLazy, chop) where import qualified Synthesizer.Storable.Signal as SigSt import qualified Synthesizer.ChunkySize.Cut as CutCS @@ -9,40 +9,22 @@ import qualified Synthesizer.State.Cut as Cut import qualified Synthesizer.State.Signal as Sig import qualified Synthesizer.Basic.Binary as Bin -import qualified Sound.SoxLib as SoxLib - import qualified Data.StorableVector.Lazy as SVL -import Foreign.Storable (peek, ) import qualified Control.Monad.Trans.State as MS -import Control.Monad (when, ) import Control.Arrow (arr, (<<<), (^<<), ) -import qualified Data.List.HT as ListHT import qualified Data.List as List import Data.Tuple.HT (swap, ) -import Data.Foldable (forM_, ) -import Data.Maybe (fromMaybe, ) - -import qualified System.Console.GetOpt as Opt -import System.Console.GetOpt - (getOpt, usageInfo, ArgDescr(NoArg, ReqArg), ) -import System.Environment (getArgs, getProgName, ) -import Text.Printf (printf, ) - -import qualified System.Exit as Exit -import Shell.Utility.Exit (exitFailureMsg) -import qualified Algebra.RealRing as Real import NumericPrelude.Numeric import NumericPrelude.Base import Data.Int (Int32, ) import Types -import Driver -import Prelude () +import Prelude (, ) dehum :: Params -> Causal.T Float Float dehum params = @@ -63,9 +45,11 @@ trackEnvelope params = . foldl SigSt.mix SVL.empty +-- Float -> Bool threshold :: Params -> Causal.T Float Bool threshold params = Causal.map (< pauseVolume params) +-- Bool -> Bool findStarts :: Params -> Causal.T Bool Bool findStarts params = flip Causal.fromState 0 $ \b -> @@ -73,6 +57,7 @@ findStarts params = then MS.modify succ >> evalReturn False else do n <- MS.get; MS.put 0; return (n >= minPause params) +-- Bool -> Maybe Int measurePauses :: Causal.T Bool (Maybe Int) measurePauses = flip Causal.fromState 0 $ \b -> @@ -92,7 +77,9 @@ pieceDurations params = (measurePauses <<< findStarts params <<< threshold params) . Sig.fromStorableSignal . trackEnvelope params . + -- on every channel: map (Causal.apply (arr (^2) <<< dehum params <<< arr Bin.toCanonical)) . + -- seperate channels ? SVL.deinterleave (numChannels params) pieceDurationsPrefetchLazy :: Params -> SVL.Vector Int32 -> [ChunkySize.T] @@ -114,8 +101,7 @@ prefetch n (s:ss) = then prefetch (n-s) ss else (s-n) : ss -chop, chopLazy :: - Params -> SVL.Vector Int32 -> [SVL.Vector Int32] +chop, chopLazy :: Params -> SVL.Vector Int32 -> [SVL.Vector Int32] chop params sig0 = snd $ List.mapAccumL (\sig n -> swap $ SVL.splitAt n sig) sig0 $ diff --git a/src/Parser.hs b/src/Parser.hs index 9737e28..144d0f1 100644 --- a/src/Parser.hs +++ b/src/Parser.hs @@ -1,10 +1,23 @@ module Parser where import qualified Sound.SoxLib as SoxLib +import qualified System.Console.GetOpt as Opt +import qualified Data.StorableVector.Lazy as SVL +import qualified System.Exit as Exit +import qualified Algebra.RealRing as Real +import Shell.Utility.Exit (exitFailureMsg) +import System.Console.GetOpt + (getOpt, usageInfo, ArgDescr(NoArg, ReqArg), ) +import System.Environment (getArgs, getProgName, ) +import Text.Printf (printf, ) +import NumericPrelude.Numeric +import NumericPrelude.Base import Types import Driver +import Prelude () + parseCard :: (Read a, Real.C a) => String -> String -> IO a parseCard name str = case reads str of @@ -67,21 +80,3 @@ description = return $ flags{flagComputeEnvelope = True}) "compute envelope for assistance in finding appropriate parameters" : [] - -defaultSampleRate :: SoxLib.Rate -defaultSampleRate = 44100 - -freq :: SoxLib.Rate -> (Flags -> Freq) -> (Flags -> Float) -freq sr acc flags = - (case acc flags of Freq f -> f) / realToFrac sr - -time :: SoxLib.Rate -> (Flags -> Time) -> (Flags -> Int) -time sr acc flags = - round ((case acc flags of Time t -> t) * realToFrac sr) - -formatFreq :: Freq -> String -formatFreq (Freq t) = show t -- ++ "Hz" - -formatTime :: Time -> String -formatTime (Time t) = show t -- ++ "s" - diff --git a/src/Plot.hs b/src/Plot.hs new file mode 100644 index 0000000..59f5952 --- /dev/null +++ b/src/Plot.hs @@ -0,0 +1,10 @@ +module Plot where + +import Graphics.Matplotlib + +signal :: [Double] -> [Double] +signal xs = [ (sin (x*3.14159/45) + 1) / 2 * (sin (x*3.14159/5)) | x <- xs ] + +plot :: IO () +plot = onscreen $ line xs $ signal xs + where xs = [1..1000] diff --git a/src/Types.hs b/src/Types.hs index f4dcfff..d6e899a 100644 --- a/src/Types.hs +++ b/src/Types.hs @@ -1,5 +1,8 @@ module Types where +import qualified Data.StorableVector.Lazy as SVL +import qualified Sound.SoxLib as SoxLib + newtype Time = Time Float deriving (Eq, Show) @@ -43,3 +46,20 @@ data Params = pauseVolume :: Float, minPause, preStart :: Int } + +formatFreq :: Freq -> String +formatFreq (Freq t) = show t -- ++ "Hz" + +formatTime :: Time -> String +formatTime (Time t) = show t -- ++ "s" + +defaultSampleRate :: SoxLib.Rate +defaultSampleRate = 44100 + +freq :: SoxLib.Rate -> (Flags -> Freq) -> (Flags -> Float) +freq sr acc flags = + (case acc flags of Freq f -> f) / realToFrac sr + +time :: SoxLib.Rate -> (Flags -> Time) -> (Flags -> Int) +time sr acc flags = + round ((case acc flags of Time t -> t) * realToFrac sr) diff --git a/stack.yaml b/stack.yaml index 67f6e63..490ff2a 100644 --- a/stack.yaml +++ b/stack.yaml @@ -65,3 +65,5 @@ packages: # # Allow a newer minor version of GHC than the snapshot specifies # compiler-check: newer-minor +extra-deps: + # - Chart-diagrams-1.9.3@sha256:63668daff044a79827b7edb265265a4a8237424abb8f808ad1fcbdb3d47e753d,1801