| @@ -5,3 +5,6 @@ | |||
| *.wav | |||
| *.lock | |||
| *_data | |||
| *.prof | |||
| labels.txt | |||
| *.png | |||
| @@ -133,7 +133,7 @@ def spl_on_silence(): | |||
| def non_silent_chunks(song): | |||
| #song = AudioSegment.from_wav("recording.wav") | |||
| return detect_nonsilent(song, min_silence_len=400, silence_thresh=-50) | |||
| return detect_nonsilent(song, min_silence_len=10, silence_thresh=-50) | |||
| def audiosegment_to_librosawav(audiosegment): | |||
| @@ -175,7 +175,8 @@ def seg_is_speech(seg): | |||
| offset = offset + n | |||
| total += 1 | |||
| return speeches / total | |||
| #return speeches / total | |||
| return 1.0 | |||
| def make_widths_equal(fig, rect, ax1, ax2, ax3, pad): | |||
| @@ -193,7 +194,7 @@ if __name__ == '__main__': | |||
| vad = webrtcvad.Vad() | |||
| frame_duration_ms = 10 | |||
| fp = "hard_pieces.wav" | |||
| fp = "hard_piece_7.wav" | |||
| y, sr = librosa.load(fp, mono=True, sr=32000) | |||
| #pcm_data = y.tobytes() | |||
| @@ -269,7 +270,8 @@ if __name__ == '__main__': | |||
| continue | |||
| max_j = i | |||
| for j in range(i, n_segs): | |||
| if diffs_penalised[i,j] < 80: | |||
| if diffs[i,j] < 80: | |||
| #if diffs_penalised[i,j] < 80: | |||
| max_j = j | |||
| delete_segs[i:max_j] = True | |||
| @@ -285,13 +287,14 @@ if __name__ == '__main__': | |||
| #print("{0}\t{1}\tvad {2}".format(s1/1000, e1/1000, vad_coeff)) | |||
| fig, ax = plt.subplots(nrows=3, sharex=True) | |||
| ax[0].imshow(diffs) | |||
| ax[1].imshow(diffs_penalised) | |||
| #fig, ax = plt.subplots(nrows=3, sharex=True) | |||
| fig, ax = plt.subplots(nrows=1, sharex=True) | |||
| ax.imshow(diffs) | |||
| #ax[1].imshow(diffs_penalised) | |||
| #ax[1].imshow(np.reshape(vad_coeffs, (1, n_segs))) | |||
| ax[2].imshow(np.reshape(lengths, (1, n_segs))) | |||
| #ax[2].imshow(np.reshape(lengths, (1, n_segs))) | |||
| make_widths_equal(fig, 111, ax[0], ax[1], ax[2], pad=0.5) | |||
| #make_widths_equal(fig, 111, ax[0], ax[1], ax[2], pad=0.5) | |||
| plt.show() | |||
| #for n, seg in enumerate(segs): | |||
| # sf.write('part' + str(n) + '.wav', seg, sr) | |||
| @@ -12,10 +12,12 @@ import random | |||
| from mpl_toolkits.axes_grid1.axes_divider import VBoxDivider | |||
| import mpl_toolkits.axes_grid1.axes_size as Size | |||
| import cv2 | |||
| import sys | |||
| import webrtcvad | |||
| min_silence_len = 400 | |||
| frame_duration_ms = 10 | |||
| def calc_dtw_sim(y1, y2, sr1, sr2, plot_result=False): | |||
| @@ -159,9 +161,13 @@ def samples_to_millisecond(samples, sr): | |||
| return (samples / sr) * 1000 | |||
def samples_to_time(samples, sr):
    """Render a sample count as a time string.

    The count is first converted to milliseconds with
    ``samples_to_millisecond`` (using sample rate ``sr``) and the result is
    formatted by ``ms_to_time``.
    """
    elapsed_ms = samples_to_millisecond(samples, sr)
    return ms_to_time(elapsed_ms)
| def ms_to_time(ms): | |||
| secs = ms / 1000 | |||
| return "{0}:{1}".format(math.floor(secs / 60), secs % 60) | |||
| return "{0}:{1:.4f}".format(math.floor(secs / 60), secs % 60) | |||
| def seg_is_speech(seg): | |||
| @@ -182,21 +188,29 @@ def seg_is_speech(seg): | |||
| offset = offset + n | |||
| total += 1 | |||
| #return speeches / total | |||
| return 1.0 | |||
| return speeches / total | |||
| def calculate_best_offset(mfcc_ref, mfcc_seg, sr): | |||
| return librosa.segment.cross_similarity(mfcc_seg, mfcc_ref, mode='affinity', metric='cosine') | |||
| def detect_lines(img, duration_x, duration_y): | |||
| def detect_lines(img, duration_x, duration_y, plot_result=False): | |||
| #print(img.shape) | |||
| #print(np.min(img), np.max(img)) | |||
| img = cv2.imread('affine_similarity.png') | |||
| gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) | |||
| gray = np.vectorize(int)((1-img) * 255).astype('uint8') | |||
| img = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR) | |||
| #print(img, type(img)) | |||
| #img = cv2.imread('affine_similarity_2.png') | |||
| #gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) | |||
| #cv2.imshow("gray", gray) | |||
| #cv2.waitKey(0) | |||
| #print(gray, type(gray), gray.shape, gray.dtype) | |||
| #print(gray2, type(gray2), gray2.shape, gray2.dtype) | |||
| kernel_size = 5 | |||
| blur_gray = cv2.GaussianBlur(gray, (kernel_size, kernel_size), 0) | |||
| #cv2.imshow("blur gray", blur_gray) | |||
| #cv2.waitKey(0) | |||
| low_threshold = 50 | |||
| high_threshold = 150 | |||
| @@ -207,7 +221,8 @@ def detect_lines(img, duration_x, duration_y): | |||
| threshold = 15 # minimum number of votes (intersections in Hough grid cell) | |||
| min_line_length = 50 # minimum number of pixels making up a line | |||
| max_line_gap = 20 # maximum gap in pixels between connectable line segments | |||
| line_image = np.copy(img) * 0 # creating a blank to draw lines on | |||
| if plot_result: | |||
| line_image = np.copy(img) * 0 # creating a blank to draw lines on | |||
| # Run Hough on edge detected image | |||
| # Output "lines" is an array containing endpoints of detected line segments | |||
| @@ -218,35 +233,54 @@ def detect_lines(img, duration_x, duration_y): | |||
| scale_x = duration_x / width | |||
| scale_y = duration_y / height | |||
| print(img.shape, scale_x, scale_y, duration_x, duration_y) | |||
| #print(img.shape, scale_x, scale_y, duration_x, duration_y) | |||
| #slope = duration_y / duration_x | |||
| slope = 1 | |||
| expected_slope = scale_x / scale_y | |||
| #print(expected_slope) | |||
| #expected_slope = 1.0 # y is inverted by opencv | |||
| #expected_slope = 0.101694915 | |||
| print(expected_slope) | |||
| ls = [] | |||
| offsets = [] | |||
| for line in lines: | |||
| for x1,y1,x2,y2 in line: | |||
| # swapped y1 and y2 since y is measured from the top | |||
| slope = (y1-y2)/(x2-x1) | |||
| if abs(slope - expected_slope) < 0.03: | |||
| cv2.line(line_image,(x1,y1),(x2,y2),(255,0,0),5) | |||
| cv2.putText(img, "{:.2f}".format(slope), (x1, y1), fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=0.5,color=(0, 0, 255)) | |||
| if (x1 / width) < 0.15: | |||
| print(height-y1) | |||
| y = height - y1 | |||
| y0 = y - x1 * slope | |||
| offsets.append(y0 * scale_y) | |||
| #actual_lines.append((x1 * scale_x, (height - y1) * scale_y, x2 * scale_x, (height - y2) * scale_y)) | |||
| xs = [] | |||
| if lines is not None: | |||
| for line in lines: | |||
| for x1,y1,x2,y2 in line: | |||
| # swapped y1 and y2 since y is measured from the top | |||
| slope = (y2-y1)/(x2-x1) if x2 != x1 else 42 | |||
| if abs(slope - expected_slope) < 0.15:#and (x1 / width) < 0.15: | |||
| y = y1 | |||
| y0 = (y - x1 * slope) | |||
| if plot_result: | |||
| #cv2.line(line_image,(0,int(y0)),(x2,y2),(0,255,0),5) | |||
| cv2.line(line_image,(x1, y1),(x2,y2),(255,0,0),5) | |||
| cv2.putText(img, "{:.2f}".format(slope), (x1, y1), fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=2,color=(0, 0, 255)) | |||
| #if (x1 / width) < 0.15: | |||
| #print(height-y1) | |||
| #y = height - y1 | |||
| #y = y1 | |||
| #y0 = y - x1 * slope | |||
| #offsets.append(y0 * scale_y) | |||
| #xs.append(x1) | |||
| ls.append((x1, y1, slope)) | |||
| #actual_lines.append((x1 * scale_x, (height - y1) * scale_y, x2 * scale_x, (height - y2) * scale_y)) | |||
| #print(max(slopes)) | |||
| x_min = min(ls, key=lambda a: a[0])[0] if len(ls) > 0 else 42 # just something > 10 | |||
| offsets = [ (y1 + (x_min - x1)*slope) * scale_y for x1, y1, slope in ls ] | |||
| if plot_result: | |||
| for x1, y1, slope in ls: | |||
| y = y1 + (x_min -x1)*slope | |||
| #cv2.line(line_image,(x_min,int(y)),(x1,y1),(0,255,0),5) | |||
| lines_edges = cv2.addWeighted(img, 0.8, line_image, 1, 0) | |||
| #cv2.imshow("lines", lines_edges) | |||
| #cv2.waitKey(0) | |||
| return offsets | |||
| #cv2.line(line_image, (x_min, 0), (x_min, height-1), (0, 0, 255), 2) | |||
| lines_edges = cv2.addWeighted(img, 0.8, line_image, 1, 0) | |||
| lines_edges_resized = cv2.resize(lines_edges, (int(1024 * duration_x / duration_y ), 1024)) | |||
| cv2.imshow("lines", lines_edges_resized) | |||
| cv2.waitKey(0) | |||
| return (x_min*scale_x, offsets) | |||
| def map2d(x, y, f): | |||
| @@ -259,6 +293,103 @@ def map2d(x, y, f): | |||
| return res | |||
def find_repetition(mfcc_ref, seg, sr, hop_length, sentence_timestamps, plot_result=False):
    """Look for a repetition of ``seg`` inside the reference MFCC window.

    The MFCCs of ``seg`` (first coefficient dropped) are compared against
    ``mfcc_ref`` via ``calculate_best_offset``; ``detect_lines`` is then run
    on the resulting similarity matrix and its offsets are converted to
    milliseconds.  Every entry of ``sentence_timestamps`` is matched against
    those offsets.

    Uses the module-level names ``n_mfcc`` and ``min_silence_len``.

    Returns a tuple ``(x_start_ms, latest)`` where ``x_start_ms`` is the
    first value returned by ``detect_lines`` converted to milliseconds and
    ``latest`` is the last entry of ``sentence_timestamps`` that lies closer
    than ``min_silence_len / 2`` to any detected offset, or ``None`` if there
    is no such entry.
    """
    seg_features = librosa.feature.mfcc(y=seg, sr=sr, hop_length=hop_length, n_mfcc=n_mfcc)
    seg_features = seg_features[1:, :]
    similarity = calculate_best_offset(mfcc_ref, seg_features, sr)

    ref_length_samples = mfcc_ref.shape[1] * hop_length
    x_min, offsets = detect_lines(similarity, len(seg), ref_length_samples, plot_result=plot_result)

    found_starts = [samples_to_millisecond(y0, sr) for y0 in offsets]
    found_starts.sort()

    # One row per sentence timestamp, one column per detected start.
    closest = map2d(sentence_timestamps, found_starts, lambda ts, start: abs(ts - start))

    if plot_result:
        plt.imshow(closest)
        plt.show()

    tolerance = min_silence_len / 2
    latest = None
    for idx, distances in enumerate(closest):
        # Rows are empty when no start was detected at all.
        if len(distances) != 0 and min(distances) < tolerance:
            latest = sentence_timestamps[idx]

    return (samples_to_millisecond(x_min, sr), latest)
def samples_to_hops(samples, hop_length):
    """Convert a length in samples to the nearest whole number of hops."""
    hops = samples / hop_length
    return round(hops)
def hops_to_samples(hops, hop_length):
    """Convert a number of hops to a length in samples, rounded to an integer."""
    samples = hop_length * hops
    return round(samples)
def _group_deletion_suggestions(suggestions, max_gap_ms=250):
    """Group consecutive suggestions whose start times are < max_gap_ms apart."""
    groups = []
    for sugg in suggestions:
        if groups and sugg[0] - groups[-1][-1][0] < max_gap_ms:
            groups[-1].append(sugg)
        else:
            groups.append([sugg])
    return groups


def _snap_to_sentence_start(start_ms, sentence_timestamps, max_dist_ms=150):
    """Return the nearest sentence start if it is < max_dist_ms away, else start_ms."""
    dists = [abs(start_ms - ts[0]) for ts in sentence_timestamps]
    nearest = int(np.argmin(dists))
    if dists[nearest] < max_dist_ms:
        return sentence_timestamps[nearest][0]
    return start_ms


def cont_find_repetitions(y, sr, hop_length, sentence_timestamps):
    """Slide a window over ``y`` and suggest spans that are repeated later.

    Parameters:
        y: audio samples (indexed and sliced by sample position).
        sr: sample rate of ``y``.
        hop_length: hop length used for the MFCC computation.
        sentence_timestamps: list of ``(start_ms, end_ms)`` entries sorted by
            start; windows are only analysed from just before a sentence
            starts up to the end of the most recently passed sentence.

    Uses the module-level name ``n_mfcc`` for the MFCC computation.

    Returns:
        A list of ``(start_ms, end_ms)`` tuples.  Suggestions whose starts
        are less than 250 ms apart are merged (mean start, max end) and a
        merged start closer than 150 ms to a sentence start is snapped to
        that sentence start.

    Raises:
        AssertionError: if ``sentence_timestamps`` is not sorted by start.
    """
    assert sorted(sentence_timestamps, key=lambda t: t[0]) == sentence_timestamps

    # Nothing to align against: skip the (expensive) MFCC computation.
    if len(sentence_timestamps) == 0:
        return []

    mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=n_mfcc)[1:, :]

    step_length_ms = 200
    step_length_samples = millisecond_to_samples(step_length_ms, sr)

    window_length_ms = 1500
    window_length_samples = millisecond_to_samples(window_length_ms, sr)

    ref_window_length_ms = 20 * 1000  # 20 seconds of look-ahead
    ref_window_length_samples = millisecond_to_samples(ref_window_length_ms, sr)
    ref_window_length_hops = samples_to_hops(ref_window_length_samples, hop_length)

    offset = 0
    available_ts = sentence_timestamps
    last_sentence_end = 0
    deletion_suggestions = []

    while offset + step_length_samples < len(y) and len(available_ts) > 0:
        offset_ms = samples_to_millisecond(offset, sr)

        # Between the end of the last sentence and the start of the next
        # one there is nothing to analyse.
        if offset_ms < available_ts[0][0] and offset_ms >= last_sentence_end:
            offset += step_length_samples
            continue

        seg = y[offset : offset + window_length_samples]
        offset_hops = samples_to_hops(offset, hop_length)
        mfcc_window = mfcc[:, offset_hops : offset_hops + ref_window_length_hops]

        x_offset_ms, ts_ms = find_repetition(
            mfcc_window,
            seg,
            sr,
            hop_length,
            [t[0] - offset_ms for t in available_ts],
        )

        if ts_ms is not None and x_offset_ms < step_length_ms:
            print("delete from {0} to {1}".format(samples_to_time(offset + millisecond_to_samples(x_offset_ms, sr), sr), ms_to_time(offset_ms + ts_ms)))
            deletion_suggestions.append((offset_ms + x_offset_ms, offset_ms + ts_ms))

        offset += step_length_samples
        # Once the window has stepped past the next sentence start, that
        # sentence is no longer a repetition candidate.
        if offset_ms + step_length_ms > available_ts[0][0]:
            last_sentence_end = available_ts[0][1]
            available_ts = available_ts[1:]

    # Grouping includes the final group; previously the last run of
    # suggestions was never flushed and therefore never reported.
    deletions = []
    for group in _group_deletion_suggestions(deletion_suggestions):
        start_ms = np.mean([d[0] for d in group])
        end_ms = np.max([d[1] for d in group])
        deletions.append((_snap_to_sentence_start(start_ms, sentence_timestamps), end_ms))

    return deletions
| def make_widths_equal(fig, rect, ax1, ax2, ax3, pad): | |||
| # pad in inches | |||
| divider = VBoxDivider( | |||
| @@ -271,20 +402,36 @@ def make_widths_equal(fig, rect, ax1, ax2, ax3, pad): | |||
| if __name__ == '__main__': | |||
| #vad = webrtcvad.Vad() | |||
| #hop_length = 128 | |||
| #n_mfcc = 13 | |||
| #frame_duration_ms = 10 | |||
| fp = "hard_piece_7.wav" | |||
| y, sr = librosa.load(fp, mono=True) | |||
| vad = webrtcvad.Vad() | |||
| hop_length = 128 | |||
| n_mfcc = 42 | |||
| fp = "hard_pieces.wav" | |||
| print("loading file ...") | |||
| y, sr = librosa.load(fp, mono=True, sr=32000) | |||
| print("calculating mfcc ...") | |||
| mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=n_mfcc)[1:,:] | |||
| song = AudioSegment.from_wav(fp) | |||
| mf_w = mfcc.shape[1] | |||
| l = y.shape[0] | |||
| print(l / mf_w) | |||
| ts_non_sil_ms = non_silent_chunks(song) | |||
| #print(y.shape) | |||
| #mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=n_mfcc)[1:,:] | |||
| #autocorr = librosa.autocorrelate(y) | |||
| #fig, ax = plt.subplots() | |||
| #ax.plot(autocorr) | |||
| #plt.show() | |||
| #ts_non_sil_ms = [ t[0] for t in non_silent_chunks(song) ] | |||
| #print(mfcc.shape) | |||
| #print("finding reps ...") | |||
| dels = cont_find_repetitions(y, sr, hop_length, ts_non_sil_ms) | |||
| for d in dels: | |||
| print("{0}\t{1}\tdelete".format(d[0]/1000, d[1]/1000)) | |||
| #window_length_ms = 1000 | |||
| #window_length_samples = millisecond_to_samples(window_length_ms, sr) | |||
| #seg = y[25280 : 25280 + window_length_samples] | |||
| #seg_duration_ms = 100 | |||
| #seg_duration_samples = millisecond_to_samples(seg_duration_ms, sr) | |||
| @@ -305,16 +452,36 @@ if __name__ == '__main__': | |||
| ##(seg, offset) = segs[0] | |||
| fp_segment = "segment.wav" | |||
| seg, sr_seg = librosa.load(fp_segment, mono=True) | |||
| assert sr==sr_seg | |||
| ##for seg in segs: | |||
| #mfcc_seg = librosa.feature.mfcc(y=seg, sr=sr_seg, hop_length=hop_length, n_mfcc=n_mfcc)[1:,:] | |||
| #xsim = calculate_best_offset(mfcc, mfcc_seg, sr) | |||
| #seg = y | |||
| #sr_seg = sr | |||
| seg, sr_seg = librosa.load(fp_segment, mono=True, sr=32000) | |||
| #assert sr==sr_seg | |||
| #mfcc_window = mfcc[:,1000:] | |||
| #x_offset, ts_ms = find_repetition(mfcc_window, seg, sr, hop_length, [ t[0] for t in ts_non_sil_ms], plot_result=True) | |||
| #if ts_ms is not None: | |||
| # print("starting from {0} the seg is repeated at {1}".format(ms_to_time(x_offset), ms_to_time(ts_ms))) | |||
| #else: | |||
| # print("no rep found") | |||
| #cutoff = int(0.2*len(seg)) | |||
| #print(samples_to_millisecond(cutoff, sr)) | |||
| #print("calculating xcross ...") | |||
| #xsim = librosa.segment.cross_similarity(mfcc, mfcc, mode='affinity', metric='cosine') | |||
| #chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=hop_length) | |||
| #mfcc_stack = librosa.feature.stack_memory(mfcc, n_steps=10, delay=3) | |||
| #xsim = librosa.segment.recurrence_matrix(mfcc, mode='affinity', metric='cosine') | |||
| #lag = librosa.segment.recurrence_to_lag(xsim, pad=False) | |||
| #xsim = librosa.segment.recurrence_matrix(mfcc, mode='affinity', metric='cosine', | |||
| # width=50) | |||
| #fig, ax = plt.subplots(nrows=1, sharex=True) | |||
| #img = librosa.display.specshow(xsim, x_axis='s', y_axis='s', hop_length=hop_length, ax=ax, cmap='magma_r') | |||
| #plt.show() | |||
| print("detecting lines ...") | |||
| #detect_lines(np.flip(xsim, 0), len(y), len(y), plot_result=True) | |||
| #print(detect_lines(xsim)) | |||
| #ax.imshow(np.transpose(xsim), aspect='auto') | |||
| #ax[1].imshow(diffs_penalised) | |||
| @@ -323,21 +490,6 @@ if __name__ == '__main__': | |||
| #make_widths_equal(fig, 111, ax[0], ax[1], ax[2], pad=0.5) | |||
| #plt.show() | |||
| found_starts = sorted([ samples_to_millisecond(y0, sr) for y0 in detect_lines(None, len(seg), len(y))]) | |||
| def f(ts, start): | |||
| return abs(ts[0] - start) | |||
| closest = map2d(ts_non_sil_ms, found_starts, f) | |||
| plt.imshow(closest) | |||
| plt.show() | |||
| latest = -1 | |||
| for i, row in enumerate(closest): | |||
| # min silence len = 400 | |||
| if min(row) < min_silence_len / 2: | |||
| latest = ts_non_sil_ms[i] | |||
| print("delete until:", ms_to_time(latest[0])) | |||
| #print("possible starts:", [ ms_to_time(t) for t in found_starts]) | |||
| #for n, seg in enumerate(segs): | |||
| # sf.write('part' + str(n) + '.wav', seg, sr) | |||
| @@ -0,0 +1,89 @@ | |||
| module Diffs where | |||
| import Data.Int (Int32, ) | |||
| import Control.Arrow (arr, (<<<), (^<<), ) | |||
| import qualified Synthesizer.Causal.Process as Causal | |||
| import qualified Sound.SoxLib as SoxLib | |||
| import qualified Data.StorableVector.Lazy as SVL | |||
| import qualified Synthesizer.Storable.Signal as SigSt | |||
| import qualified Synthesizer.Generic.Signal as Sig | |||
| import qualified Synthesizer.Basic.Binary as Bin | |||
| import qualified Synthesizer.Generic.Analysis as Ana | |||
| import qualified Synthesizer.Generic.Cut as Cut | |||
| import qualified Synthesizer.Generic.Fourier as Four | |||
| import qualified Foreign.Storable as Stor | |||
| import qualified Number.Complex as C | |||
| import qualified Algebra.Additive as A | |||
| import qualified Algebra.Transcendental as T | |||
| import Graphics.Matplotlib | |||
| import Driver | |||
| import Types | |||
-- | Cross-correlate the leading fifth of @out002.wav@ and @out004.wav@ in the
-- frequency domain and plot the real part of the result.
--
-- Both inputs go through 'prepare', are cut to a fifth of the shorter
-- length, zero-padded and passed through 'fourTrafo'.  The forward
-- transform of the first is multiplied with the conjugated forward
-- transform of the second and transformed back.  The lengths of both cut
-- signals and of the correlation are printed before plotting.
calcDiff :: IO ()
calcDiff =
  withAudio "out002.wav" $ \rawA ->
    withAudio "out004.wav" $ \rawB -> do
      let sigA = prepare rawA
          sigB = prepare rawB
          -- take first 20%
          prefixLen = min (Cut.length sigA) (Cut.length sigB) `div` 5
          headA = Cut.take prefixLen sigA
          headB = Cut.take prefixLen sigB
          specA = Four.transformForward (fourTrafo (padWithZeros headA))
          specB = Four.transformForward (fourTrafo (padWithZeros headB))
          conjB = Causal.apply (arr C.conjugate) specB
          corr = Four.transformBackward (Sig.zipWith (*) specA conjB)
      print (Cut.length headA)
      print (Cut.length headB)
      print (Cut.length corr)
      let reals = Causal.apply (arr C.real) corr :: SVL.Vector Float
          ys = SVL.unpack reals :: [Float]
          xs = [1 .. length ys]
      onscreen (line xs ys)
-- | De-interleave the signal into two channels, keep the first one and map
-- every sample through 'Bin.toCanonical'.
prepare :: SVL.Vector Int32 -> SVL.Vector (C.T Float)
prepare sig = Causal.apply (arr Bin.toCanonical) firstChannel
  where
    firstChannel = head (SVL.deinterleave 2 sig)
-- | Read the first 100 samples of @out003.wav@.
--
-- 'withAudio' hands out a lazily read vector and closes the file when the
-- callback returns, so the prefix is forced before it leaves the callback.
-- NOTE(review): assumes 'SoxLib.readStorableVectorLazy' defers reading
-- until chunks are demanded - confirm against the soxlib documentation.
readFirst :: IO (SVL.Vector Int32)
readFirst = withAudio "out003.wav" $ \sig -> do
  let s = Cut.take 100 sig
  -- Computing the length demands every chunk of the 100-sample prefix.
  Cut.length s `seq` return s
--padWithZeros :: SVL.Vector (C.T Float) -> SVL.Vector (C.T Float)
-- | Surround a signal with zeros: half the signal's length is prepended and
-- the same amount is appended.
padWithZeros x = padding <> x <> padding
  where
    padding =
      SVL.take (SVL.length x `div` 2) (SVL.repeat SVL.defaultChunkSize 0)
-- | Endless sine signal sampled at arguments 0, 0.1, 0.2, ...
sine :: SVL.Vector Float
sine = SVL.pack SVL.defaultChunkSize samples
  where
    samples = [sin t | t <- [0 :: Float, 0.1 ..]]
-- | Plot the samples of a vector on screen against their 1-based index.
plotVec :: SVL.Vector Float -> IO ()
plotVec v = onscreen (line xs ys)
  where
    ys = SVL.unpack v
    xs = [1 .. length ys]
-- | Lift a real signal to complex numbers, apply the forward Fourier
-- transform and keep the real part of the result.
fourTrafo :: (A.C a, T.C a, Stor.Storable a) => SVL.Vector a -> SVL.Vector a
fourTrafo sig = Causal.apply (arr C.real) spectrum
  where
    spectrum = Four.transformForward (Causal.apply (arr C.fromReal) sig)
-- | Forward-transform the prepared signal of the given file, print the
-- length of the transform and plot its real part.
four :: FilePath -> IO ()
four input = withAudio input $ \raw -> do
  let spectrum = Four.transformForward (prepare raw)
      reals = Causal.apply (arr C.real) spectrum :: SVL.Vector Float
      ys = SVL.unpack reals :: [Float]
      xs = [1 .. length ys]
  print (SVL.length spectrum)
  onscreen (line xs ys)
| @@ -5,46 +5,63 @@ import Parser | |||
| import Driver | |||
| import Types | |||
| runDehum :: Flags -> FilePath -> FilePath -> IO () | |||
| runDehum flags input output = | |||
| withSound flags input $ \fmtIn params sig -> | |||
| SoxLib.withWrite | |||
| (writerInfoFromFormat fmtIn params) | |||
| output $ \fmtOut -> | |||
| SoxLib.writeStorableVectorLazy fmtOut $ | |||
| SVL.interleaveFirstPattern $ | |||
| map | |||
| (Causal.apply | |||
| (arr (Bin.fromCanonicalWith Real.roundSimple) | |||
| <<< | |||
| dehum params | |||
| <<< | |||
| arr Bin.toCanonical)) $ | |||
| SVL.deinterleave (numChannels params) sig | |||
| import qualified System.Console.GetOpt as Opt | |||
| import qualified Algebra.RealRing as Real | |||
| import qualified Data.StorableVector.Lazy as SVL | |||
| import qualified Data.List.HT as ListHT | |||
| import qualified Synthesizer.Basic.Binary as Bin | |||
| import qualified Synthesizer.Causal.Process as Causal | |||
| import Shell.Utility.Exit (exitFailureMsg) | |||
| import System.Environment (getArgs, getProgName, ) | |||
| import Control.Monad (when, ) | |||
| import Text.Printf (printf, ) | |||
| import Data.Foldable (forM_, ) | |||
| import Control.Arrow (arr, (<<<), (^<<), ) | |||
| import System.Console.GetOpt | |||
| (getOpt, usageInfo, ArgDescr(NoArg, ReqArg), ) | |||
| import qualified Sound.SoxLib as SoxLib | |||
| import Data.Int (Int32, ) | |||
| runEnvelope :: Flags -> FilePath -> FilePath -> IO () | |||
| runEnvelope flags input output = | |||
| withSound flags input $ \fmtIn params sig -> | |||
| SoxLib.withWrite | |||
| (monoInfoFromFormat fmtIn params) | |||
| output $ \fmtOut -> | |||
| SoxLib.writeStorableVectorLazy fmtOut $ | |||
| Causal.apply | |||
| (arr (Bin.fromCanonicalWith Real.roundSimple)) $ | |||
| trackEnvelope params $ | |||
| map | |||
| (Causal.apply | |||
| (arr (^2) | |||
| <<< | |||
| dehum params | |||
| <<< | |||
| arr Bin.toCanonical)) $ | |||
| SVL.deinterleave (numChannels params) sig | |||
| --runDehum :: Flags -> FilePath -> FilePath -> IO () | |||
| --runDehum flags input output = | |||
| -- withSound flags input $ \fmtIn params sig -> | |||
| -- SoxLib.withWrite | |||
| -- (writerInfoFromFormat fmtIn params) | |||
| -- output $ \fmtOut -> | |||
| -- SoxLib.writeStorableVectorLazy fmtOut $ | |||
| -- SVL.interleaveFirstPattern $ | |||
| -- map | |||
| -- (Causal.apply | |||
| -- (arr (Bin.fromCanonicalWith Real.roundSimple) | |||
| -- <<< | |||
| -- dehum params | |||
| -- <<< | |||
| -- arr Bin.toCanonical)) $ | |||
| -- SVL.deinterleave (numChannels params) sig | |||
| runSizes :: Flags -> FilePath -> IO () | |||
| runSizes flags input = | |||
| withSound flags input $ \_fmt params sig -> | |||
| mapM_ print $ pieceDurations params sig | |||
| --runEnvelope :: Flags -> FilePath -> FilePath -> IO () | |||
| --runEnvelope flags input output = | |||
| -- withSound flags input $ \fmtIn params sig -> | |||
| -- SoxLib.withWrite | |||
| -- (monoInfoFromFormat fmtIn params) | |||
| -- output $ \fmtOut -> | |||
| -- SoxLib.writeStorableVectorLazy fmtOut $ | |||
| -- Causal.apply | |||
| -- (arr (Bin.fromCanonicalWith Real.roundSimple)) $ | |||
| -- trackEnvelope params $ | |||
| -- map | |||
| -- (Causal.apply | |||
| -- (arr (^2) | |||
| -- <<< | |||
| -- dehum params | |||
| -- <<< | |||
| -- arr Bin.toCanonical)) $ | |||
| -- SVL.deinterleave (numChannels params) sig | |||
| --runSizes :: Flags -> FilePath -> IO () | |||
| --runSizes flags input = | |||
| -- withSound flags input $ \_fmt params sig -> | |||
| -- mapM_ print $ pieceDurations params sig | |||
| runLabels :: Flags -> FilePath -> IO () | |||
| runLabels flags input = | |||
| @@ -61,6 +78,12 @@ runLabels flags input = | |||
| prefetch (preStart params) $ | |||
| pieceDurations params sig | |||
-- | Chop the input into pieces and return them.
--
-- The pieces are rendered with 'show' and the last character of that
-- rendering is printed, which demands the complete list while the input
-- is still open.
getChops :: Flags -> FilePath -> IO [SVL.Vector Int32]
getChops flags input =
  withSound flags input $ \_ params sig -> do
    let pieces = chopLazy params sig
        rendered = show pieces
    putStrLn [last rendered]
    return $! pieces
| {- | | |||
| > runChop flags "in.wav" "%03d.wav" | |||
| -} | |||
| @@ -68,10 +91,11 @@ runChop :: Flags -> FilePath -> FilePath -> IO () | |||
| runChop flags input output = | |||
| withSound flags input $ \fmtIn params sig -> | |||
| forM_ (zip [(0::Int)..] $ chopLazy params sig) $ \(n,piece) -> | |||
| SoxLib.withWrite | |||
| (writerInfoFromFormat fmtIn params) | |||
| (printf output n) $ \fmtOut -> | |||
| SoxLib.writeStorableVectorLazy fmtOut piece | |||
| print piece | |||
| --SoxLib.withWrite | |||
| -- (writerInfoFromFormat fmtIn params) | |||
| -- (printf output n) $ \fmtOut -> | |||
| -- SoxLib.writeStorableVectorLazy fmtOut piece | |||
| main :: IO () | |||
| main = SoxLib.formatWith $ do | |||
| @@ -83,15 +107,15 @@ main = SoxLib.formatWith $ do | |||
| flags <- foldl (>>=) (return defltFlags) opts | |||
| if flagComputeEnvelope flags | |||
| then | |||
| case files of | |||
| [input,output] -> runEnvelope flags input output | |||
| [] -> exitFailureMsg "need input and output file envelope computation" | |||
| _ -> exitFailureMsg "more than two file names given" | |||
| else | |||
| case files of | |||
| [input,output] -> runChop flags input output | |||
| [input] -> runLabels flags input | |||
| [] -> exitFailureMsg "no input or output given" | |||
| _ -> exitFailureMsg "more than two file names given" | |||
| --if flagComputeEnvelope flags | |||
| -- then | |||
| -- case files of | |||
| -- [input,output] -> runEnvelope flags input output | |||
| -- [] -> exitFailureMsg "need input and output file envelope computation" | |||
| -- _ -> exitFailureMsg "more than two file names given" | |||
| -- else | |||
| case files of | |||
| --[input,output] -> runChop flags input output | |||
| [input] -> getChops flags input >>= print --runLabels flags input | |||
| [] -> exitFailureMsg "no input or output given" | |||
| _ -> exitFailureMsg "more than two file names given" | |||
| @@ -28,6 +28,7 @@ library | |||
| Driver | |||
| Lib | |||
| Parser | |||
| Plot | |||
| Types | |||
| other-modules: | |||
| Paths_autocut | |||
| @@ -36,6 +37,7 @@ library | |||
| ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints | |||
| build-depends: | |||
| base >=4.7 && <5 | |||
| , matplotlib | |||
| , numeric-prelude | |||
| , shell-utility | |||
| , soxlib | |||
| @@ -48,6 +50,7 @@ library | |||
| executable autocut-exe | |||
| main-is: Main.hs | |||
| other-modules: | |||
| Diffs | |||
| Paths_autocut | |||
| hs-source-dirs: | |||
| app | |||
| @@ -55,6 +58,7 @@ executable autocut-exe | |||
| build-depends: | |||
| autocut | |||
| , base >=4.7 && <5 | |||
| , matplotlib | |||
| , numeric-prelude | |||
| , shell-utility | |||
| , soxlib | |||
| @@ -75,6 +79,7 @@ test-suite autocut-test | |||
| build-depends: | |||
| autocut | |||
| , base >=4.7 && <5 | |||
| , matplotlib | |||
| , numeric-prelude | |||
| , shell-utility | |||
| , soxlib | |||
| @@ -28,6 +28,8 @@ dependencies: | |||
| - utility-ht | |||
| - storablevector | |||
| - shell-utility | |||
| - matplotlib | |||
| - webrtc-vad | |||
| ghc-options: | |||
| - -Wall | |||
| @@ -1,6 +1,10 @@ | |||
| module Driver where | |||
| module Driver (withSound, writerInfoFromFormat, monoInfoFromFormat, readAudio, withAudio, processAudio) where | |||
| import Data.Maybe (fromMaybe, ) | |||
| import Foreign.Storable (peek, ) | |||
| import qualified Data.StorableVector.Lazy as SVL | |||
| import qualified Sound.SoxLib as SoxLib | |||
| import Data.Int (Int32, ) | |||
| import Types | |||
| withSound :: | |||
| @@ -11,15 +15,13 @@ withSound :: | |||
| withSound flags path act = | |||
| SoxLib.withRead SoxLib.defaultReaderInfo path $ \fmtPtr -> do | |||
| fmt <- peek fmtPtr | |||
| let numChan = | |||
| fromMaybe 1 $ SoxLib.channels $ SoxLib.signalInfo fmt | |||
| rate = | |||
| case flagSampleRate flags of | |||
| Just r -> r | |||
| Nothing -> | |||
| case SoxLib.rate $ SoxLib.signalInfo fmt of | |||
| Just r -> r | |||
| Nothing -> defaultSampleRate | |||
| let numChan = fromMaybe 1 $ SoxLib.channels $ SoxLib.signalInfo fmt | |||
| rate = case flagSampleRate flags of | |||
| Just r -> r | |||
| Nothing -> | |||
| case SoxLib.rate $ SoxLib.signalInfo fmt of | |||
| Just r -> r | |||
| Nothing -> defaultSampleRate | |||
| params = | |||
| Params { | |||
| sampleRate = rate, | |||
| @@ -35,8 +37,7 @@ withSound flags path act = | |||
| (case flagBlocksize flags of | |||
| SVL.ChunkSize size -> SVL.ChunkSize $ numChan * size) | |||
| monoInfoFromFormat :: | |||
| SoxLib.Format mode -> Params -> SoxLib.WriterInfo | |||
| monoInfoFromFormat :: SoxLib.Format mode -> Params -> SoxLib.WriterInfo | |||
| monoInfoFromFormat fmtIn params = | |||
| SoxLib.defaultWriterInfo { | |||
| SoxLib.writerSignalInfo = Just $ | |||
| @@ -47,8 +48,7 @@ monoInfoFromFormat fmtIn params = | |||
| SoxLib.writerEncodingInfo = Just $ SoxLib.encodingInfo fmtIn | |||
| } | |||
| writerInfoFromFormat :: | |||
| SoxLib.Format mode -> Params -> SoxLib.WriterInfo | |||
| writerInfoFromFormat :: SoxLib.Format mode -> Params -> SoxLib.WriterInfo | |||
| writerInfoFromFormat fmtIn params = | |||
| SoxLib.defaultWriterInfo { | |||
| SoxLib.writerSignalInfo = Just $ | |||
| @@ -57,3 +57,30 @@ writerInfoFromFormat fmtIn params = | |||
| }, | |||
| SoxLib.writerEncodingInfo = Just $ SoxLib.encodingInfo fmtIn | |||
| } | |||
-- | Read a complete audio file into memory and print its channel count.
--
-- The file is opened with 'SoxLib.withRead', so the handle is released even
-- if reading fails.  The vector comes from a lazy read; it is forced
-- completely before the handle is closed, otherwise later consumers would
-- demand data from an already closed file.
-- NOTE(review): assumes 'SoxLib.readStorableVectorLazy' defers reading
-- until chunks are demanded - confirm against the soxlib documentation.
readAudio :: FilePath -> IO (SVL.Vector Int32)
readAudio path =
  SoxLib.withRead SoxLib.defaultReaderInfo path $ \fmtPtr -> do
    fmt <- peek fmtPtr
    let numChan = fromMaybe 1 $ SoxLib.channels $ SoxLib.signalInfo fmt
    print numChan
    v <- SoxLib.readStorableVectorLazy fmtPtr (SVL.chunkSize $ 65536 * numChan)
    -- Summing the chunk lengths demands every chunk, i.e. the whole file.
    SVL.length v `seq` return v
-- | Open an audio file, read it with 'SoxLib.readStorableVectorLazy' and
-- pass the samples to the given action.
--
-- The chunk size is 65536 times the channel count (1 if the file does not
-- report one).
-- NOTE(review): the vector is presumably only valid while the file is open,
-- i.e. inside the action - confirm against the soxlib documentation.
withAudio :: FilePath -> (SVL.Vector Int32 -> IO a) -> IO a
withAudio path action =
  SoxLib.withRead SoxLib.defaultReaderInfo path $ \fmtPtr -> do
    numChan <- fmap (fromMaybe 1 . SoxLib.channels . SoxLib.signalInfo) (peek fmtPtr)
    samples <- SoxLib.readStorableVectorLazy fmtPtr (SVL.chunkSize (65536 * numChan))
    action samples
-- | Read @input@ with the default flags, run the action on its samples and
-- write the action's result to @output@, using writer settings derived
-- from the input format.
processAudio :: FilePath -> FilePath -> (SVL.Vector Int32 -> IO (SVL.Vector Int32)) -> IO ()
processAudio input output action =
  withSound defltFlags input $ \fmtIn params sig ->
    action sig >>= \processed ->
      SoxLib.withWrite (writerInfoFromFormat fmtIn params) output $ \fmtOut ->
        SoxLib.writeStorableVectorLazy fmtOut processed
| @@ -1,4 +1,4 @@ | |||
| module Lib (someFunc) where | |||
| module Lib (prefetch, pieceDurations, chopLazy, chop) where | |||
| import qualified Synthesizer.Storable.Signal as SigSt | |||
| import qualified Synthesizer.ChunkySize.Cut as CutCS | |||
| @@ -9,40 +9,22 @@ import qualified Synthesizer.State.Cut as Cut | |||
| import qualified Synthesizer.State.Signal as Sig | |||
| import qualified Synthesizer.Basic.Binary as Bin | |||
| import qualified Sound.SoxLib as SoxLib | |||
| import qualified Data.StorableVector.Lazy as SVL | |||
| import Foreign.Storable (peek, ) | |||
| import qualified Control.Monad.Trans.State as MS | |||
| import Control.Monad (when, ) | |||
| import Control.Arrow (arr, (<<<), (^<<), ) | |||
| import qualified Data.List.HT as ListHT | |||
| import qualified Data.List as List | |||
| import Data.Tuple.HT (swap, ) | |||
| import Data.Foldable (forM_, ) | |||
| import Data.Maybe (fromMaybe, ) | |||
| import qualified System.Console.GetOpt as Opt | |||
| import System.Console.GetOpt | |||
| (getOpt, usageInfo, ArgDescr(NoArg, ReqArg), ) | |||
| import System.Environment (getArgs, getProgName, ) | |||
| import Text.Printf (printf, ) | |||
| import qualified System.Exit as Exit | |||
| import Shell.Utility.Exit (exitFailureMsg) | |||
| import qualified Algebra.RealRing as Real | |||
| import NumericPrelude.Numeric | |||
| import NumericPrelude.Base | |||
| import Data.Int (Int32, ) | |||
| import Types | |||
| import Driver | |||
| import Prelude () | |||
| import Prelude (, ) | |||
| dehum :: Params -> Causal.T Float Float | |||
| dehum params = | |||
| @@ -63,9 +45,11 @@ trackEnvelope params = | |||
| . | |||
| foldl SigSt.mix SVL.empty | |||
| -- Map each input level to True when it lies strictly below `pauseVolume params`. | |||
| threshold :: Params -> Causal.T Float Bool | |||
| threshold params = Causal.map (\level -> level < pauseVolume params) | |||
| -- Bool -> Bool | |||
| findStarts :: Params -> Causal.T Bool Bool | |||
| findStarts params = | |||
| flip Causal.fromState 0 $ \b -> | |||
| @@ -73,6 +57,7 @@ findStarts params = | |||
| then MS.modify succ >> evalReturn False | |||
| else do n <- MS.get; MS.put 0; return (n >= minPause params) | |||
| -- Bool -> Maybe Int | |||
| measurePauses :: Causal.T Bool (Maybe Int) | |||
| measurePauses = | |||
| flip Causal.fromState 0 $ \b -> | |||
| @@ -92,7 +77,9 @@ pieceDurations params = | |||
| (measurePauses <<< findStarts params <<< threshold params) . | |||
| Sig.fromStorableSignal . | |||
| trackEnvelope params . | |||
| -- on every channel: | |||
| map (Causal.apply (arr (^2) <<< dehum params <<< arr Bin.toCanonical)) . | |||
| -- separate channels ? | |||
| SVL.deinterleave (numChannels params) | |||
| pieceDurationsPrefetchLazy :: Params -> SVL.Vector Int32 -> [ChunkySize.T] | |||
| @@ -114,8 +101,7 @@ prefetch n (s:ss) = | |||
| then prefetch (n-s) ss | |||
| else (s-n) : ss | |||
| chop, chopLazy :: | |||
| Params -> SVL.Vector Int32 -> [SVL.Vector Int32] | |||
| chop, chopLazy :: Params -> SVL.Vector Int32 -> [SVL.Vector Int32] | |||
| chop params sig0 = | |||
| snd $ | |||
| List.mapAccumL (\sig n -> swap $ SVL.splitAt n sig) sig0 $ | |||
| @@ -1,10 +1,23 @@ | |||
| module Parser where | |||
| import qualified Sound.SoxLib as SoxLib | |||
| import qualified System.Console.GetOpt as Opt | |||
| import qualified Data.StorableVector.Lazy as SVL | |||
| import qualified System.Exit as Exit | |||
| import qualified Algebra.RealRing as Real | |||
| import Shell.Utility.Exit (exitFailureMsg) | |||
| import System.Console.GetOpt | |||
| (getOpt, usageInfo, ArgDescr(NoArg, ReqArg), ) | |||
| import System.Environment (getArgs, getProgName, ) | |||
| import Text.Printf (printf, ) | |||
| import NumericPrelude.Numeric | |||
| import NumericPrelude.Base | |||
| import Types | |||
| import Driver | |||
| import Prelude () | |||
| parseCard :: (Read a, Real.C a) => String -> String -> IO a | |||
| parseCard name str = | |||
| case reads str of | |||
| @@ -67,21 +80,3 @@ description = | |||
| return $ flags{flagComputeEnvelope = True}) | |||
| "compute envelope for assistance in finding appropriate parameters" : | |||
| [] | |||
| defaultSampleRate :: SoxLib.Rate | |||
| defaultSampleRate = 44100 | |||
| freq :: SoxLib.Rate -> (Flags -> Freq) -> (Flags -> Float) | |||
| freq sr acc flags = | |||
| (case acc flags of Freq f -> f) / realToFrac sr | |||
| time :: SoxLib.Rate -> (Flags -> Time) -> (Flags -> Int) | |||
| time sr acc flags = | |||
| round ((case acc flags of Time t -> t) * realToFrac sr) | |||
| formatFreq :: Freq -> String | |||
| formatFreq (Freq t) = show t -- ++ "Hz" | |||
| formatTime :: Time -> String | |||
| formatTime (Time t) = show t -- ++ "s" | |||
| @@ -0,0 +1,10 @@ | |||
| module Plot where | |||
| import Graphics.Matplotlib | |||
| -- Demo waveform: a sine with period 10 whose amplitude is modulated by a slower | |||
| -- sine (period 90) rescaled into the range 0..1. Uses the Prelude's `pi` | |||
| -- instead of a truncated literal so zeros and periods are exact. | |||
| signal :: [Double] -> [Double] | |||
| signal xs = [ (sin (x * pi / 45) + 1) / 2 * sin (x * pi / 5) | x <- xs ] | |||
| -- Show `signal` evaluated at the positions 1..1000 as an on-screen line plot. | |||
| plot :: IO () | |||
| plot = let samplePoints = [1 .. 1000] in onscreen (line samplePoints (signal samplePoints)) | |||
| @@ -1,5 +1,8 @@ | |||
| module Types where | |||
| import qualified Data.StorableVector.Lazy as SVL | |||
| import qualified Sound.SoxLib as SoxLib | |||
| -- Wrapper around a Float time value (presumably seconds; confirm against the option parser). | |||
| newtype Time = Time Float | |||
| deriving (Eq, Show) | |||
| @@ -43,3 +46,20 @@ data Params = | |||
| pauseVolume :: Float, | |||
| minPause, preStart :: Int | |||
| } | |||
| -- Render the wrapped frequency value with `show`; no "Hz" suffix is appended. | |||
| formatFreq :: Freq -> String | |||
| formatFreq (Freq hz) = show hz | |||
| -- Render the wrapped time value with `show`; no "s" suffix is appended. | |||
| formatTime :: Time -> String | |||
| formatTime (Time secs) = show secs | |||
| -- Default sample rate, 44100 (presumably samples per second; where it is applied is not visible here). | |||
| defaultSampleRate :: SoxLib.Rate | |||
| defaultSampleRate = 44100 | |||
| -- Pick a frequency out of `flags` with the accessor `acc` and divide it by the | |||
| -- sample rate `sr`, giving the frequency relative to the sample rate. | |||
| freq :: SoxLib.Rate -> (Flags -> Freq) -> (Flags -> Float) | |||
| freq sr acc flags = let Freq hz = acc flags in hz / realToFrac sr | |||
| -- Pick a time out of `flags` with the accessor `acc`, multiply it by the sample | |||
| -- rate `sr`, and round to the nearest Int (presumably a length in samples). | |||
| time :: SoxLib.Rate -> (Flags -> Time) -> (Flags -> Int) | |||
| time sr acc flags = let Time secs = acc flags in round (secs * realToFrac sr) | |||
| @@ -65,3 +65,5 @@ packages: | |||
| # | |||
| # Allow a newer minor version of GHC than the snapshot specifies | |||
| # compiler-check: newer-minor | |||
| extra-deps: | |||
| # - Chart-diagrams-1.9.3@sha256:63668daff044a79827b7edb265265a4a8237424abb8f808ad1fcbdb3d47e753d,1801 | |||