Procházet zdrojové kódy

update analysis in python, add some more hs

master
flavis před 3 roky
rodič
revize
2abb417adf
Podepsáno: christian <christian@flavigny.de> ID GPG klíče: D953D69721B948B3
13 změnil soubory, kde provedl 493 přidání a 175 odebrání
  1. +3
    -0
      .gitignore
  2. +12
    -9
      analysis.py
  3. +210
    -58
      analysis_cont.py
  4. +89
    -0
      app/Diffs.hs
  5. +78
    -54
      app/Main.hs
  6. +5
    -0
      autocut.cabal
  7. +2
    -0
      package.yaml
  8. +41
    -14
      src/Driver.hs
  9. +8
    -22
      src/Lib.hs
  10. +13
    -18
      src/Parser.hs
  11. +10
    -0
      src/Plot.hs
  12. +20
    -0
      src/Types.hs
  13. +2
    -0
      stack.yaml

+ 3
- 0
.gitignore Zobrazit soubor

@@ -5,3 +5,6 @@
*.wav *.wav
*.lock *.lock
*_data *_data
*.prof
labels.txt
*.png

+ 12
- 9
analysis.py Zobrazit soubor

@@ -133,7 +133,7 @@ def spl_on_silence():
def non_silent_chunks(song): def non_silent_chunks(song):
#song = AudioSegment.from_wav("recording.wav") #song = AudioSegment.from_wav("recording.wav")


return detect_nonsilent(song, min_silence_len=400, silence_thresh=-50)
return detect_nonsilent(song, min_silence_len=10, silence_thresh=-50)




def audiosegment_to_librosawav(audiosegment): def audiosegment_to_librosawav(audiosegment):
@@ -175,7 +175,8 @@ def seg_is_speech(seg):
offset = offset + n offset = offset + n
total += 1 total += 1


return speeches / total
#return speeches / total
return 1.0




def make_widths_equal(fig, rect, ax1, ax2, ax3, pad): def make_widths_equal(fig, rect, ax1, ax2, ax3, pad):
@@ -193,7 +194,7 @@ if __name__ == '__main__':
vad = webrtcvad.Vad() vad = webrtcvad.Vad()


frame_duration_ms = 10 frame_duration_ms = 10
fp = "hard_pieces.wav"
fp = "hard_piece_7.wav"
y, sr = librosa.load(fp, mono=True, sr=32000) y, sr = librosa.load(fp, mono=True, sr=32000)


#pcm_data = y.tobytes() #pcm_data = y.tobytes()
@@ -269,7 +270,8 @@ if __name__ == '__main__':
continue continue
max_j = i max_j = i
for j in range(i, n_segs): for j in range(i, n_segs):
if diffs_penalised[i,j] < 80:
if diffs[i,j] < 80:
#if diffs_penalised[i,j] < 80:
max_j = j max_j = j
delete_segs[i:max_j] = True delete_segs[i:max_j] = True
@@ -285,13 +287,14 @@ if __name__ == '__main__':
#print("{0}\t{1}\tvad {2}".format(s1/1000, e1/1000, vad_coeff)) #print("{0}\t{1}\tvad {2}".format(s1/1000, e1/1000, vad_coeff))




fig, ax = plt.subplots(nrows=3, sharex=True)
ax[0].imshow(diffs)
ax[1].imshow(diffs_penalised)
#fig, ax = plt.subplots(nrows=3, sharex=True)
fig, ax = plt.subplots(nrows=1, sharex=True)
ax.imshow(diffs)
#ax[1].imshow(diffs_penalised)
#ax[1].imshow(np.reshape(vad_coeffs, (1, n_segs))) #ax[1].imshow(np.reshape(vad_coeffs, (1, n_segs)))
ax[2].imshow(np.reshape(lengths, (1, n_segs)))
#ax[2].imshow(np.reshape(lengths, (1, n_segs)))


make_widths_equal(fig, 111, ax[0], ax[1], ax[2], pad=0.5)
#make_widths_equal(fig, 111, ax[0], ax[1], ax[2], pad=0.5)
plt.show() plt.show()
#for n, seg in enumerate(segs): #for n, seg in enumerate(segs):
# sf.write('part' + str(n) + '.wav', seg, sr) # sf.write('part' + str(n) + '.wav', seg, sr)


+ 210
- 58
analysis_cont.py Zobrazit soubor

@@ -12,10 +12,12 @@ import random
from mpl_toolkits.axes_grid1.axes_divider import VBoxDivider from mpl_toolkits.axes_grid1.axes_divider import VBoxDivider
import mpl_toolkits.axes_grid1.axes_size as Size import mpl_toolkits.axes_grid1.axes_size as Size
import cv2 import cv2
import sys


import webrtcvad import webrtcvad


min_silence_len = 400 min_silence_len = 400
frame_duration_ms = 10




def calc_dtw_sim(y1, y2, sr1, sr2, plot_result=False): def calc_dtw_sim(y1, y2, sr1, sr2, plot_result=False):
@@ -159,9 +161,13 @@ def samples_to_millisecond(samples, sr):
return (samples / sr) * 1000 return (samples / sr) * 1000




def samples_to_time(samples, sr):
    """Format a sample position as the time string produced by ``ms_to_time``.

    ``samples`` is converted to milliseconds at sample rate ``sr`` via
    ``samples_to_millisecond`` and the result is passed to ``ms_to_time``.
    """
    elapsed_ms = samples_to_millisecond(samples, sr)
    return ms_to_time(elapsed_ms)


def ms_to_time(ms): def ms_to_time(ms):
secs = ms / 1000 secs = ms / 1000
return "{0}:{1}".format(math.floor(secs / 60), secs % 60)
return "{0}:{1:.4f}".format(math.floor(secs / 60), secs % 60)




def seg_is_speech(seg): def seg_is_speech(seg):
@@ -182,21 +188,29 @@ def seg_is_speech(seg):
offset = offset + n offset = offset + n
total += 1 total += 1


#return speeches / total
return 1.0
return speeches / total




def calculate_best_offset(mfcc_ref, mfcc_seg, sr): def calculate_best_offset(mfcc_ref, mfcc_seg, sr):
return librosa.segment.cross_similarity(mfcc_seg, mfcc_ref, mode='affinity', metric='cosine') return librosa.segment.cross_similarity(mfcc_seg, mfcc_ref, mode='affinity', metric='cosine')




def detect_lines(img, duration_x, duration_y):
def detect_lines(img, duration_x, duration_y, plot_result=False):
#print(img.shape) #print(img.shape)
#print(np.min(img), np.max(img)) #print(np.min(img), np.max(img))
img = cv2.imread('affine_similarity.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = np.vectorize(int)((1-img) * 255).astype('uint8')
img = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
#print(img, type(img))
#img = cv2.imread('affine_similarity_2.png')
#gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#cv2.imshow("gray", gray)
#cv2.waitKey(0)
#print(gray, type(gray), gray.shape, gray.dtype)
#print(gray2, type(gray2), gray2.shape, gray2.dtype)
kernel_size = 5 kernel_size = 5
blur_gray = cv2.GaussianBlur(gray, (kernel_size, kernel_size), 0) blur_gray = cv2.GaussianBlur(gray, (kernel_size, kernel_size), 0)
#cv2.imshow("blur gray", blur_gray)
#cv2.waitKey(0)


low_threshold = 50 low_threshold = 50
high_threshold = 150 high_threshold = 150
@@ -207,7 +221,8 @@ def detect_lines(img, duration_x, duration_y):
threshold = 15 # minimum number of votes (intersections in Hough grid cell) threshold = 15 # minimum number of votes (intersections in Hough grid cell)
min_line_length = 50 # minimum number of pixels making up a line min_line_length = 50 # minimum number of pixels making up a line
max_line_gap = 20 # maximum gap in pixels between connectable line segments max_line_gap = 20 # maximum gap in pixels between connectable line segments
line_image = np.copy(img) * 0 # creating a blank to draw lines on
if plot_result:
line_image = np.copy(img) * 0 # creating a blank to draw lines on


# Run Hough on edge detected image # Run Hough on edge detected image
# Output "lines" is an array containing endpoints of detected line segments # Output "lines" is an array containing endpoints of detected line segments
@@ -218,35 +233,54 @@ def detect_lines(img, duration_x, duration_y):


scale_x = duration_x / width scale_x = duration_x / width
scale_y = duration_y / height scale_y = duration_y / height
print(img.shape, scale_x, scale_y, duration_x, duration_y)
#print(img.shape, scale_x, scale_y, duration_x, duration_y)


#slope = duration_y / duration_x #slope = duration_y / duration_x
slope = 1 slope = 1


expected_slope = scale_x / scale_y expected_slope = scale_x / scale_y
#print(expected_slope)
#expected_slope = 1.0 # y is inverted by opencv
#expected_slope = 0.101694915 #expected_slope = 0.101694915


print(expected_slope)
ls = []
offsets = [] offsets = []
for line in lines:
for x1,y1,x2,y2 in line:
# swapped y1 and y2 since y is measured from the top
slope = (y1-y2)/(x2-x1)
if abs(slope - expected_slope) < 0.03:
cv2.line(line_image,(x1,y1),(x2,y2),(255,0,0),5)
cv2.putText(img, "{:.2f}".format(slope), (x1, y1), fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=0.5,color=(0, 0, 255))
if (x1 / width) < 0.15:
print(height-y1)
y = height - y1
y0 = y - x1 * slope
offsets.append(y0 * scale_y)
#actual_lines.append((x1 * scale_x, (height - y1) * scale_y, x2 * scale_x, (height - y2) * scale_y))
xs = []
if lines is not None:
for line in lines:
for x1,y1,x2,y2 in line:
# swapped y1 and y2 since y is measured from the top
slope = (y2-y1)/(x2-x1) if x2 != x1 else 42
if abs(slope - expected_slope) < 0.15:#and (x1 / width) < 0.15:
y = y1
y0 = (y - x1 * slope)
if plot_result:
#cv2.line(line_image,(0,int(y0)),(x2,y2),(0,255,0),5)
cv2.line(line_image,(x1, y1),(x2,y2),(255,0,0),5)
cv2.putText(img, "{:.2f}".format(slope), (x1, y1), fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=2,color=(0, 0, 255))
#if (x1 / width) < 0.15:
#print(height-y1)
#y = height - y1
#y = y1
#y0 = y - x1 * slope
#offsets.append(y0 * scale_y)
#xs.append(x1)
ls.append((x1, y1, slope))
#actual_lines.append((x1 * scale_x, (height - y1) * scale_y, x2 * scale_x, (height - y2) * scale_y))
#print(max(slopes)) #print(max(slopes))
x_min = min(ls, key=lambda a: a[0])[0] if len(ls) > 0 else 42 # just something > 10
offsets = [ (y1 + (x_min - x1)*slope) * scale_y for x1, y1, slope in ls ]
if plot_result:
for x1, y1, slope in ls:
y = y1 + (x_min -x1)*slope
#cv2.line(line_image,(x_min,int(y)),(x1,y1),(0,255,0),5)


lines_edges = cv2.addWeighted(img, 0.8, line_image, 1, 0)
#cv2.imshow("lines", lines_edges)
#cv2.waitKey(0)
return offsets
#cv2.line(line_image, (x_min, 0), (x_min, height-1), (0, 0, 255), 2)
lines_edges = cv2.addWeighted(img, 0.8, line_image, 1, 0)
lines_edges_resized = cv2.resize(lines_edges, (int(1024 * duration_x / duration_y ), 1024))
cv2.imshow("lines", lines_edges_resized)
cv2.waitKey(0)
return (x_min*scale_x, offsets)




def map2d(x, y, f): def map2d(x, y, f):
@@ -259,6 +293,103 @@ def map2d(x, y, f):
return res return res




def find_repetition(mfcc_ref, seg, sr, hop_length, sentence_timestamps, plot_result=False):
    """Search the reference MFCC window for a repetition of ``seg``.

    The MFCCs of ``seg`` (first coefficient dropped) are cross-compared with
    ``mfcc_ref``; ``detect_lines`` then extracts candidate start offsets from
    the similarity matrix. Each entry of ``sentence_timestamps`` is compared
    with every candidate start, and the last timestamp lying closer than
    ``min_silence_len / 2`` milliseconds to some candidate is remembered.

    Relies on the module-level names ``n_mfcc`` and ``min_silence_len``.

    Returns a tuple ``(x_offset_ms, latest)`` where ``x_offset_ms`` is the
    first value returned by ``detect_lines`` converted to milliseconds, and
    ``latest`` is the matching timestamp or ``None`` if none matched.
    """
    seg_features = librosa.feature.mfcc(y=seg, sr=sr, hop_length=hop_length, n_mfcc=n_mfcc)[1:, :]
    similarity = calculate_best_offset(mfcc_ref, seg_features, sr)
    ref_duration_samples = mfcc_ref.shape[1] * hop_length
    x_min, offsets = detect_lines(similarity, len(seg), ref_duration_samples, plot_result=plot_result)
    found_starts = sorted(samples_to_millisecond(start, sr) for start in offsets)

    # Distance of every sentence timestamp to every detected start.
    distances = map2d(sentence_timestamps, found_starts, lambda ts, start: abs(ts - start))
    if plot_result:
        plt.imshow(distances)
        plt.show()

    tolerance_ms = min_silence_len / 2
    latest = None
    for idx, row in enumerate(distances):
        # Rows are empty when no start was detected; nothing to match then.
        if len(row) > 0 and min(row) < tolerance_ms:
            latest = sentence_timestamps[idx]
    return (samples_to_millisecond(x_min, sr), latest)


def samples_to_hops(samples, hop_length):
    """Convert a sample count to the nearest whole number of hops.

    Uses the built-in ``round``, so exact halves round to the even neighbour.
    """
    hops = samples / hop_length
    return round(hops)


def hops_to_samples(hops, hop_length):
    """Convert a hop count to a sample count, rounded with the built-in ``round``."""
    samples = hop_length * hops
    return round(samples)


def cont_find_repetitions(y, sr, hop_length, sentence_timestamps):
    """Slide a short window over ``y`` and suggest spans that look like repeated takes.

    Parameters:
        y: audio samples (indexed and sliced by sample position).
        sr: sample rate used for all sample/millisecond conversions.
        hop_length: hop length passed to the MFCC computation.
        sentence_timestamps: list of entries whose index 0 and index 1 are
            compared against millisecond offsets (start and end of a
            non-silent chunk). Must already be sorted by index 0.

    Every 200 ms a 1.5 s window is compared (via ``find_repetition``) with the
    following 20 s of MFCC frames. Windows that lie in silence before the
    next sentence start are skipped. Each hit yields a suggestion
    ``(start_ms, end_ms)``; suggestions whose starts are less than 250 ms
    apart are merged (mean start, max end), and a merged start within 150 ms
    of a sentence start is snapped onto that sentence start.

    Relies on the module-level name ``n_mfcc``.

    Returns:
        A list of ``(start_ms, end_ms)`` tuples.
    """
    assert sorted(sentence_timestamps, key=lambda t: t[0]) == sentence_timestamps
    mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=n_mfcc)[1:,:]

    step_length_ms = 200
    step_length_samples = millisecond_to_samples(step_length_ms, sr)
    window_length_ms = 1500
    window_length_samples = millisecond_to_samples(window_length_ms, sr)
    ref_window_length_ms = 20*1000  # 20 seconds
    ref_window_length_samples = millisecond_to_samples(ref_window_length_ms, sr)
    ref_window_length_hops = samples_to_hops(ref_window_length_samples, hop_length)

    offset = 0
    available_ts = sentence_timestamps
    last_sentence_end = 0
    deletion_suggestions = []

    while offset + step_length_samples < len(y) and len(available_ts) > 0:
        offset_ms = samples_to_millisecond(offset, sr)
        # Skip windows that start in the pause between the previous sentence
        # and the next one.
        if offset_ms < available_ts[0][0] and offset_ms >= last_sentence_end:
            offset += step_length_samples
            continue
        seg = y[offset : offset + window_length_samples]
        first_hop = samples_to_hops(offset, hop_length)
        mfcc_window = mfcc[:, first_hop : first_hop + ref_window_length_hops]
        # Sentence starts are passed relative to the current window start.
        x_offset_ms, ts_ms = find_repetition(mfcc_window,
                                             seg,
                                             sr,
                                             hop_length,
                                             [t[0] - offset_ms for t in available_ts])
        if ts_ms is not None and x_offset_ms < step_length_ms:
            print("delete from {0} to {1}".format(samples_to_time(offset + millisecond_to_samples(x_offset_ms, sr), sr), ms_to_time(offset_ms + ts_ms)))
            deletion_suggestions.append((offset_ms + x_offset_ms, offset_ms + ts_ms))
        offset += step_length_samples
        # Once the window has moved past the next sentence start, consume it.
        if offset_ms + step_length_ms > available_ts[0][0]:
            last_sentence_end = available_ts[0][1]
            available_ts = available_ts[1:]

    # Merge suggestions whose start times are less than 250 ms apart.
    deletion_groups = []
    cur_deletion = None
    for sugg in deletion_suggestions:
        if cur_deletion is None:
            cur_deletion = [sugg]
        elif sugg[0] - cur_deletion[-1][0] < 250:
            cur_deletion.append(sugg)
        else:
            deletion_groups.append(cur_deletion)
            cur_deletion = [sugg]
    # Flush the group still being collected; without this the last (or only)
    # deletion would be lost.
    if cur_deletion is not None:
        deletion_groups.append(cur_deletion)

    deletions = [(np.mean([d[0] for d in ds]), np.max([d[1] for d in ds])) for ds in deletion_groups]
    for n, d in enumerate(deletions):
        # Snap the start onto a sentence start when one is within 150 ms.
        offs = [abs(d[0] - ts[0]) for ts in sentence_timestamps]
        i = np.argmin(offs)
        if offs[i] < 150:
            deletions[n] = (sentence_timestamps[i][0], d[1])
        else:
            deletions[n] = (d[0], d[1])
    return deletions


def make_widths_equal(fig, rect, ax1, ax2, ax3, pad): def make_widths_equal(fig, rect, ax1, ax2, ax3, pad):
# pad in inches # pad in inches
divider = VBoxDivider( divider = VBoxDivider(
@@ -271,20 +402,36 @@ def make_widths_equal(fig, rect, ax1, ax2, ax3, pad):




if __name__ == '__main__': if __name__ == '__main__':
#vad = webrtcvad.Vad()
#hop_length = 128
#n_mfcc = 13

#frame_duration_ms = 10
fp = "hard_piece_7.wav"
y, sr = librosa.load(fp, mono=True)
vad = webrtcvad.Vad()
hop_length = 128
n_mfcc = 42

fp = "hard_pieces.wav"
print("loading file ...")
y, sr = librosa.load(fp, mono=True, sr=32000)
print("calculating mfcc ...")
mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=n_mfcc)[1:,:]
song = AudioSegment.from_wav(fp) song = AudioSegment.from_wav(fp)
mf_w = mfcc.shape[1]
l = y.shape[0]
print(l / mf_w)


ts_non_sil_ms = non_silent_chunks(song) ts_non_sil_ms = non_silent_chunks(song)


#print(y.shape)
#mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=n_mfcc)[1:,:]
#autocorr = librosa.autocorrelate(y)
#fig, ax = plt.subplots()
#ax.plot(autocorr)
#plt.show()
#ts_non_sil_ms = [ t[0] for t in non_silent_chunks(song) ]
#print(mfcc.shape) #print(mfcc.shape)
#print("finding reps ...")
dels = cont_find_repetitions(y, sr, hop_length, ts_non_sil_ms)

for d in dels:
print("{0}\t{1}\tdelete".format(d[0]/1000, d[1]/1000))
#window_length_ms = 1000
#window_length_samples = millisecond_to_samples(window_length_ms, sr)
#seg = y[25280 : 25280 + window_length_samples]


#seg_duration_ms = 100 #seg_duration_ms = 100
#seg_duration_samples = millisecond_to_samples(seg_duration_ms, sr) #seg_duration_samples = millisecond_to_samples(seg_duration_ms, sr)
@@ -305,16 +452,36 @@ if __name__ == '__main__':
##(seg, offset) = segs[0] ##(seg, offset) = segs[0]


fp_segment = "segment.wav" fp_segment = "segment.wav"
seg, sr_seg = librosa.load(fp_segment, mono=True)

assert sr==sr_seg

##for seg in segs:
#mfcc_seg = librosa.feature.mfcc(y=seg, sr=sr_seg, hop_length=hop_length, n_mfcc=n_mfcc)[1:,:]
#xsim = calculate_best_offset(mfcc, mfcc_seg, sr)

#seg = y
#sr_seg = sr
seg, sr_seg = librosa.load(fp_segment, mono=True, sr=32000)

#assert sr==sr_seg
#mfcc_window = mfcc[:,1000:]

#x_offset, ts_ms = find_repetition(mfcc_window, seg, sr, hop_length, [ t[0] for t in ts_non_sil_ms], plot_result=True)
#if ts_ms is not None:
# print("starting from {0} the seg is repeated at {1}".format(ms_to_time(x_offset), ms_to_time(ts_ms)))
#else:
# print("no rep found")

#cutoff = int(0.2*len(seg))
#print(samples_to_millisecond(cutoff, sr))

#print("calculating xcross ...")
#xsim = librosa.segment.cross_similarity(mfcc, mfcc, mode='affinity', metric='cosine')
#chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=hop_length)
#mfcc_stack = librosa.feature.stack_memory(mfcc, n_steps=10, delay=3)
#xsim = librosa.segment.recurrence_matrix(mfcc, mode='affinity', metric='cosine')
#lag = librosa.segment.recurrence_to_lag(xsim, pad=False)

#xsim = librosa.segment.recurrence_matrix(mfcc, mode='affinity', metric='cosine',
# width=50)
#fig, ax = plt.subplots(nrows=1, sharex=True) #fig, ax = plt.subplots(nrows=1, sharex=True)
#img = librosa.display.specshow(xsim, x_axis='s', y_axis='s', hop_length=hop_length, ax=ax, cmap='magma_r') #img = librosa.display.specshow(xsim, x_axis='s', y_axis='s', hop_length=hop_length, ax=ax, cmap='magma_r')
#plt.show()
print("detecting lines ...")
#detect_lines(np.flip(xsim, 0), len(y), len(y), plot_result=True)
#print(detect_lines(xsim)) #print(detect_lines(xsim))
#ax.imshow(np.transpose(xsim), aspect='auto') #ax.imshow(np.transpose(xsim), aspect='auto')
#ax[1].imshow(diffs_penalised) #ax[1].imshow(diffs_penalised)
@@ -323,21 +490,6 @@ if __name__ == '__main__':


#make_widths_equal(fig, 111, ax[0], ax[1], ax[2], pad=0.5) #make_widths_equal(fig, 111, ax[0], ax[1], ax[2], pad=0.5)
#plt.show() #plt.show()
found_starts = sorted([ samples_to_millisecond(y0, sr) for y0 in detect_lines(None, len(seg), len(y))])

def f(ts, start):
return abs(ts[0] - start)

closest = map2d(ts_non_sil_ms, found_starts, f)
plt.imshow(closest)
plt.show()
latest = -1
for i, row in enumerate(closest):
# min silence len = 400
if min(row) < min_silence_len / 2:
latest = ts_non_sil_ms[i]
print("delete until:", ms_to_time(latest[0]))

#print("possible starts:", [ ms_to_time(t) for t in found_starts]) #print("possible starts:", [ ms_to_time(t) for t in found_starts])
#for n, seg in enumerate(segs): #for n, seg in enumerate(segs):
# sf.write('part' + str(n) + '.wav', seg, sr) # sf.write('part' + str(n) + '.wav', seg, sr)


+ 89
- 0
app/Diffs.hs Zobrazit soubor

@@ -0,0 +1,89 @@
module Diffs where

import Data.Int (Int32, )
import Control.Arrow (arr, (<<<), (^<<), )
import qualified Synthesizer.Causal.Process as Causal
import qualified Sound.SoxLib as SoxLib
import qualified Data.StorableVector.Lazy as SVL
import qualified Synthesizer.Storable.Signal as SigSt
import qualified Synthesizer.Generic.Signal as Sig
import qualified Synthesizer.Basic.Binary as Bin
import qualified Synthesizer.Generic.Analysis as Ana
import qualified Synthesizer.Generic.Cut as Cut
import qualified Synthesizer.Generic.Fourier as Four
import qualified Foreign.Storable as Stor

import qualified Number.Complex as C
import qualified Algebra.Additive as A
import qualified Algebra.Transcendental as T
import Graphics.Matplotlib

import Driver
import Types

-- | Plot the real part of a correlation-style product of the leading fifth
-- of @out002.wav@ and @out004.wav@.
--
-- Both files are converted with 'prepare', truncated to one fifth of the
-- shorter signal, zero-padded and passed through 'fourTrafo'. The results are
-- transformed forward again, multiplied with the conjugate of the second
-- spectrum, and transformed backward. The lengths of both excerpts and of
-- the result are printed before the plot is shown.
--
-- NOTE(review): 'fourTrafo' already applies a forward transform, so each
-- signal is transformed twice before the product is formed — confirm that
-- this is intended.
calcDiff :: IO ()
calcDiff = withAudio "out002.wav" $ \a' -> withAudio "out004.wav" $ \b' -> do
  let a = prepare a'
      b = prepare b'
      commonLen = min (Cut.length a) (Cut.length b)
      l = commonLen `div` 5 -- take first 20%
      sa = Cut.take l a
      sb = Cut.take l b
      fsa = fourTrafo $ padWithZeros sa
      fsb = fourTrafo $ padWithZeros sb
      corr =
        Four.transformBackward
          (Sig.zipWith (*)
            (Four.transformForward fsa)
            (Causal.apply (arr C.conjugate) $ Four.transformForward fsb))
      reals = (Causal.apply (arr $ C.real) corr) :: SVL.Vector Float
      ys = SVL.unpack reals :: [Float]
      xs = [1..length ys]
  print $ Cut.length sa
  print $ Cut.length sb
  print $ Cut.length corr
  onscreen $ line xs ys


-- | Convert raw samples to canonical values and keep only the first of the
-- two signals obtained by de-interleaving with stride 2 (presumably the
-- first channel of a stereo recording — confirm against the input files).
prepare :: SVL.Vector Int32 -> SVL.Vector (C.T Float)
prepare sig = head (map toCanonicalSignal (SVL.deinterleave 2 sig))
  where
    toCanonicalSignal chan = Causal.apply (arr Bin.toCanonical) chan

-- | Read the first 100 samples of @out003.wav@.
--
-- 'withAudio' reads through a lazy reader and closes the file as soon as the
-- callback returns, so the excerpt is forced (via its length) before it is
-- handed back; otherwise it could be evaluated after the file was closed.
readFirst :: IO (SVL.Vector Int32)
readFirst = withAudio "out003.wav" $ \sig -> do
  let firstSamples = Cut.take 100 sig
  SVL.length firstSamples `seq` return firstSamples

-- | Surround a signal with zeros: half of its length (rounded down) is added
-- in front and again at the end.
--padWithZeros :: SVL.Vector (C.T Float) -> SVL.Vector (C.T Float)
padWithZeros x = padding <> x <> padding
  where
    padding = SVL.take (SVL.length x `div` 2) (SVL.repeat SVL.defaultChunkSize 0)

-- | Infinite signal of @sin t@ for @t = 0, 0.1, 0.2, ...@.
sine :: SVL.Vector Float
sine = SVL.pack SVL.defaultChunkSize samples
  where
    samples = [sin t | t <- [0 :: Float, 0.1 ..]]

-- | Show a line plot of the vector's values against their 1-based positions.
plotVec :: SVL.Vector Float -> IO ()
plotVec v = onscreen (line positions values)
  where
    values = SVL.unpack v
    positions = [1 .. length values]

-- | Lift every sample to a complex number, apply the forward Fourier
-- transform, and keep only the real part of each result.
fourTrafo :: (A.C a, T.C a, Stor.Storable a) => SVL.Vector a -> SVL.Vector a
fourTrafo sig =
  Causal.apply (arr C.real) $
    Four.transformForward $
      Causal.apply (arr C.fromReal) sig

-- | Forward-transform the signal obtained from 'prepare' for the given file,
-- print the length of the transform, and plot its real part.
four :: FilePath -> IO ()
four input = withAudio input $ \raw -> do
  let spectrum = Four.transformForward (prepare raw)
      realPart = Causal.apply (arr C.real) spectrum :: SVL.Vector Float
      ys = SVL.unpack realPart :: [Float]
      xs = [1 .. length ys]
  print $ SVL.length spectrum
  onscreen $ line xs ys

+ 78
- 54
app/Main.hs Zobrazit soubor

@@ -5,46 +5,63 @@ import Parser
import Driver import Driver
import Types import Types


runDehum :: Flags -> FilePath -> FilePath -> IO ()
runDehum flags input output =
withSound flags input $ \fmtIn params sig ->
SoxLib.withWrite
(writerInfoFromFormat fmtIn params)
output $ \fmtOut ->
SoxLib.writeStorableVectorLazy fmtOut $
SVL.interleaveFirstPattern $
map
(Causal.apply
(arr (Bin.fromCanonicalWith Real.roundSimple)
<<<
dehum params
<<<
arr Bin.toCanonical)) $
SVL.deinterleave (numChannels params) sig
import qualified System.Console.GetOpt as Opt
import qualified Algebra.RealRing as Real
import qualified Data.StorableVector.Lazy as SVL
import qualified Data.List.HT as ListHT
import qualified Synthesizer.Basic.Binary as Bin
import qualified Synthesizer.Causal.Process as Causal
import Shell.Utility.Exit (exitFailureMsg)
import System.Environment (getArgs, getProgName, )
import Control.Monad (when, )
import Text.Printf (printf, )
import Data.Foldable (forM_, )
import Control.Arrow (arr, (<<<), (^<<), )
import System.Console.GetOpt
(getOpt, usageInfo, ArgDescr(NoArg, ReqArg), )
import qualified Sound.SoxLib as SoxLib
import Data.Int (Int32, )


runEnvelope :: Flags -> FilePath -> FilePath -> IO ()
runEnvelope flags input output =
withSound flags input $ \fmtIn params sig ->
SoxLib.withWrite
(monoInfoFromFormat fmtIn params)
output $ \fmtOut ->
SoxLib.writeStorableVectorLazy fmtOut $
Causal.apply
(arr (Bin.fromCanonicalWith Real.roundSimple)) $
trackEnvelope params $
map
(Causal.apply
(arr (^2)
<<<
dehum params
<<<
arr Bin.toCanonical)) $
SVL.deinterleave (numChannels params) sig
--runDehum :: Flags -> FilePath -> FilePath -> IO ()
--runDehum flags input output =
-- withSound flags input $ \fmtIn params sig ->
-- SoxLib.withWrite
-- (writerInfoFromFormat fmtIn params)
-- output $ \fmtOut ->
-- SoxLib.writeStorableVectorLazy fmtOut $
-- SVL.interleaveFirstPattern $
-- map
-- (Causal.apply
-- (arr (Bin.fromCanonicalWith Real.roundSimple)
-- <<<
-- dehum params
-- <<<
-- arr Bin.toCanonical)) $
-- SVL.deinterleave (numChannels params) sig


runSizes :: Flags -> FilePath -> IO ()
runSizes flags input =
withSound flags input $ \_fmt params sig ->
mapM_ print $ pieceDurations params sig
--runEnvelope :: Flags -> FilePath -> FilePath -> IO ()
--runEnvelope flags input output =
-- withSound flags input $ \fmtIn params sig ->
-- SoxLib.withWrite
-- (monoInfoFromFormat fmtIn params)
-- output $ \fmtOut ->
-- SoxLib.writeStorableVectorLazy fmtOut $
-- Causal.apply
-- (arr (Bin.fromCanonicalWith Real.roundSimple)) $
-- trackEnvelope params $
-- map
-- (Causal.apply
-- (arr (^2)
-- <<<
-- dehum params
-- <<<
-- arr Bin.toCanonical)) $
-- SVL.deinterleave (numChannels params) sig

--runSizes :: Flags -> FilePath -> IO ()
--runSizes flags input =
-- withSound flags input $ \_fmt params sig ->
-- mapM_ print $ pieceDurations params sig


runLabels :: Flags -> FilePath -> IO () runLabels :: Flags -> FilePath -> IO ()
runLabels flags input = runLabels flags input =
@@ -61,6 +78,12 @@ runLabels flags input =
prefetch (preStart params) $ prefetch (preStart params) $
pieceDurations params sig pieceDurations params sig


-- | Chop the input file into pieces with 'chopLazy' and return them.
--
-- Before returning, the last character of the pieces' 'show' output is
-- printed. Producing that character requires rendering the whole list, which
-- presumably serves to evaluate all pieces while the input is still open —
-- confirm before removing it.
getChops :: Flags -> FilePath -> IO [SVL.Vector Int32]
getChops flags input = withSound flags input $ \_ params sig -> do
  let pieces = chopLazy params sig
  putStrLn [last (show pieces)]
  return $! pieces
{- | {- |
> runChop flags "in.wav" "%03d.wav" > runChop flags "in.wav" "%03d.wav"
-} -}
@@ -68,10 +91,11 @@ runChop :: Flags -> FilePath -> FilePath -> IO ()
runChop flags input output = runChop flags input output =
withSound flags input $ \fmtIn params sig -> withSound flags input $ \fmtIn params sig ->
forM_ (zip [(0::Int)..] $ chopLazy params sig) $ \(n,piece) -> forM_ (zip [(0::Int)..] $ chopLazy params sig) $ \(n,piece) ->
SoxLib.withWrite
(writerInfoFromFormat fmtIn params)
(printf output n) $ \fmtOut ->
SoxLib.writeStorableVectorLazy fmtOut piece
print piece
--SoxLib.withWrite
-- (writerInfoFromFormat fmtIn params)
-- (printf output n) $ \fmtOut ->
-- SoxLib.writeStorableVectorLazy fmtOut piece


main :: IO () main :: IO ()
main = SoxLib.formatWith $ do main = SoxLib.formatWith $ do
@@ -83,15 +107,15 @@ main = SoxLib.formatWith $ do


flags <- foldl (>>=) (return defltFlags) opts flags <- foldl (>>=) (return defltFlags) opts


if flagComputeEnvelope flags
then
case files of
[input,output] -> runEnvelope flags input output
[] -> exitFailureMsg "need input and output file envelope computation"
_ -> exitFailureMsg "more than two file names given"
else
case files of
[input,output] -> runChop flags input output
[input] -> runLabels flags input
[] -> exitFailureMsg "no input or output given"
_ -> exitFailureMsg "more than two file names given"
--if flagComputeEnvelope flags
-- then
-- case files of
-- [input,output] -> runEnvelope flags input output
-- [] -> exitFailureMsg "need input and output file envelope computation"
-- _ -> exitFailureMsg "more than two file names given"
-- else
case files of
--[input,output] -> runChop flags input output
[input] -> getChops flags input >>= print --runLabels flags input
[] -> exitFailureMsg "no input or output given"
_ -> exitFailureMsg "more than two file names given"

+ 5
- 0
autocut.cabal Zobrazit soubor

@@ -28,6 +28,7 @@ library
Driver Driver
Lib Lib
Parser Parser
Plot
Types Types
other-modules: other-modules:
Paths_autocut Paths_autocut
@@ -36,6 +37,7 @@ library
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints
build-depends: build-depends:
base >=4.7 && <5 base >=4.7 && <5
, matplotlib
, numeric-prelude , numeric-prelude
, shell-utility , shell-utility
, soxlib , soxlib
@@ -48,6 +50,7 @@ library
executable autocut-exe executable autocut-exe
main-is: Main.hs main-is: Main.hs
other-modules: other-modules:
Diffs
Paths_autocut Paths_autocut
hs-source-dirs: hs-source-dirs:
app app
@@ -55,6 +58,7 @@ executable autocut-exe
build-depends: build-depends:
autocut autocut
, base >=4.7 && <5 , base >=4.7 && <5
, matplotlib
, numeric-prelude , numeric-prelude
, shell-utility , shell-utility
, soxlib , soxlib
@@ -75,6 +79,7 @@ test-suite autocut-test
build-depends: build-depends:
autocut autocut
, base >=4.7 && <5 , base >=4.7 && <5
, matplotlib
, numeric-prelude , numeric-prelude
, shell-utility , shell-utility
, soxlib , soxlib


+ 2
- 0
package.yaml Zobrazit soubor

@@ -28,6 +28,8 @@ dependencies:
- utility-ht - utility-ht
- storablevector - storablevector
- shell-utility - shell-utility
- matplotlib
- webrtc-vad


ghc-options: ghc-options:
- -Wall - -Wall


+ 41
- 14
src/Driver.hs Zobrazit soubor

@@ -1,6 +1,10 @@
module Driver where
module Driver (withSound, writerInfoFromFormat, monoInfoFromFormat, readAudio, withAudio, processAudio) where


import Data.Maybe (fromMaybe, )
import Foreign.Storable (peek, )
import qualified Data.StorableVector.Lazy as SVL
import qualified Sound.SoxLib as SoxLib import qualified Sound.SoxLib as SoxLib
import Data.Int (Int32, )
import Types import Types


withSound :: withSound ::
@@ -11,15 +15,13 @@ withSound ::
withSound flags path act = withSound flags path act =
SoxLib.withRead SoxLib.defaultReaderInfo path $ \fmtPtr -> do SoxLib.withRead SoxLib.defaultReaderInfo path $ \fmtPtr -> do
fmt <- peek fmtPtr fmt <- peek fmtPtr
let numChan =
fromMaybe 1 $ SoxLib.channels $ SoxLib.signalInfo fmt
rate =
case flagSampleRate flags of
Just r -> r
Nothing ->
case SoxLib.rate $ SoxLib.signalInfo fmt of
Just r -> r
Nothing -> defaultSampleRate
let numChan = fromMaybe 1 $ SoxLib.channels $ SoxLib.signalInfo fmt
rate = case flagSampleRate flags of
Just r -> r
Nothing ->
case SoxLib.rate $ SoxLib.signalInfo fmt of
Just r -> r
Nothing -> defaultSampleRate
params = params =
Params { Params {
sampleRate = rate, sampleRate = rate,
@@ -35,8 +37,7 @@ withSound flags path act =
(case flagBlocksize flags of (case flagBlocksize flags of
SVL.ChunkSize size -> SVL.ChunkSize $ numChan * size) SVL.ChunkSize size -> SVL.ChunkSize $ numChan * size)


monoInfoFromFormat ::
SoxLib.Format mode -> Params -> SoxLib.WriterInfo
monoInfoFromFormat :: SoxLib.Format mode -> Params -> SoxLib.WriterInfo
monoInfoFromFormat fmtIn params = monoInfoFromFormat fmtIn params =
SoxLib.defaultWriterInfo { SoxLib.defaultWriterInfo {
SoxLib.writerSignalInfo = Just $ SoxLib.writerSignalInfo = Just $
@@ -47,8 +48,7 @@ monoInfoFromFormat fmtIn params =
SoxLib.writerEncodingInfo = Just $ SoxLib.encodingInfo fmtIn SoxLib.writerEncodingInfo = Just $ SoxLib.encodingInfo fmtIn
} }


writerInfoFromFormat ::
SoxLib.Format mode -> Params -> SoxLib.WriterInfo
writerInfoFromFormat :: SoxLib.Format mode -> Params -> SoxLib.WriterInfo
writerInfoFromFormat fmtIn params = writerInfoFromFormat fmtIn params =
SoxLib.defaultWriterInfo { SoxLib.defaultWriterInfo {
SoxLib.writerSignalInfo = Just $ SoxLib.writerSignalInfo = Just $
@@ -57,3 +57,30 @@ writerInfoFromFormat fmtIn params =
}, },
SoxLib.writerEncodingInfo = Just $ SoxLib.encodingInfo fmtIn SoxLib.writerEncodingInfo = Just $ SoxLib.encodingInfo fmtIn
} }

-- | Read a whole audio file into memory.
--
-- The channel count reported by the file (1 if absent) is printed and used
-- to size the read chunks. The samples come from a lazy reader, so the
-- vector is forced (via its length) before the file is closed; returning it
-- unevaluated could read from an already closed handle. 'SoxLib.withRead'
-- also closes the file if reading throws.
readAudio :: FilePath -> IO (SVL.Vector Int32)
readAudio path =
  SoxLib.withRead SoxLib.defaultReaderInfo path $ \fmtPtr -> do
    fmt <- peek fmtPtr
    let numChan = fromMaybe 1 $ SoxLib.channels $ SoxLib.signalInfo fmt
    print numChan

    v <- SoxLib.readStorableVectorLazy fmtPtr (SVL.chunkSize $ 65536 * numChan)
    SVL.length v `seq` return v

-- | Open an audio file, read its samples lazily, and run the action on them
-- while the file is open. Chunks hold 65536 samples per channel; the channel
-- count defaults to 1 when the file does not report one.
withAudio :: FilePath -> (SVL.Vector Int32 -> IO a) -> IO a
withAudio path action =
  SoxLib.withRead SoxLib.defaultReaderInfo path $ \fmtPtr -> do
    fmt <- peek fmtPtr
    let channelCount = fromMaybe 1 (SoxLib.channels (SoxLib.signalInfo fmt))
        blockSize = SVL.chunkSize (65536 * channelCount)
    SoxLib.readStorableVectorLazy fmtPtr blockSize >>= action

-- | Read @input@ with the default flags, run the action on its samples, and
-- write the result to @output@ using writer settings derived from the input
-- format.
processAudio :: FilePath -> FilePath -> (SVL.Vector Int32 -> IO (SVL.Vector Int32)) -> IO ()
processAudio input output action =
  withSound defltFlags input $ \fmtIn params sig -> do
    processed <- action sig
    let writerInfo = writerInfoFromFormat fmtIn params
    SoxLib.withWrite writerInfo output $ \fmtOut ->
      SoxLib.writeStorableVectorLazy fmtOut processed

+ 8
- 22
src/Lib.hs Zobrazit soubor

@@ -1,4 +1,4 @@
module Lib (someFunc) where
module Lib (prefetch, pieceDurations, chopLazy, chop) where


import qualified Synthesizer.Storable.Signal as SigSt import qualified Synthesizer.Storable.Signal as SigSt
import qualified Synthesizer.ChunkySize.Cut as CutCS import qualified Synthesizer.ChunkySize.Cut as CutCS
@@ -9,40 +9,22 @@ import qualified Synthesizer.State.Cut as Cut
import qualified Synthesizer.State.Signal as Sig import qualified Synthesizer.State.Signal as Sig
import qualified Synthesizer.Basic.Binary as Bin import qualified Synthesizer.Basic.Binary as Bin


import qualified Sound.SoxLib as SoxLib

import qualified Data.StorableVector.Lazy as SVL import qualified Data.StorableVector.Lazy as SVL
import Foreign.Storable (peek, )


import qualified Control.Monad.Trans.State as MS import qualified Control.Monad.Trans.State as MS
import Control.Monad (when, )
import Control.Arrow (arr, (<<<), (^<<), ) import Control.Arrow (arr, (<<<), (^<<), )


import qualified Data.List.HT as ListHT
import qualified Data.List as List import qualified Data.List as List
import Data.Tuple.HT (swap, ) import Data.Tuple.HT (swap, )
import Data.Foldable (forM_, )
import Data.Maybe (fromMaybe, )

import qualified System.Console.GetOpt as Opt
import System.Console.GetOpt
(getOpt, usageInfo, ArgDescr(NoArg, ReqArg), )
import System.Environment (getArgs, getProgName, )
import Text.Printf (printf, )

import qualified System.Exit as Exit
import Shell.Utility.Exit (exitFailureMsg)


import qualified Algebra.RealRing as Real
import NumericPrelude.Numeric import NumericPrelude.Numeric
import NumericPrelude.Base import NumericPrelude.Base


import Data.Int (Int32, ) import Data.Int (Int32, )


import Types import Types
import Driver


import Prelude ()
import Prelude (, )


dehum :: Params -> Causal.T Float Float dehum :: Params -> Causal.T Float Float
dehum params = dehum params =
@@ -63,9 +45,11 @@ trackEnvelope params =
. .
foldl SigSt.mix SVL.empty foldl SigSt.mix SVL.empty


-- | Float -> Bool: maps every input value to 'True' when it lies strictly
-- below the 'pauseVolume' configured in @params@, and to 'False' otherwise.
threshold :: Params -> Causal.T Float Bool
threshold params = Causal.map belowPauseVolume
  where
    belowPauseVolume level = level < pauseVolume params


-- Bool -> Bool
findStarts :: Params -> Causal.T Bool Bool findStarts :: Params -> Causal.T Bool Bool
findStarts params = findStarts params =
flip Causal.fromState 0 $ \b -> flip Causal.fromState 0 $ \b ->
@@ -73,6 +57,7 @@ findStarts params =
then MS.modify succ >> evalReturn False then MS.modify succ >> evalReturn False
else do n <- MS.get; MS.put 0; return (n >= minPause params) else do n <- MS.get; MS.put 0; return (n >= minPause params)


-- Bool -> Maybe Int
measurePauses :: Causal.T Bool (Maybe Int) measurePauses :: Causal.T Bool (Maybe Int)
measurePauses = measurePauses =
flip Causal.fromState 0 $ \b -> flip Causal.fromState 0 $ \b ->
@@ -92,7 +77,9 @@ pieceDurations params =
(measurePauses <<< findStarts params <<< threshold params) . (measurePauses <<< findStarts params <<< threshold params) .
Sig.fromStorableSignal . Sig.fromStorableSignal .
trackEnvelope params . trackEnvelope params .
-- on every channel:
map (Causal.apply (arr (^2) <<< dehum params <<< arr Bin.toCanonical)) . map (Causal.apply (arr (^2) <<< dehum params <<< arr Bin.toCanonical)) .
-- separate channels?
SVL.deinterleave (numChannels params) SVL.deinterleave (numChannels params)


pieceDurationsPrefetchLazy :: Params -> SVL.Vector Int32 -> [ChunkySize.T] pieceDurationsPrefetchLazy :: Params -> SVL.Vector Int32 -> [ChunkySize.T]
@@ -114,8 +101,7 @@ prefetch n (s:ss) =
then prefetch (n-s) ss then prefetch (n-s) ss
else (s-n) : ss else (s-n) : ss


chop, chopLazy ::
Params -> SVL.Vector Int32 -> [SVL.Vector Int32]
chop, chopLazy :: Params -> SVL.Vector Int32 -> [SVL.Vector Int32]
chop params sig0 = chop params sig0 =
snd $ snd $
List.mapAccumL (\sig n -> swap $ SVL.splitAt n sig) sig0 $ List.mapAccumL (\sig n -> swap $ SVL.splitAt n sig) sig0 $


+ 13
- 18
src/Parser.hs Zobrazit soubor

@@ -1,10 +1,23 @@
module Parser where module Parser where


import qualified Sound.SoxLib as SoxLib import qualified Sound.SoxLib as SoxLib
import qualified System.Console.GetOpt as Opt
import qualified Data.StorableVector.Lazy as SVL
import qualified System.Exit as Exit
import qualified Algebra.RealRing as Real
import Shell.Utility.Exit (exitFailureMsg)
import System.Console.GetOpt
(getOpt, usageInfo, ArgDescr(NoArg, ReqArg), )
import System.Environment (getArgs, getProgName, )
import Text.Printf (printf, )
import NumericPrelude.Numeric
import NumericPrelude.Base


import Types import Types
import Driver import Driver


import Prelude ()

parseCard :: (Read a, Real.C a) => String -> String -> IO a parseCard :: (Read a, Real.C a) => String -> String -> IO a
parseCard name str = parseCard name str =
case reads str of case reads str of
@@ -67,21 +80,3 @@ description =
return $ flags{flagComputeEnvelope = True}) return $ flags{flagComputeEnvelope = True})
"compute envelope for assistance in finding appropriate parameters" : "compute envelope for assistance in finding appropriate parameters" :
[] []

-- | Fallback sample rate.
-- NOTE(review): presumably in Hz (44100 is the usual CD rate) and used when
-- the input does not specify one — confirm against the call sites.
defaultSampleRate :: SoxLib.Rate
defaultSampleRate = 44100

-- | Read a 'Freq' field out of @flags@ with the accessor @acc@ and divide
-- its value by the sample rate @sr@.
-- NOTE(review): presumably this converts Hz into cycles per sample — confirm.
freq :: SoxLib.Rate -> (Flags -> Freq) -> (Flags -> Float)
freq sr acc flags = hz / realToFrac sr
  where
    Freq hz = acc flags

-- | Read a 'Time' field out of @flags@ with the accessor @acc@, multiply
-- its value by the sample rate @sr@ and round to the nearest 'Int'.
-- NOTE(review): presumably this converts seconds into a sample count — confirm.
time :: SoxLib.Rate -> (Flags -> Time) -> (Flags -> Int)
time sr acc flags = round (duration * realToFrac sr)
  where
    Time duration = acc flags

-- | Render the number wrapped in a 'Freq' using 'show'.
-- No unit suffix is appended (the "Hz" suffix is intentionally left out).
formatFreq :: Freq -> String
formatFreq wrapped =
  case wrapped of
    Freq hz -> show hz

-- | Render the number wrapped in a 'Time' using 'show'.
-- No unit suffix is appended (the "s" suffix is intentionally left out).
formatTime :: Time -> String
formatTime wrapped =
  case wrapped of
    Time secs -> show secs


+ 10
- 0
src/Plot.hs Zobrazit soubor

@@ -0,0 +1,10 @@
module Plot where

import Graphics.Matplotlib

-- | Evaluate a demo waveform at every position in @xs@.
--
-- Each output sample is a fast sinusoid, @sin (x * pi / 5)@, scaled by a
-- slower envelope, @(sin (x * pi / 45) + 1) / 2@, which stays within
-- @[0, 1]@. The result has the same length and order as the input.
--
-- The exact 'pi' constant is used rather than a truncated decimal literal,
-- so the phase does not drift and the waveform reaches its zeros (for
-- example at @x = 45@) up to floating-point rounding.
signal :: [Double] -> [Double]
signal = map sample
  where
    sample x = envelope x * carrier x
    -- Slow amplitude envelope, shifted and scaled into [0, 1].
    envelope x = (sin (x * pi / 45) + 1) / 2
    -- Fast oscillation being modulated.
    carrier x = sin (x * pi / 5)

-- | Draw 'signal' as a line plot over the sample positions 1 through 1000
-- and show it with 'onscreen'.
plot :: IO ()
plot =
  let samplePoints = [1 .. 1000]
   in onscreen (line samplePoints (signal samplePoints))

+ 20
- 0
src/Types.hs Zobrazit soubor

@@ -1,5 +1,8 @@
module Types where module Types where


import qualified Data.StorableVector.Lazy as SVL
import qualified Sound.SoxLib as SoxLib

newtype Time = Time Float newtype Time = Time Float
deriving (Eq, Show) deriving (Eq, Show)


@@ -43,3 +46,20 @@ data Params =
pauseVolume :: Float, pauseVolume :: Float,
minPause, preStart :: Int minPause, preStart :: Int
} }

-- | Render the number wrapped in a 'Freq' using 'show'.
-- No unit suffix is appended (the "Hz" suffix is intentionally left out).
formatFreq :: Freq -> String
formatFreq wrapped =
  case wrapped of
    Freq hz -> show hz

-- | Render the number wrapped in a 'Time' using 'show'.
-- No unit suffix is appended (the "s" suffix is intentionally left out).
formatTime :: Time -> String
formatTime wrapped =
  case wrapped of
    Time secs -> show secs

-- | Fallback sample rate.
-- NOTE(review): presumably in Hz (44100 is the usual CD rate) and used when
-- the input does not specify one — confirm against the call sites.
defaultSampleRate :: SoxLib.Rate
defaultSampleRate = 44100

-- | Read a 'Freq' field out of @flags@ with the accessor @acc@ and divide
-- its value by the sample rate @sr@.
-- NOTE(review): presumably this converts Hz into cycles per sample — confirm.
freq :: SoxLib.Rate -> (Flags -> Freq) -> (Flags -> Float)
freq sr acc flags = hz / realToFrac sr
  where
    Freq hz = acc flags

-- | Read a 'Time' field out of @flags@ with the accessor @acc@, multiply
-- its value by the sample rate @sr@ and round to the nearest 'Int'.
-- NOTE(review): presumably this converts seconds into a sample count — confirm.
time :: SoxLib.Rate -> (Flags -> Time) -> (Flags -> Int)
time sr acc flags = round (duration * realToFrac sr)
  where
    Time duration = acc flags

+ 2
- 0
stack.yaml Zobrazit soubor

@@ -65,3 +65,5 @@ packages:
# #
# Allow a newer minor version of GHC than the snapshot specifies # Allow a newer minor version of GHC than the snapshot specifies
# compiler-check: newer-minor # compiler-check: newer-minor
extra-deps:
# - Chart-diagrams-1.9.3@sha256:63668daff044a79827b7edb265265a4a8237424abb8f808ad1fcbdb3d47e753d,1801

Načítá se…
Zrušit
Uložit