1
2
3 """This module computes the uncompressed feature vectors for
4 music rhythm determination.
5
6 This can be run as a script for debugging/exploratory purposes.
7 When run as a script, it produces plots of various stages of the
8 signal processing sequence.
9
10 It is also imported as a module, in which case the plotting is off.
11 The normal call is to make_vector().
12 """
13
14
15 import os
16 import math
17 import numpy
18 import pylab
19 import cPickle
20 import subprocess
21 from gmisclib import wavio
22 from gmisclib import cache
23
24 from gpk_voicing import voice_misc as VM
25
26
27
28 from gpk_voicing import fv201105 as FV
29
30 CACHEROOT = '/tmp/artdist'
31 CWT = 1.0
32 MEDRATE = 1.0
33
35 """First stage.
36 This does the signal processing to (essentially) compute a spectrogram
37 of the music. The spectrogram is a two-dimensional representation of
38 the sound: time is one axis, and the other axis is frequency.
39
40 It uses a "perceptial spectrum" which is more representative of what
41 humans actually hear than the raw spectrogram. Essentially, it
42 matches the spectral bandwidth to the ear: it uses a filterbank where
43 each filter is 0.7 erb wide. Then, it half-wave rectified the signal
44 and takes the cube root, in order to match the amplitude compression
45 in the ear.
46
47 @param f: filename for the input audio file.
48 @type f: str
49 @param Dt: The desired sampling interval of the output feature vectors. (E.g. 10ms.)
50 @type Dt: float
51 @rtype: C{numpy.ndarray} 2-dimensional.
52 @return: A sequence of feature vectors that span the specified region.
53 This is a 2-dimensional spectrogram.
54 """
55 COL = 0
56 fwav = '/tmp/Mrhythm.wav'
57
58 try:
59 os.remove(fwav)
60 except OSError:
61 pass
62
63 subprocess.check_call(['sox', f, fwav, 'channels', '1'])
64 x = wavio.read(fwav)
65
66
67 ci = cache.cache_info(root=CACHEROOT, fname=f, info=(COL,))
68 print 'Dt=', Dt
69
70 o, descr, Dt, t0 = FV.feature_vec(x.column(COL), x.dt(), Dt, fdif2=-0.3, cache_info=ci)
71 assert len(o)==len(descr)
72 print 'Frequency bands:', [q.get('fc', 'all') for q in descr]
73 rv = numpy.transpose(numpy.array(o, numpy.float))
74 if plotit:
75 pylab.imshow(rv)
76 pylab.show()
77 return rv
78
79
80
81
82
84 """Plot the acoustic data.
85 """
86 pylab.imshow(smd.transpose(), aspect=3)
87 pylab.title("Rhythmic patterns")
88
89
91 """This is a logarithmically spaced frequency axis.
92 """
94 self.f0 = f0
95 self.fmx = fmx
96 self.r = r
97 self.n = int(math.floor(math.log(fmx/f0)/r))
98
99 - def vals(self, i=None):
100 """@param i: ask for the frequency at a certain index.
101 Or, if C{i=None} (the default), yield an array of
102 all the frequencies.
103 @type i: int or None.
104 """
105 if i == None:
106 i = numpy.arange(self.n)
107 return self.f0 * numpy.exp(i*self.r)
108
109
110
112 """Compute beat rates from a spectrogram.
113 @param sd: his starts from a spectrogram with one time axis and one frequency axis,
114 @type sd: numpy.ndarray 2-dimensional array of floats.
115 @return: It computes an array where the time axis is converted to a beat-rate axis.
116 So, the value of each element tells you how strong a particular beat
117 is, at a particular pitch. An element might reveal the strength of
118 a 60BPM beat in the high pitch range near 3 kHz (e.g. snare drum).
119 So, the frequency axis separates the track by instrument, and
120 the beats-per-minute axis lets you look at the rhythm of each instrument.
121 (Of course, that's a somewhat idealized view...)
122 @rtype: numpy.ndarray, 2-dimensional array of floats.
123 """
124 EDGE = sd.shape[0]*Dt/3.0
125 d = VM.dataprep2_flat_generic(sd, Dt, edgewidth=EDGE, pad=sd.shape[0]*Dt, isreal=True)
126 s = numpy.absolute(numpy.fft.rfft(d, axis=0)**2)
127 f = VM.real_fft_freq(s.shape[0], d=Dt)
128 df = f[1]-f[0]
129 print "df=", df, "length=", Dt*d.shape[0], Dt*sd.shape[0]
130 rv = []
131 fa = faxis(1./4.0, 0.03, 10.0)
132 stf = math.exp(fa.r/2.0)
133
134
135
136
137
138
139
140
141 for tau in fa.vals():
142 im = int(round(1.0/(tau*df*stf)))
143 ip = int(round(1.0/(tau*df/stf)))
144 rv.append(numpy.mean(s[im:ip+1,:], axis=0))
145 rv = numpy.array(rv)
146 if plotaudio:
147 plot_speech(rv)
148 pylab.show()
149 return (rv, fa)
150
151
152 -def stage2(s, fa, plotit=False):
153 r = numpy.argmax(s, axis=0)
154 rate = fa.vals(r)
155 rweight = s.take(r)
156 numpy.divide(rweight, rweight.mean(), rweight)
157 if plotit:
158 pylab.subplot(311)
159 pylab.plot(rate, 'g')
160 pylab.subplot(312)
161 pylab.plot(rweight, 'y')
162 c = []
163 k = s.shape[0] // 2
164 cwt = 1 + numpy.arange(s.shape[0]-k)
165 for i in range(s.shape[1]):
166 si = s[:,i] - numpy.median(s[:,i])
167 c.append( numpy.correlate(si, si, mode="same")[k:] * cwt )
168 if plotit:
169 pylab.subplot(313)
170 pylab.imshow(numpy.array(c), aspect=1)
171 pylab.show()
172 cc = numpy.array(c)
173 numpy.divide(cc, numpy.mean(cc), cc)
174 return (numpy.log(rate/MEDRATE)*rweight, cc*CWT)
175
176
178 """The basic idea here is that we should not expect a song to have a consistent
179 rhythmic pattern thoughout. So, we divide the track into blocks, and
180 nside each block, we assume a consistent pattern. (Obviously, this is not
181 perfect: we chop it into blocks without regard for the song. A better scheme
182 mght be to try to find boundaries in the song where the rhythm changes.)
183 If the rhythm changes within a block, you'll get a blurred rhythmic pattern.
184
185 At the end, we report the average rhythm pattern, over all the blocks.
186 @param f1: name of the audio file
187 @type f1: str
188 @param name: the track's label. (Note: this is just passed through to the output.)
189 @type name: normally L{str}
190 @param plotaudio: display plots (true) or run silently (false).
191 This particular routine plots the averaged beat rate as a function of frequency,
192 and also the averaged feature vector.
193 @type plotaudio: boolean
194 @rtype: A L{pickle} of C{(name, feature vector)}
195 @return: It returns an opaque blob (a L{pickle}) that you can write to a file and easily read back.
196 The pickle contains the track's label, along with the uncompressed feature vector.
197 This feature vector contains the beat rate as a function of frequency, followed by
198 the part of the feature vector that describes the averaged rhythmic pattern.
199 """
200 BLOCK_DUR = 32.0
201 Dt = 0.01
202 sd = s_preprocess(f1, Dt=Dt, plotit=plotaudio)
203 duration = sd.shape[0]*Dt
204 nblocks = int(math.ceil(duration/(0.5*BLOCK_DUR)))
205 rsum = None
206 csum = None
207 for i in range(nblocks):
208 if nblocks > 1:
209 s = i * (duration-BLOCK_DUR)/(nblocks-1)
210 e = s + BLOCK_DUR
211 else:
212 s = 0.0
213 e = duration
214 assert e is None or s is None or e > s
215 iis = int(round(s/Dt)) if s is not None else 0
216 iie = int(round(e/Dt)) if e is not None else duration
217 assert iie > iis
218 print 's, e=', s, iis, e, iie
219 sdc = sd[iis:iie,:]
220 s, fa = spectral(sdc, Dt, plotaudio=plotaudio)
221 rate, c = stage2(s, fa)
222 if rsum is None:
223 rsum = rate
224 else:
225 numpy.add(rsum, rate, rsum)
226 if csum is None:
227 csum = c
228 else:
229 numpy.add(csum, c, csum)
230 print 'nblocks=', nblocks, csum.shape, rsum.shape
231 numpy.divide(csum, nblocks, csum)
232 numpy.divide(rsum, nblocks, rsum)
233 if plotaudio:
234 pylab.subplot(211)
235 pylab.plot(rsum, 'g')
236 pylab.subplot(212)
237 pylab.imshow(numpy.array(csum), aspect=1)
238 pylab.show()
239 return cPickle.dumps((name, numpy.concatenate((rate, c.reshape((-1,))))))
240
241
242 if __name__ == '__main__':
243 import sys
244 if len(sys.argv) <= 1:
245 raise SystemExit
246
247
248
249
250 make_vector(sys.argv[1], sys.argv[1], plotaudio=True)
251