Mirror of https://github.com/correl/dejavu.git
Added annotations for different fingerprinting parameters
commit 60d9d6758e
parent a779ca4cf9
1 changed file with 48 additions and 7 deletions
@@ -7,20 +7,60 @@ from scipy.ndimage.morphology import (generate_binary_structure,
 import hashlib
 from operator import itemgetter
 
 IDX_FREQ_I = 0
 IDX_TIME_J = 1
 
+######################################################################
+# Sampling rate, related to the Nyquist conditions, which affects
+# the range of frequencies we can detect.
 DEFAULT_FS = 44100
+
+######################################################################
+# Size of the FFT window; affects frequency granularity.
 DEFAULT_WINDOW_SIZE = 4096
+
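Taken together, these two defaults fix the analysis bandwidth and the frequency resolution: the Nyquist limit is Fs / 2 and each FFT bin spans Fs / wsize. A minimal sketch of that arithmetic at the defaults above:

fs = 44100                  # DEFAULT_FS
wsize = 4096                # DEFAULT_WINDOW_SIZE

nyquist = fs / 2            # highest detectable frequency: 22050.0 Hz
bin_width = fs / wsize      # frequency granularity per FFT bin: ~10.77 Hz
print(nyquist, bin_width)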
+######################################################################
+# Ratio by which each sequential window overlaps the last and the
+# next window. Higher overlap will allow a higher granularity of offset
+# matching, but potentially more fingerprints.
 DEFAULT_OVERLAP_RATIO = 0.5
+
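These three parameters are presumably what the fingerprint() signature below feeds into the spectrogram step. A minimal sketch of how they might be wired into matplotlib's mlab.specgram; the call shape is an assumption about the surrounding code, and `samples` is a hypothetical mono PCM array:

import numpy as np
import matplotlib.mlab as mlab

samples = np.zeros(5 * 44100)       # hypothetical 5 seconds of audio at DEFAULT_FS

# Spectrogram whose shape is controlled by the defaults above:
# rows ~ wsize / 2 + 1 frequency bins, one column per hop of
# wsize * (1 - wratio) samples.
spectrum, freqs, times = mlab.specgram(
    samples,
    NFFT=4096,                      # DEFAULT_WINDOW_SIZE
    Fs=44100,                       # DEFAULT_FS
    window=mlab.window_hanning,
    noverlap=int(4096 * 0.5))       # wsize * DEFAULT_OVERLAP_RATIO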
+######################################################################
+# Degree to which a fingerprint can be paired with its neighbors --
+# higher will cause more fingerprints, but potentially better accuracy.
 DEFAULT_FAN_VALUE = 15
+
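A rough feel for what the fan value does to fingerprint volume: each peak is paired with roughly DEFAULT_FAN_VALUE of the peaks that follow it, before the time-delta thresholds below prune pairs. A sketch with a hypothetical peak count:

DEFAULT_FAN_VALUE = 15

num_peaks = 10000                         # hypothetical peak count for one track
max_pairs = num_peaks * DEFAULT_FAN_VALUE
print(max_pairs)                          # upper bound of 150000 candidate hashes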
+######################################################################
+# Minimum amplitude in the spectrogram in order to be considered a peak.
+# This can be raised to reduce the number of fingerprints, but can
+# negatively affect accuracy.
 DEFAULT_AMP_MIN = 10
+
+######################################################################
+# Number of cells around an amplitude peak in the spectrogram in order
+# for Dejavu to consider it a spectral peak. Higher values mean fewer
+# fingerprints and faster matching, but can potentially affect accuracy.
 PEAK_NEIGHBORHOOD_SIZE = 20
+
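The amplitude floor and the neighborhood size both drive the local-maximum search over the spectrogram. A minimal sketch of that step, assuming the scipy.ndimage helpers imported at the top of this hunk; the `spectrum` array is hypothetical:

import numpy as np
from scipy.ndimage.filters import maximum_filter
from scipy.ndimage.morphology import generate_binary_structure, iterate_structure

DEFAULT_AMP_MIN = 10
PEAK_NEIGHBORHOOD_SIZE = 20

spectrum = np.random.rand(2049, 100) * 20   # hypothetical spectrogram (freq x time)

# Dilate a plus-shaped footprint PEAK_NEIGHBORHOOD_SIZE times to define
# the neighborhood around each cell.
struct = generate_binary_structure(2, 1)
neighborhood = iterate_structure(struct, PEAK_NEIGHBORHOOD_SIZE)

# A cell is a spectral peak if it equals the maximum of its neighborhood
# and clears the amplitude floor.
local_max = maximum_filter(spectrum, footprint=neighborhood) == spectrum
peaks = local_max & (spectrum > DEFAULT_AMP_MIN)
freq_idx, time_idx = np.where(peaks)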
+######################################################################
+# Thresholds on how close or far fingerprints can be in time in order
+# to be paired as a fingerprint. If your max is too low, higher values of
+# DEFAULT_FAN_VALUE may not perform as expected.
 MIN_HASH_TIME_DELTA = 0
 MAX_HASH_TIME_DELTA = 200
+
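The deltas are compared against the peak time indices, so their real-time meaning depends on the hop size chosen above; treating the units as spectrogram frames is an assumption drawn from how t1 and t2 are used in the pairing loop later in this diff. A short sketch of that conversion at the defaults:

fs = 44100                      # DEFAULT_FS
wsize = 4096                    # DEFAULT_WINDOW_SIZE
wratio = 0.5                    # DEFAULT_OVERLAP_RATIO
max_delta_frames = 200          # MAX_HASH_TIME_DELTA

hop_seconds = wsize * (1 - wratio) / fs          # ~0.046 s between frames
max_pair_span = max_delta_frames * hop_seconds   # ~9.3 s between paired peaks
print(round(hop_seconds, 3), round(max_pair_span, 1))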
+######################################################################
+# If True, will sort peaks temporally for fingerprinting;
+# not sorting will cut down the number of fingerprints, but potentially
+# affect performance.
+PEAK_SORT = True
+
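When PEAK_SORT is enabled, the peak list is ordered by its time index before pairing, so the pairing loop in generate_hashes walks forward in time. A minimal sketch using the itemgetter import from this hunk; the peak list is hypothetical:

from operator import itemgetter

# (frequency_bin, time_frame) pairs, deliberately out of order
peaks = [(310, 42), (120, 7), (95, 60), (200, 7)]

peaks.sort(key=itemgetter(1))   # sort by the time index, as generate_hashes does
print(peaks)                    # [(120, 7), (200, 7), (310, 42), (95, 60)]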
+######################################################################
+# Number of characters of the SHA1 hex digest to keep in the fingerprint
+# calculation. The fewer you keep, the less storage is needed, but
+# potentially higher collisions and misclassifications when identifying songs.
+FINGERPRINT_REDUCTION = 20
+
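The truncation happens in generate_hashes, later in this diff: the two peak frequencies and their time delta are hashed with SHA1 and only the first FINGERPRINT_REDUCTION hex characters are kept. A minimal standalone sketch of that calculation (encoded for Python 3; the peak values are hypothetical):

import hashlib

FINGERPRINT_REDUCTION = 20

freq1, freq2, t_delta = 120, 310, 35     # hypothetical peak pair and frame delta

h = hashlib.sha1(("%s|%s|%s" % (freq1, freq2, t_delta)).encode("utf-8"))
fingerprint_hash = h.hexdigest()[0:FINGERPRINT_REDUCTION]
print(fingerprint_hash)                  # 20 hex characters = 80 bits of the digest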
 def fingerprint(channel_samples, Fs=DEFAULT_FS,
                 wsize=DEFAULT_WINDOW_SIZE,
                 wratio=DEFAULT_OVERLAP_RATIO,
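For orientation, a hypothetical call into this entry point, assuming a mono WAV decoded with scipy.io.wavfile; the file name is made up and the return shape (an iterable of (hash, offset) pairs) is an assumption based on the generate_hashes yields shown below:

from scipy.io import wavfile

rate, samples = wavfile.read("song.wav")     # hypothetical mono recording
hashes = list(fingerprint(samples, Fs=rate)) # assumed to yield (hash, offset) pairs
print(len(hashes), hashes[:3])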
@@ -98,6 +138,7 @@ def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE):
     """
     fingerprinted = set()  # to avoid rehashing same pairs
 
-    peaks.sort(key=itemgetter(1))
+    if PEAK_SORT:
+        peaks.sort(key=itemgetter(1))
 
     for i in range(len(peaks)):
@@ -114,7 +155,7 @@ def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE):
                 if t_delta >= MIN_HASH_TIME_DELTA and t_delta <= MAX_HASH_TIME_DELTA:
                     h = hashlib.sha1(
                         "%s|%s|%s" % (str(freq1), str(freq2), str(t_delta)))
-                    yield (h.hexdigest()[0:20], t1)
+                    yield (h.hexdigest()[0:FINGERPRINT_REDUCTION], t1)
 
                 # ensure we don't repeat hashing
                 fingerprinted.add((i, i + j))