mirror of
https://github.com/correl/dejavu.git
synced 2024-11-23 11:09:52 +00:00
Added annotations for different fingerprinting parameters
This commit is contained in:
parent
a779ca4cf9
commit
60d9d6758e
1 changed files with 48 additions and 7 deletions
|
@ -7,20 +7,60 @@ from scipy.ndimage.morphology import (generate_binary_structure,
|
|||
import hashlib
|
||||
from operator import itemgetter
|
||||
|
||||
|
||||
IDX_FREQ_I = 0
|
||||
IDX_TIME_J = 1
|
||||
|
||||
######################################################################
|
||||
# Sampling rate, related to the Nyquist conditions, which affects
|
||||
# the range of frequencies we can detect.
|
||||
DEFAULT_FS = 44100
|
||||
|
||||
######################################################################
|
||||
# Size of the FFT window, affects frequency granularity
|
||||
DEFAULT_WINDOW_SIZE = 4096
|
||||
|
||||
######################################################################
|
||||
# Ratio by which each sequential window overlaps the last and the
|
||||
# next window. Higher overlap will allow a higher granularity of offset
|
||||
# matching, but potentially more fingerprints.
|
||||
DEFAULT_OVERLAP_RATIO = 0.5
|
||||
|
||||
######################################################################
|
||||
# Degree to which a fingerprint can be paired with its neighbors --
|
||||
# higher will cause more fingerprints, but potentially better accuracy.
|
||||
DEFAULT_FAN_VALUE = 15
|
||||
|
||||
######################################################################
|
||||
# Minimum amplitude in spectrogram in order to be considered a peak.
|
||||
# This can be raised to reduce number of fingerprints, but can negatively
|
||||
# affect accuracy.
|
||||
DEFAULT_AMP_MIN = 10
|
||||
|
||||
######################################################################
|
||||
# Number of cells around an amplitude peak in the spectrogram in order
|
||||
# for Dejavu to consider it a spectral peak. Higher values mean fewer
|
||||
# fingerprints and faster matching, but can potentially affect accuracy.
|
||||
PEAK_NEIGHBORHOOD_SIZE = 20
|
||||
|
||||
######################################################################
|
||||
# Thresholds on how close or far peaks can be in time in order
|
||||
# to be paired as a fingerprint. If your max is too low, higher values of
|
||||
# DEFAULT_FAN_VALUE may not perform as expected.
|
||||
MIN_HASH_TIME_DELTA = 0
|
||||
MAX_HASH_TIME_DELTA = 200
|
||||
|
||||
######################################################################
|
||||
# If True, will sort peaks temporally for fingerprinting;
|
||||
# not sorting will cut down number of fingerprints, but potentially
|
||||
# affect performance.
|
||||
PEAK_SORT = True
|
||||
|
||||
######################################################################
|
||||
# Number of hex characters kept from the front of the SHA1 hash in the
|
||||
# fingerprint calculation. The fewer you keep, the less storage, but
|
||||
# potentially higher collisions and misclassifications when identifying songs.
|
||||
FINGERPRINT_REDUCTION = 20
|
||||
|
||||
def fingerprint(channel_samples, Fs=DEFAULT_FS,
|
||||
wsize=DEFAULT_WINDOW_SIZE,
|
||||
wratio=DEFAULT_OVERLAP_RATIO,
|
||||
|
@ -98,7 +138,8 @@ def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE):
|
|||
"""
|
||||
fingerprinted = set() # to avoid rehashing same pairs
|
||||
|
||||
peaks.sort(key=itemgetter(1))
|
||||
if PEAK_SORT:
|
||||
peaks.sort(key=itemgetter(1))
|
||||
|
||||
for i in range(len(peaks)):
|
||||
for j in range(1, fan_value):
|
||||
|
@ -114,7 +155,7 @@ def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE):
|
|||
if t_delta >= MIN_HASH_TIME_DELTA and t_delta <= MAX_HASH_TIME_DELTA:
|
||||
h = hashlib.sha1(
|
||||
"%s|%s|%s" % (str(freq1), str(freq2), str(t_delta)))
|
||||
yield (h.hexdigest()[0:20], t1)
|
||||
yield (h.hexdigest()[0:FINGERPRINT_REDUCTION], t1)
|
||||
|
||||
# ensure we don't repeat hashing
|
||||
fingerprinted.add((i, i + j))
|
||||
|
|
Loading…
Reference in a new issue