Added annotations for different fingerprinting parameters

2024-11-23 19:19:53 +00:00 · 2014-07-03 00:43:40 -04:00 · 2014-07-03 00:43:40 -04:00 · 60d9d6758e
commit 60d9d6758e
parent a779ca4cf9
1 changed files with 48 additions and 7 deletions
--- a/dejavu/fingerprint.py
+++ b/dejavu/fingerprint.py
@ -7,20 +7,60 @@ from scipy.ndimage.morphology import (generate_binary_structure,
 import hashlib
 from operator import itemgetter
 IDX_FREQ_I = 0
 IDX_TIME_J = 1
 ######################################################################
 # Sampling rate, related to the Nyquist conditions, which affects
 # the range of frequencies we can detect.
 DEFAULT_FS = 44100 
 ######################################################################
 # Size of the FFT window, affects frequency granularity
 DEFAULT_WINDOW_SIZE = 4096
 ######################################################################
 # Ratio by which each sequential window overlaps the last and the
 # next window. Higher overlap will allow a higher granularity of offset
 # matching, but potentially more fingerprints.
 DEFAULT_OVERLAP_RATIO = 0.5  
 ######################################################################
 # Degree to which a fingerprint can be paired with its neighbors --
 # higher will cause more fingerprints, but potentially better accuracy. 
 DEFAULT_FAN_VALUE = 15 
 ######################################################################
 # Minimum amplitude in spectrogram in order to be considered a peak. 
 # This can be raised to reduce number of fingerprints, but can negatively
 # affect accuracy.
 DEFAULT_AMP_MIN = 10
 ######################################################################
 # Number of cells around an amplitude peak in the spectrogram in order
 # for Dejavu to consider it a spectral peak. Higher values mean fewer
 # fingerprints and faster matching, but can potentially affect accuracy.
 PEAK_NEIGHBORHOOD_SIZE = 20
 ######################################################################
 # Thresholds on how close or far apart two peaks can be in time in order
 # to be paired into a fingerprint. If your max is too low, higher values of
 # DEFAULT_FAN_VALUE may not perform as expected.
 MIN_HASH_TIME_DELTA = 0
 MAX_HASH_TIME_DELTA = 200
 ######################################################################
 # If True, will sort peaks temporally for fingerprinting;
 # not sorting will cut down number of fingerprints, but potentially
 # affect performance.
 PEAK_SORT = True
 ######################################################################
 # Number of leading hex characters of the SHA1 digest to keep for each
 # fingerprint (generate_hashes slices hexdigest()[0:FINGERPRINT_REDUCTION];
 # each hex character is 4 bits). Keeping fewer characters uses less storage,
 # but potentially causes more collisions and misclassifications when
 # identifying songs.
 FINGERPRINT_REDUCTION = 20
 def fingerprint(channel_samples, Fs=DEFAULT_FS,
                wsize=DEFAULT_WINDOW_SIZE,
                wratio=DEFAULT_OVERLAP_RATIO,
@ -98,7 +138,8 @@ def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE):
    """
    fingerprinted = set()  # to avoid rehashing same pairs
-    peaks.sort(key=itemgetter(1))
+    if PEAK_SORT:
        peaks.sort(key=itemgetter(1))
    for i in range(len(peaks)):
        for j in range(1, fan_value):
@ -114,7 +155,7 @@ def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE):
                if t_delta >= MIN_HASH_TIME_DELTA and t_delta <= MAX_HASH_TIME_DELTA:
                    h = hashlib.sha1(
                        "%s|%s|%s" % (str(freq1), str(freq2), str(t_delta)))
-                    yield (h.hexdigest()[0:20], t1)
+                    yield (h.hexdigest()[0:FINGERPRINT_REDUCTION], t1)
                # ensure we don't repeat hashing
                fingerprinted.add((i, i + j))