Added annotations for different fingerprinting parameters

2024-11-23 11:09:52 +00:00 · 2014-07-03 00:43:40 -04:00 · 2014-07-03 00:43:40 -04:00 · 60d9d6758e
commit 60d9d6758e
parent a779ca4cf9
1 changed files with 48 additions and 7 deletions
--- a/dejavu/fingerprint.py
+++ b/dejavu/fingerprint.py
@ -7,20 +7,60 @@ from scipy.ndimage.morphology import (generate_binary_structure,
 import hashlib
 from operator import itemgetter

-
 IDX_FREQ_I = 0
 IDX_TIME_J = 1

+######################################################################
+# Sampling rate, related to the Nyquist conditions, which affects
+# the range of frequencies we can detect.
 DEFAULT_FS = 44100 
+
+######################################################################
+# Size of the FFT window, affects frequency granularity
 DEFAULT_WINDOW_SIZE = 4096
+
+######################################################################
+# Ratio by which each sequential window overlaps the last and the
+# next window. Higher overlap will allow a higher granularity of offset
+# matching, but potentially more fingerprints.
 DEFAULT_OVERLAP_RATIO = 0.5  
+
+######################################################################
+# Degree to which a fingerprint can be paired with its neighbors --
+# higher will cause more fingerprints, but potentially better accuracy. 
 DEFAULT_FAN_VALUE = 15 

+######################################################################
+# Minimum amplitude in spectrogram in order to be considered a peak. 
+# This can be raised to reduce number of fingerprints, but can negatively
+# affect accuracy.
 DEFAULT_AMP_MIN = 10
+
+######################################################################
+# Number of cells around an amplitude peak in the spectrogram in order
+# for Dejavu to consider it a spectral peak. Higher values mean fewer
+# fingerprints and faster matching, but can potentially affect accuracy.
 PEAK_NEIGHBORHOOD_SIZE = 20
+
+######################################################################
+# Thresholds on how close or far peaks can be in time in order
+# to be paired as a fingerprint. If your max is too low, higher values of
+# DEFAULT_FAN_VALUE may not perform as expected.
 MIN_HASH_TIME_DELTA = 0
 MAX_HASH_TIME_DELTA = 200

+######################################################################
+# If True, will sort peaks temporally for fingerprinting;
+# not sorting will cut down number of fingerprints, but potentially
+# affect performance.
+PEAK_SORT = True
+
+######################################################################
+# Number of hex characters kept from the front of the SHA1 hash in the
+# fingerprint calculation. The fewer you keep, the less storage, but
+# potentially higher collisions and misclassifications when identifying songs.
+FINGERPRINT_REDUCTION = 20
+
 def fingerprint(channel_samples, Fs=DEFAULT_FS,
                wsize=DEFAULT_WINDOW_SIZE,
                wratio=DEFAULT_OVERLAP_RATIO,
@ -98,7 +138,8 @@ def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE):
    """
    fingerprinted = set()  # to avoid rehashing same pairs
    
-    peaks.sort(key=itemgetter(1))
+    if PEAK_SORT:
+        peaks.sort(key=itemgetter(1))

    for i in range(len(peaks)):
        for j in range(1, fan_value):
@ -114,7 +155,7 @@ def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE):
                if t_delta >= MIN_HASH_TIME_DELTA and t_delta <= MAX_HASH_TIME_DELTA:
                    h = hashlib.sha1(
                        "%s|%s|%s" % (str(freq1), str(freq2), str(t_delta)))
-                    yield (h.hexdigest()[0:20], t1)
+                    yield (h.hexdigest()[0:FINGERPRINT_REDUCTION], t1)

                # ensure we don't repeat hashing
                fingerprinted.add((i, i + j))