Limit logging when disconnected.

Instead of logging a warning every time the connection fails, only
log the first 100 failures, then log every 100th failure thereafter.
This commit is contained in:
Dave Shawley 2021-05-09 15:48:18 -04:00
parent a48453b70e
commit ed67689fe2
No known key found for this signature in database
GPG key ID: F41A8A99298F8EED
3 changed files with 87 additions and 5 deletions

View file

@ -3,6 +3,7 @@
- Added :envvar:`STATSD_ENABLED` environment variable to disable the Tornado integration - Added :envvar:`STATSD_ENABLED` environment variable to disable the Tornado integration
- Tornado application mixin automatically installs start/stop hooks if the application - Tornado application mixin automatically installs start/stop hooks if the application
quacks like a ``sprockets.http.app.Application``. quacks like a ``sprockets.http.app.Application``.
- Limit logging when disconnected from statsd
:tag:`0.0.1 <832f8af7...0.0.1>` (08-Apr-2021) :tag:`0.0.1 <832f8af7...0.0.1>` (08-Apr-2021)
--------------------------------------------- ---------------------------------------------

View file

@ -4,6 +4,47 @@ import socket
import typing import typing
class ThrottleGuard:
    """Prevent code from executing repeatedly.

    :param threshold: guarding threshold

    The guard permits the first ``threshold`` invocations and then
    only one invocation out of every ``threshold`` afterwards.  It
    exists to keep a persistent error condition from flooding the
    log while still emitting periodic feedback.

    The following snippet will log the first 100 failures and then
    once every 100 failures thereafter:

    .. code-block:: python

        executions = 0
        guard = ThrottleGuard(100)
        for _ in range(1000):
            if guard.allow_execution():
                executions += 1
                logging.info('called %s times instead of %s times',
                             executions, guard.counter)

    """

    def __init__(self, threshold: int):
        self.counter = 0
        self.threshold = threshold

    def allow_execution(self) -> bool:
        """Should this execution be allowed?"""
        self.counter += 1
        # Allow every call below the threshold, then only the
        # multiples of the threshold.
        if self.counter < self.threshold:
            return True
        return self.counter % self.threshold == 0

    def reset(self) -> None:
        """Reset counter after error has resolved."""
        self.counter = 0
class AbstractConnector: class AbstractConnector:
"""StatsD connector that does not send metrics or connect. """StatsD connector that does not send metrics or connect.
@ -137,6 +178,7 @@ class Connector(AbstractConnector):
self.logger = logging.getLogger(__package__).getChild('Connector') self.logger = logging.getLogger(__package__).getChild('Connector')
self.prefix = f'{prefix}.' if prefix else prefix self.prefix = f'{prefix}.' if prefix else prefix
self.processor = Processor(host=host, port=port, **kwargs) self.processor = Processor(host=host, port=port, **kwargs)
self._enqueue_log_guard = ThrottleGuard(100)
self._processor_task: typing.Optional[asyncio.Task[None]] = None self._processor_task: typing.Optional[asyncio.Task[None]] = None
async def start(self) -> None: async def start(self) -> None:
@ -174,7 +216,9 @@ class Connector(AbstractConnector):
payload = f'{self.prefix}{path}:{value}|{type_code}' payload = f'{self.prefix}{path}:{value}|{type_code}'
try: try:
self.processor.enqueue(payload.encode('utf-8')) self.processor.enqueue(payload.encode('utf-8'))
self._enqueue_log_guard.reset()
except asyncio.QueueFull: except asyncio.QueueFull:
if self._enqueue_log_guard.allow_execution():
self.logger.warning('statsd queue is full, discarding metric') self.logger.warning('statsd queue is full, discarding metric')
@ -389,6 +433,7 @@ class Processor:
self.host = host self.host = host
self.port = port self.port = port
self._ip_protocol = ip_protocol self._ip_protocol = ip_protocol
self._connect_log_guard = ThrottleGuard(100)
self._reconnect_sleep = reconnect_sleep self._reconnect_sleep = reconnect_sleep
self._wait_timeout = wait_timeout self._wait_timeout = wait_timeout
@ -479,14 +524,21 @@ class Processor:
buffered_data = b'' buffered_data = b''
if self.protocol is not None: if self.protocol is not None:
buffered_data = self.protocol.buffered_data buffered_data = self.protocol.buffered_data
t, p = await self._create_transport() # type: ignore[misc] t, p = await self._create_transport() # type: ignore[misc]
transport, self.protocol = t, p transport, self.protocol = t, p
self.protocol.buffered_data = buffered_data self.protocol.buffered_data = buffered_data
self.logger.info('connection established to %s', self.logger.info(
transport.get_extra_info('peername')) 'connection established to %s after %s attempts',
transport.get_extra_info('peername'),
self._connect_log_guard.counter)
self._connect_log_guard.reset()
except IOError as error: except IOError as error:
self.logger.warning('connection to %s:%s failed: %s', if self._connect_log_guard.allow_execution():
self.host, self.port, error) self.logger.warning(
'connection to %s:%s failed: %s (%s attempts)',
self.host, self.port, error,
self._connect_log_guard.counter)
await asyncio.sleep(self._reconnect_sleep) await asyncio.sleep(self._reconnect_sleep)
async def _process_metric(self) -> None: async def _process_metric(self) -> None:

View file

@ -3,6 +3,7 @@ import logging
import socket import socket
import time import time
import typing import typing
import unittest.mock
import asynctest import asynctest
@ -203,6 +204,17 @@ class TCPProcessingTests(ProcessorTestCase):
self.processor.queue.put_nowait(b'counter:1|c') self.processor.queue.put_nowait(b'counter:1|c')
await self.wait_for(self.statsd_server.message_received.acquire()) await self.wait_for(self.statsd_server.message_received.acquire())
async def test_that_disconnected_logging_is_throttled(self):
    # Take the statsd server down so every connection attempt fails.
    self.statsd_server.close()
    await self.statsd_server.wait_closed()

    self.processor.logger = unittest.mock.Mock()
    self.processor._connect_log_guard.threshold = 10
    self.processor._reconnect_sleep = 0

    # Let the processor rack up just over two thresholds of
    # failed connection attempts.
    target = (2 * self.processor._connect_log_guard.threshold) + 1
    while self.processor._connect_log_guard.counter < target:
        await asyncio.sleep(0)

    # The guard should have suppressed most of the warnings.
    self.assertLess(self.processor.logger.warning.call_count, 20)
class UDPProcessingTests(ProcessorTestCase): class UDPProcessingTests(ProcessorTestCase):
ip_protocol = socket.IPPROTO_UDP ip_protocol = socket.IPPROTO_UDP
@ -354,6 +366,23 @@ class ConnectorTests(ProcessorTestCase):
self.assertEqual(f'counters.counter:{value}|c'.encode(), self.assertEqual(f'counters.counter:{value}|c'.encode(),
self.statsd_server.metrics.pop(0)) self.statsd_server.metrics.pop(0))
async def test_that_queue_full_logging_is_throttled(self):
    # Stop the processor so nothing drains the queue.
    await self.connector.processor.stop()
    self.connector.logger = unittest.mock.Mock()
    self.connector._enqueue_log_guard.threshold = 10

    # Saturate the queue so further metrics are discarded.
    for _ in range(self.connector.processor.queue.maxsize):
        self.connector.incr('counter')

    # Overflow it many times over; warnings should be throttled.
    overflow_count = self.connector._enqueue_log_guard.threshold * 5
    for _ in range(overflow_count):
        self.connector.incr('counter')

    self.assertLess(self.connector.logger.warning.call_count,
                    overflow_count)
class ConnectorOptionTests(ProcessorTestCase): class ConnectorOptionTests(ProcessorTestCase):
ip_protocol = socket.IPPROTO_TCP ip_protocol = socket.IPPROTO_TCP