Make statsd metric sending resilient.

This commit is contained in:
Dave Shawley 2021-03-07 14:37:24 -05:00
parent ff6f13591c
commit ed6e479e2a
No known key found for this signature in database
GPG key ID: 44A9C9992CCFAB82
2 changed files with 61 additions and 6 deletions

View file

@@ -16,6 +16,7 @@ class Processor(asyncio.Protocol):
self.transport = None
self._queue = asyncio.Queue()
self._failed_sends = []
async def run(self):
self.running = True
@@ -84,10 +85,35 @@ class Processor(asyncio.Protocol):
self.host, self.port, error)
async def _process_metric(self):
    """Send a single metric to statsd, keeping it for retry on failure.

    A metric left over from an earlier failed attempt (the head of
    ``self._failed_sends``) is retried before anything new is taken
    from ``self._queue``.  Otherwise this waits up to 0.1 seconds for
    a queued metric and returns without sending if none arrives.

    A metric is kept in ``self._failed_sends`` whenever it could not
    be delivered: either the transport was already closing when the
    metric came off the queue, or writing the metric closed the
    transport.  It is removed from that list only after a write that
    leaves the transport open.

    """
    processing_failed_send = bool(self._failed_sends)
    if processing_failed_send:
        self.logger.debug('using previous send attempt')
        metric = self._failed_sends[0]
    else:
        try:
            metric = await asyncio.wait_for(self._queue.get(), 0.1)
        except asyncio.TimeoutError:
            return  # nothing to do
        self.logger.debug('received %r from queue', metric)

        # Since we `await`d the state of the transport may have
        # changed.  Sending on the closed transport won't return
        # an error since the send is async.  We can catch the
        # problem here though.
        if self.transport.is_closing():
            self.logger.debug('preventing send on closed transport')
            self._failed_sends.append(metric)
            return

    self.transport.write(metric)
    if self.transport.is_closing():
        # Writing to a transport does not raise exceptions, it
        # will close the transport if a low-level error occurs.
        self.logger.debug('transport closed by writing')
        if not processing_failed_send:
            # A metric fresh from the queue would otherwise be lost
            # here; a retried metric is already at the head of the
            # list and simply stays there.
            self._failed_sends.append(metric)
        return

    self.logger.debug('sent %r to statsd', metric)
    if processing_failed_send:
        # NOTE(review): metrics that pass through _failed_sends never
        # get a matching task_done() -- confirm that nothing awaits
        # self._queue.join().
        self._failed_sends.pop(0)
    else:
        self._queue.task_done()

View file

@@ -181,3 +181,32 @@ class MetricProcessingTests(ProcessorTestCase):
await self.wait_for(self.statsd_server.message_received.acquire())
await self.wait_for(self.statsd_server.message_received.acquire())
await self.wait_for(self.statsd_server.message_received.acquire())
async def test_metrics_sent_while_disconnected_are_queued(self):
    """Metrics injected while the server is closed arrive in order later.

    The statsd server is closed, fifty counters are injected into the
    processor, and the server is started again.  Once the client has
    connected, each received message is compared against the metric
    expected at that position.

    """
    self.statsd_server.close()
    await self.statsd_server.wait_closed()

    for value in range(50):
        self.processor.inject_metric('counter', value, 'c')

    # Hold a reference to the task for the rest of the test: the event
    # loop keeps only weak references to tasks, so an unreferenced one
    # may be garbage-collected before it finishes.
    _server_task = asyncio.create_task(self.statsd_server.run())
    await self.wait_for(self.statsd_server.client_connected.acquire())

    for value in range(50):
        await self.wait_for(self.statsd_server.message_received.acquire())
        self.assertEqual(f'counter:{value}|c'.encode(),
                         self.statsd_server.metrics.pop(0))
async def test_socket_closure_while_processing_failed_event(self):
    """Close the transport while a failed send is pending, then wait.

    The processor's ``_process_metric`` is wrapped so that its first
    invocation plants a metric in ``_failed_sends`` and closes the
    transport before delegating to the real method.  The test then
    waits on the statsd server's ``message_received``.

    """
    original = self.processor._process_metric
    patch_state = {'first_time': True}

    async def close_then_process():
        # Only the very first call interferes; later calls pass through.
        if not patch_state['first_time']:
            return await original()
        self.processor._failed_sends.append(b'counter:1|c\n')
        self.processor.transport.close()
        patch_state['first_time'] = False
        return await original()

    self.processor._process_metric = close_then_process
    await self.wait_for(self.statsd_server.message_received.acquire())