mirror of
https://github.com/sprockets/sprockets-influxdb.git
synced 2024-11-15 03:00:24 +00:00
Submit measurements one at a time for a rejected batch
Better edge case exception handling
This commit is contained in:
parent
1affb16eb6
commit
ead27c3942
4 changed files with 124 additions and 32 deletions
|
@ -3,6 +3,10 @@
|
|||
Release History
|
||||
===============
|
||||
|
||||
`1.1.0`_ (23 Sep 2016)
|
||||
----------------------
|
||||
- Submit measurements one at a time for a rejected batch, logging error responses
|
||||
|
||||
`1.0.7`_ (14 Sep 2016)
|
||||
----------------------
|
||||
- Have a default content length for responses without one
|
||||
|
@ -24,7 +28,6 @@ Release History
|
|||
- Change the precision to second precision, per the InfluxDB docs (use the most
|
||||
coarse precision for better compression)
|
||||
|
||||
|
||||
`1.0.3`_ (13 Sep 2016)
|
||||
----------------------
|
||||
- Add a response ``content_length`` field, an ``accept`` tag (if set in request
|
||||
|
@ -42,6 +45,8 @@ Release History
|
|||
----------------------
|
||||
- Initial release
|
||||
|
||||
.. _Next Release: https://github.com/sprockets/sprockets-influxdb/compare/1.1.0...master
|
||||
.. _1.1.0: https://github.com/sprockets/sprockets-influxdb/compare/1.0.7...1.1.0
|
||||
.. _1.0.7: https://github.com/sprockets/sprockets-influxdb/compare/1.0.6...1.0.7
|
||||
.. _1.0.6: https://github.com/sprockets/sprockets-influxdb/compare/1.0.5...1.0.6
|
||||
.. _1.0.5: https://github.com/sprockets/sprockets-influxdb/compare/1.0.4...1.0.5
|
||||
|
@ -50,4 +55,3 @@ Release History
|
|||
.. _1.0.2: https://github.com/sprockets/sprockets-influxdb/compare/1.0.1...1.0.2
|
||||
.. _1.0.1: https://github.com/sprockets/sprockets-influxdb/compare/1.0.0...1.0.1
|
||||
.. _1.0.0: https://github.com/sprockets/sprockets-influxdb/compare/0.0.0...1.0.0
|
||||
.. _Next Release: https://github.com/sprockets/sprockets-influxdb/compare/1.0.7...master
|
||||
|
|
|
@ -11,3 +11,4 @@ exclude = env,build
|
|||
cover-branches = 1
|
||||
cover-erase = 1
|
||||
cover-package = sprockets_influxdb
|
||||
verbose = 1
|
||||
|
|
|
@ -8,8 +8,11 @@ RequestHandler mixin.
|
|||
import contextlib
|
||||
import logging
|
||||
import os
|
||||
import select
|
||||
import socket
|
||||
import ssl
|
||||
import time
|
||||
import uuid
|
||||
|
||||
try:
|
||||
from tornado import concurrent, httpclient, ioloop
|
||||
|
@ -17,7 +20,7 @@ except ImportError: # pragma: no cover
|
|||
logging.critical('Could not import Tornado')
|
||||
concurrent, httpclient, ioloop = None, None, None
|
||||
|
||||
version_info = (1, 0, 7)
|
||||
version_info = (1, 1, 0)
|
||||
__version__ = '.'.join(str(v) for v in version_info)
|
||||
__all__ = ['__version__', 'version_info', 'add_measurement', 'flush',
|
||||
'install', 'shutdown', 'Measurement']
|
||||
|
@ -387,21 +390,27 @@ def _futures_wait(wait_future, futures):
|
|||
global _writing
|
||||
|
||||
remaining = []
|
||||
for (future, database, measurements) in futures:
|
||||
for (future, batch, database, measurements) in futures:
|
||||
|
||||
# If the future hasn't completed, add it to the remaining stack
|
||||
if not future.done():
|
||||
remaining.append((future, database, measurements))
|
||||
remaining.append((future, batch, database, measurements))
|
||||
continue
|
||||
|
||||
# Get the result of the HTTP request, processing any errors
|
||||
try:
|
||||
result = future.result()
|
||||
except (httpclient.HTTPError, OSError, socket.error) as error:
|
||||
_on_request_error(error, database, measurements)
|
||||
error = future.exception()
|
||||
if isinstance(error, httpclient.HTTPError):
|
||||
if error.code == 400:
|
||||
_write_error_batch(batch, database, measurements)
|
||||
elif error.code >= 500:
|
||||
_on_5xx_error(batch, error, database, measurements)
|
||||
else:
|
||||
if result.code >= 400:
|
||||
_on_request_error(result.code, database, measurements)
|
||||
LOGGER.error('Error submitting %s batch %s to InfluxDB (%s): '
|
||||
'%s', database, batch, error.code,
|
||||
error.response.body)
|
||||
elif isinstance(error, (TimeoutError, OSError, socket.error,
|
||||
select.error, ssl.socket_error)):
|
||||
_on_5xx_error(batch, error, database, measurements)
|
||||
|
||||
# If there are futures that remain, try again in 100ms.
|
||||
if remaining:
|
||||
|
@ -427,6 +436,21 @@ def _maybe_warn_about_buffer_size():
|
|||
LOGGER.warning('InfluxDB measurement buffer has %i entries', count)
|
||||
|
||||
|
||||
def _on_5xx_error(batch, error, database, measurements):
|
||||
"""Handle a batch submission error, logging the problem and adding the
|
||||
measurements back to the stack.
|
||||
|
||||
:param str batch: The batch ID
|
||||
:param mixed error: The error that was returned
|
||||
:param str database: The database the submission failed for
|
||||
:param list measurements: The measurements to add back to the stack
|
||||
|
||||
"""
|
||||
LOGGER.info('Appending %s measurements to stack due to batch %s %r',
|
||||
database, batch, error)
|
||||
_measurements[database] = _measurements[database] + measurements
|
||||
|
||||
|
||||
def _on_periodic_callback():
|
||||
"""Invoked periodically to ensure that metrics that have been collected
|
||||
are submitted to InfluxDB. If metrics are still being written when it
|
||||
|
@ -446,19 +470,6 @@ def _on_periodic_callback():
|
|||
return _periodic_future
|
||||
|
||||
|
||||
def _on_request_error(error, database, measurements):
|
||||
"""Handle a batch submission error, logging the problem and adding the
|
||||
measurements back to the stack.
|
||||
|
||||
:param mixed error: The error that was returned
|
||||
:param str database: The database the submission failed for
|
||||
:param list measurements: The measurements to add back to the stack
|
||||
|
||||
"""
|
||||
LOGGER.error('Error submitting batch to %s: %r', database, error)
|
||||
_measurements[database] = measurements + _measurements[database]
|
||||
|
||||
|
||||
def _pending_measurements():
|
||||
"""Return the number of measurements that have not been submitted to
|
||||
InfluxDB.
|
||||
|
@ -512,7 +523,7 @@ def _write_measurements():
|
|||
url, method='POST', body='\n'.join(measurements).encode('utf-8'))
|
||||
|
||||
# Keep track of each request in our future stack
|
||||
futures.append((request, database, measurements))
|
||||
futures.append((request, str(uuid.uuid4()), database, measurements))
|
||||
|
||||
# Start the wait cycle for all the requests to complete
|
||||
_writing = True
|
||||
|
@ -521,6 +532,87 @@ def _write_measurements():
|
|||
return future
|
||||
|
||||
|
||||
def _write_error_batch(batch, database, measurements):
|
||||
"""Invoked when a batch submission fails, this method will submit one
|
||||
measurement to InfluxDB. It then adds a timeout to the IOLoop which will
|
||||
invoke :meth:`_write_error_batch_wait` which will evaluate the result and
|
||||
then determine what to do next.
|
||||
|
||||
:param str batch: The batch ID for correlation purposes
|
||||
:param str database: The database name for the measurements
|
||||
:param list measurements: The measurements that failed to write as a batch
|
||||
|
||||
"""
|
||||
if not measurements:
|
||||
LOGGER.info('All %s measurements from batch %s processed',
|
||||
database, batch)
|
||||
return
|
||||
|
||||
LOGGER.debug('Processing batch %s for %s by measurement, %i left',
|
||||
batch, database, len(measurements))
|
||||
|
||||
url = '{}?db={}&precision=ms'.format(_base_url, database)
|
||||
|
||||
measurement = measurements.pop(0)
|
||||
|
||||
# Create the request future
|
||||
future = _http_client.fetch(
|
||||
url, method='POST', body=measurement.encode('utf-8'))
|
||||
|
||||
# Check in 25ms to see if it's done
|
||||
_io_loop.add_timeout(_io_loop.time() + 0.025, _write_error_batch_wait,
|
||||
future, batch, database, measurement, measurements)
|
||||
|
||||
|
||||
def _write_error_batch_wait(future, batch, database, measurement, measurements):
|
||||
"""Invoked by the IOLoop, this method checks if the HTTP request future
|
||||
created by :meth:`_write_error_batch` is done. If it's done it will
|
||||
evaluate the result, logging any error and moving on to the next
|
||||
measurement. If there are no measurements left in the `measurements`
|
||||
argument, it will consider the batch complete.
|
||||
|
||||
|
||||
:param tornado.concurrent.Future future: The AsyncHTTPClient request future
|
||||
:param str batch: The batch ID
|
||||
:param str database: The database name for the measurements
|
||||
:param str measurement: The measurement the future is for
|
||||
:param list measurements: The measurements that failed to write as a batch
|
||||
|
||||
"""
|
||||
if not future.done():
|
||||
_io_loop.add_timeout(_io_loop.time() + 0.025, _write_error_batch_wait,
|
||||
future, batch, database, measurement,
|
||||
measurements)
|
||||
return
|
||||
|
||||
error = future.exception()
|
||||
if isinstance(error, httpclient.HTTPError):
|
||||
if error.code == 400:
|
||||
LOGGER.error('Error writing %s measurement from batch %s to '
|
||||
'InfluxDB (%s): %s', database, batch, error.code,
|
||||
error.response.body)
|
||||
LOGGER.info('Bad %s measurement from batch %s: %s',
|
||||
database, batch, measurement)
|
||||
else:
|
||||
LOGGER.error('Error submitting individual metric for %s from batch '
|
||||
'%s to InfluxDB (%s): %s', database, batch, error.code)
|
||||
measurements = measurements + [measurement]
|
||||
elif isinstance(error, (TimeoutError, OSError, socket.error,
|
||||
select.error, ssl.socket_error)):
|
||||
LOGGER.error('Error submitting individual metric for %s from batch '
|
||||
'%s to InfluxDB (%s)', database, batch, error)
|
||||
_write_error_batch(batch, database, measurements + [measurement])
|
||||
measurements = measurements + [measurement]
|
||||
|
||||
if not measurements:
|
||||
LOGGER.info('All %s measurements from batch %s processed',
|
||||
database, batch)
|
||||
return
|
||||
|
||||
# Continue writing measurements
|
||||
_write_error_batch(batch, database, measurements)
|
||||
|
||||
|
||||
class Measurement(object):
|
||||
"""The :class:`Measurement` class represents what will become a single row
|
||||
in an InfluxDB database. Measurements are added to InfluxDB via the
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
import base64
|
||||
import random
|
||||
import mock
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from tornado import concurrent, gen, httpclient
|
||||
|
@ -110,11 +109,7 @@ class MeasurementTestCase(base.AsyncServerTestCase):
|
|||
with mock.patch('tornado.httpclient.AsyncHTTPClient.fetch') as fetch:
|
||||
future = concurrent.Future()
|
||||
fetch.return_value = future
|
||||
request = httpclient.HTTPRequest('http://localhost/write?db')
|
||||
future.set_result(
|
||||
httpclient.HTTPResponse(
|
||||
request, 599, error=OSError(),
|
||||
request_time=time.time() - request.start_time))
|
||||
future.set_exception(httpclient.HTTPError(599, 'TestError'))
|
||||
self.flush()
|
||||
self.assertEqual(influxdb._pending_measurements(), 1)
|
||||
self.flush()
|
||||
|
|
Loading…
Reference in a new issue