mirror of
https://github.com/sprockets/sprockets-influxdb.git
synced 2024-11-15 03:00:24 +00:00
602 lines
19 KiB
Python
602 lines
19 KiB
Python
"""
|
|
Sprockets InfluxDB
|
|
==================
|
|
`sprockets_influxdb` includes both a buffering InfluxDB client and a Tornado
|
|
RequestHandler mixin.
|
|
|
|
"""
|
|
import contextlib
|
|
import logging
|
|
import os
|
|
import socket
|
|
import time
|
|
|
|
try:
|
|
from tornado import concurrent, httpclient, ioloop
|
|
except ImportError: # pragma: no cover
|
|
logging.critical('Could not import Tornado')
|
|
concurrent, httpclient, ioloop = None, None, None
|
|
|
|
version_info = (1, 0, 0)
|
|
__version__ = '.'.join(str(v) for v in version_info)
|
|
__all__ = ['__version__', 'version_info', 'add_measurement', 'flush',
|
|
'install', 'shutdown', 'Measurement']
|
|
|
|
LOGGER = logging.getLogger(__name__)
|
|
|
|
REQUEST_DATABASE = 'sprockets_influxdb.database'
|
|
USER_AGENT = 'sprockets-influxdb/v{}'.format(__version__)
|
|
|
|
_base_tags = {}
|
|
_base_url = 'http://localhost:8086/write'
|
|
_credentials = None, None
|
|
_dirty = False
|
|
_http_client = None
|
|
_installed = False
|
|
_io_loop = None
|
|
_last_warning = None
|
|
_measurements = {}
|
|
_max_batch_size = 5000
|
|
_max_clients = 10
|
|
_periodic_callback = None
|
|
_periodic_future = None
|
|
_stopping = False
|
|
_warn_threshold = 5000
|
|
_writing = False
|
|
|
|
|
|
class InfluxDBMixin(object):
|
|
"""Mixin that automatically submits per-request measurements to InfluxDB
|
|
with the request duration.
|
|
|
|
The measurements will automatically add the following tags:
|
|
|
|
- Request `handler`
|
|
- Request `endpoint` (if enabled via a named URL)
|
|
- Request `method`
|
|
- Request `correlation_id` (if set)
|
|
- Response `status_code`
|
|
|
|
To add additional tags and fields, use the
|
|
:meth:`~sprockets_influxdb.Measurement.set_field`,
|
|
:meth:`~sprockets_influxdb.Measurement.set_tag`,
|
|
:meth:`~sprockets_influxdb.Measurement.set_tags`, and
|
|
:meth:`~sprockets_influxdb.Measurement.timer` methods of the
|
|
`influxdb` attribute of the `RequestHandler`.
|
|
|
|
"""
|
|
def __init__(self, application, request, **kwargs):
|
|
self.application = application # Set this here for reverse_url
|
|
self.__metrics = []
|
|
handler = '{}.{}'.format(self.__module__, self.__class__.__name__)
|
|
|
|
self.influxdb = Measurement(
|
|
application.settings[REQUEST_DATABASE],
|
|
application.settings.get('service', 'request'))
|
|
self.influxdb.set_tags({'handler': handler, 'method': request.method})
|
|
try:
|
|
self.influxdb.set_tag('endpoint', self.reverse_url(handler))
|
|
except KeyError:
|
|
pass
|
|
|
|
# Call to super().__init__() needs to be *AFTER* we create our
|
|
# properties since it calls initialize() which may want to call
|
|
# methods like ``set_metric_tag``
|
|
super(InfluxDBMixin, self).__init__(application, request, **kwargs)
|
|
|
|
def on_finish(self):
|
|
super(InfluxDBMixin, self).on_finish()
|
|
if hasattr(self, 'correlation_id'):
|
|
self.influxdb.set_tag('correlation_id', self.correlation_id)
|
|
self.influxdb.set_tag('status_code', self._status_code)
|
|
self.influxdb.set_field('duration', self.request.request_time())
|
|
add_measurement(self.influxdb)
|
|
|
|
|
|
def add_measurement(measurement):
|
|
"""Add measurement data to the stack of measurements to submit to InfluxDB
|
|
|
|
:param measurement: The measurement to add
|
|
:type: measurement: sprockets.clients.influxdb.client.Measurement
|
|
|
|
"""
|
|
if _stopping:
|
|
LOGGER.warning('Discarding measurement for %s while stopping',
|
|
measurement.database)
|
|
return
|
|
|
|
if not measurement.fields:
|
|
raise ValueError('Measurement does not contain a field')
|
|
|
|
if measurement.database not in _measurements:
|
|
_measurements[measurement.database] = []
|
|
|
|
tags = ','.join(['{}={}'.format(k, v)
|
|
for k, v in measurement.tags.items()])
|
|
fields = ' '.join(['{}={}'.format(k, v)
|
|
for k, v in measurement.fields.items()])
|
|
|
|
LOGGER.debug('Appending measurement to %s', measurement.database)
|
|
_measurements[measurement.database].append(
|
|
'{},{} {} {:d}'.format(
|
|
measurement.name, tags, fields, int(time.time() * 1000000000)))
|
|
|
|
_maybe_warn_about_buffer_size()
|
|
|
|
|
|
def flush():
|
|
"""Flush all pending measurements to InfluxDB
|
|
|
|
:rtype: :cls:`~tornado.concurrent.Future`
|
|
|
|
"""
|
|
LOGGER.debug('Flushing')
|
|
flush_future = concurrent.TracebackFuture()
|
|
if _periodic_future and not _periodic_future.done():
|
|
LOGGER.debug('Waiting on _periodic_future instead')
|
|
write_future = _periodic_future
|
|
else:
|
|
write_future = _write_measurements()
|
|
_flush_wait(flush_future, write_future)
|
|
return flush_future
|
|
|
|
|
|
def install(**kwargs):
|
|
"""Call this to install/setup the InfluxDB client collector
|
|
|
|
:param kwargs: keyword parameters to pass to the
|
|
:class:`InfluxDBCollector` initializer.
|
|
:returns: :data:`True` if the client was installed by this call
|
|
and :data:`False` otherwise.
|
|
|
|
Optional configuration values:
|
|
|
|
- **url** The InfluxDB API URL. If URL is not specified, the
|
|
``INFLUX_SCHEME``, ``INFLUX_HOST`` and ``INFLUX_PORT`` environment
|
|
variables will be used to construct the base URL.
|
|
- **io_loop** A :class:`~tornado.ioloop.IOLoop` to use
|
|
- **submission_interval** How often to submit metric batches in
|
|
milliseconds. Default: ``5000``
|
|
- **max_batch_size** The number of measurements to be submitted in a
|
|
single HTTP request. Default: ``1000``
|
|
- **tags** Default tags that are to be submitted with each metric.
|
|
- **auth_username** A username to use for InfluxDB authentication
|
|
- **auth_password** A password to use for InfluxDB authentication
|
|
- **curl_client** If specified, use
|
|
|
|
If ``auth_password`` is specified as an environment variable, it will be
|
|
masked in the Python process.
|
|
|
|
:param dict kwargs: Keyword Arguments
|
|
:rtype: bool
|
|
|
|
"""
|
|
global _base_tags, _base_url, _credentials, _installed, _io_loop, \
|
|
_max_batch_size, _max_clients, _periodic_callback
|
|
|
|
if _installed:
|
|
LOGGER.warning('InfluxDB client already installed')
|
|
return False
|
|
|
|
_base_url = kwargs.get('url', '{}://{}:{}/write'.format(
|
|
os.environ.get('INFLUX_SCHEME', 'http'),
|
|
os.environ.get('INFLUX_HOST', 'localhost'),
|
|
os.environ.get('INFLUX_PORT', 8086)))
|
|
|
|
_credentials = (kwargs.get('auth_username',
|
|
os.environ.get('INFLUX_USER', None)),
|
|
kwargs.get('auth_password',
|
|
os.environ.get('INFLUX_PASSWORD', None)))
|
|
|
|
# Don't leave the environment variable out there with the password
|
|
if os.environ.get('INFLUX_PASSWORD'):
|
|
os.environ['INFLUX_PASSWORD'] = \
|
|
'X' * len(os.environ['INFLUX_PASSWORD'])
|
|
|
|
# Submission related values
|
|
_io_loop = kwargs.get('io_loop', ioloop.IOLoop.current())
|
|
_max_batch_size = kwargs.get('max_batch_size', 1000)
|
|
_max_clients = kwargs.get('max_clients', 10)
|
|
_periodic_callback = ioloop.PeriodicCallback(
|
|
_on_periodic_callback, kwargs.get('submission_interval', 5000),
|
|
_io_loop)
|
|
|
|
# Set the base tags
|
|
_base_tags.setdefault('hostname', socket.gethostname())
|
|
if os.environ.get('ENVIRONMENT'):
|
|
_base_tags.setdefault('environment', os.environ['ENVIRONMENT'])
|
|
if os.environ.get('SERVICE'):
|
|
_base_tags.setdefault('service', os.environ['SERVICE'])
|
|
_base_tags.update(kwargs.get('tags', {}))
|
|
|
|
# If specified, use CurlAsyncHTTPClient
|
|
if kwargs.get('curl_client'):
|
|
httpclient.AsyncHTTPClient.configure(
|
|
'tornado.curl_httpclient.CurlAsyncHTTPClient')
|
|
|
|
# Start the periodic callback on IOLoop start
|
|
_io_loop.add_callback(_periodic_callback.start)
|
|
|
|
# Don't let this run multiple times
|
|
_installed = True
|
|
|
|
return True
|
|
|
|
|
|
def set_auth_credentials(username, password):
|
|
"""Override the default authentication credentials obtained from the
|
|
environment variable configuration.
|
|
|
|
:param str username: The username to use
|
|
:param str password: The password to use
|
|
|
|
"""
|
|
global _credentials, _dirty
|
|
|
|
LOGGER.debug('Setting authentication credentials')
|
|
_credentials = username, password
|
|
_dirty = True
|
|
|
|
|
|
def set_base_url(url):
|
|
"""Override the default base URL value created from the environment
|
|
variable configuration.
|
|
|
|
:param str url: The base URL to use when submitting measurements
|
|
|
|
"""
|
|
global _base_url, _dirty
|
|
|
|
LOGGER.debug('Setting base URL to %s', url)
|
|
_base_url = url
|
|
_dirty = True
|
|
|
|
|
|
def set_io_loop(io_loop):
|
|
"""Override the use of the default IOLoop.
|
|
|
|
:param tornado.ioloop.IOLoop io_loop: The IOLoop to use
|
|
:raises: ValueError
|
|
|
|
"""
|
|
global _dirty, _io_loop
|
|
|
|
if not isinstance(io_loop, ioloop.IOLoop):
|
|
raise ValueError('Invalid io_loop value')
|
|
|
|
LOGGER.debug('Overriding the default IOLoop, using %r', io_loop)
|
|
_dirty = True
|
|
_io_loop = io_loop
|
|
|
|
|
|
def set_max_batch_size(limit):
|
|
"""Set a limit to the number of measurements that are submitted in
|
|
a single batch that is submitted per databases.
|
|
|
|
:param int limit: The maximum number of measurements per batch
|
|
|
|
|
|
"""
|
|
global _max_batch_size
|
|
|
|
LOGGER.debug('Setting maximum batch size to %i', limit)
|
|
_max_batch_size = limit
|
|
|
|
|
|
def set_max_clients(limit):
|
|
"""Set the maximum number of simultaneous batch submission that can execute
|
|
in parallel.
|
|
|
|
:param int limit: The maximum number of simultaneous batch submissions
|
|
|
|
"""
|
|
global _dirty, _max_clients
|
|
|
|
LOGGER.debug('Setting maximum client limit to %i', limit)
|
|
_dirty = True
|
|
_max_clients = limit
|
|
|
|
|
|
def set_submission_interval(seconds):
|
|
"""Override how often to submit measurements to InfluxDB.
|
|
|
|
:param int seconds: How often to wait in seconds
|
|
|
|
"""
|
|
global _periodic_callback
|
|
|
|
LOGGER.debug('Setting submission interval to %s seconds', seconds)
|
|
if _periodic_callback.is_running():
|
|
_periodic_callback.stop()
|
|
_periodic_callback = ioloop.PeriodicCallback(_on_periodic_callback,
|
|
seconds)
|
|
# Start the periodic callback on IOLoop start if it's not already started
|
|
_io_loop.add_callback(_periodic_callback.start)
|
|
|
|
|
|
def shutdown():
|
|
"""Invoke on shutdown of your application to stop the periodic
|
|
callbacks and flush any remaining metrics.
|
|
|
|
Returns a future that is complete when all pending metrics have been
|
|
submitted.
|
|
|
|
:rtype: :class:`~tornado.concurrent.TracebackFuture()`
|
|
|
|
"""
|
|
global _stopping
|
|
|
|
if _stopping:
|
|
LOGGER.warning('Already shutting down')
|
|
return
|
|
|
|
_stopping = True
|
|
if _periodic_callback.is_running():
|
|
_periodic_callback.stop()
|
|
LOGGER.info('Stopped periodic measurement submission and writing current '
|
|
'buffer to InfluxDB')
|
|
return flush()
|
|
|
|
|
|
def _create_http_client():
|
|
"""Create the HTTP client with authentication credentials if required."""
|
|
global _http_client
|
|
|
|
defaults = {'user_agent': USER_AGENT}
|
|
auth_username, auth_password = _credentials
|
|
if auth_username and auth_password:
|
|
defaults['auth_username'] = auth_username
|
|
defaults['auth_password'] = auth_password
|
|
|
|
_http_client = httpclient.AsyncHTTPClient(
|
|
force_instance=True, defaults=defaults, io_loop=_io_loop,
|
|
max_clients=_max_clients)
|
|
|
|
|
|
def _escape_str(value):
|
|
"""Escape the value with InfluxDB's wonderful escaping logic:
|
|
|
|
"Measurement names, tag keys, and tag values must escape any spaces or
|
|
commas using a backslash (\). For example: \ and \,. All tag values are
|
|
stored as strings and should not be surrounded in quotes."
|
|
|
|
:param str value: The value to be escaped
|
|
:rtype: str
|
|
|
|
"""
|
|
return str(value).replace(' ', '\ ').replace(',', '\,')
|
|
|
|
|
|
def _flush_wait(flush_future, write_future):
|
|
"""Pause briefly allowing any pending metric writes to complete before
|
|
shutting down.
|
|
|
|
:param future tornado.concurrent.TracebackFuture: The future to resolve
|
|
when the shutdown is complete.
|
|
|
|
"""
|
|
if write_future.done():
|
|
if not _pending_measurements():
|
|
flush_future.set_result(True)
|
|
return
|
|
else:
|
|
write_future = _write_measurements()
|
|
_io_loop.add_timeout(
|
|
_io_loop.time() + 0.25, _flush_wait, flush_future, write_future)
|
|
|
|
|
|
def _futures_wait(wait_future, futures):
|
|
"""Waits for all futures to be completed. If the futures are not done,
|
|
wait 100ms and then invoke itself via the ioloop and check again. If
|
|
they are done, set a result on `wait_future` indicating the list of
|
|
futures are done.
|
|
|
|
:param wait_future: The future to complete when all `futures` are done
|
|
:type wait_future: tornado.concurrent.Future
|
|
:param list futures: The list of futures to watch for completion
|
|
|
|
"""
|
|
global _writing
|
|
|
|
remaining = []
|
|
for (future, database, measurements) in futures:
|
|
|
|
# If the future hasn't completed, add it to the remaining stack
|
|
if not future.done():
|
|
remaining.append((future, database, measurements))
|
|
continue
|
|
|
|
# Get the result of the HTTP request, processing any errors
|
|
try:
|
|
result = future.result()
|
|
except (httpclient.HTTPError, OSError, socket.error) as error:
|
|
_on_request_error(error, database, measurements)
|
|
else:
|
|
if result.code >= 400:
|
|
_on_request_error(result.code, database, measurements)
|
|
|
|
# If there are futures that remain, try again in 100ms.
|
|
if remaining:
|
|
return _io_loop.add_timeout(
|
|
_io_loop.time() + 0.1, _futures_wait, wait_future, remaining)
|
|
|
|
_writing = False
|
|
wait_future.set_result(True)
|
|
|
|
|
|
def _maybe_warn_about_buffer_size():
|
|
"""Check the buffer size and issue a warning if it's too large and
|
|
a warning has not been issued for more than 60 seconds.
|
|
|
|
"""
|
|
global _last_warning
|
|
|
|
if not _last_warning:
|
|
_last_warning = time.time()
|
|
|
|
count = _pending_measurements()
|
|
if count > _warn_threshold and (time.time() - _last_warning) > 60:
|
|
LOGGER.warning('InfluxDB measurement buffer has %i entries', count)
|
|
|
|
|
|
def _on_periodic_callback():
|
|
"""Invoked periodically to ensure that metrics that have been collected
|
|
are submitted to InfluxDB. If metrics are still being written when it
|
|
is invoked, pass until the next time.
|
|
|
|
:rtype: tornado.concurrent.Future
|
|
|
|
"""
|
|
global _periodic_future
|
|
|
|
if isinstance(_periodic_future, concurrent.Future) \
|
|
and not _periodic_future.done():
|
|
LOGGER.warning('Metrics are currently being written, '
|
|
'skipping write interval')
|
|
return
|
|
_periodic_future = _write_measurements()
|
|
return _periodic_future
|
|
|
|
|
|
def _on_request_error(error, database, measurements):
|
|
"""Handle a batch submission error, logging the problem and adding the
|
|
measurements back to the stack.
|
|
|
|
:param mixed error: The error that was returned
|
|
:param str database: The database the submission failed for
|
|
:param list measurements: The measurements to add back to the stack
|
|
|
|
"""
|
|
LOGGER.error('Error submitting batch to %s: %r', database, error)
|
|
_measurements[database] = measurements + _measurements[database]
|
|
|
|
|
|
def _pending_measurements():
|
|
"""Return the number of measurements that have not been submitted to
|
|
InfluxDB.
|
|
|
|
:rtype: int
|
|
|
|
"""
|
|
return sum([len(_measurements[dbname]) for dbname in _measurements])
|
|
|
|
|
|
def _write_measurements():
|
|
"""Write out all of the metrics in each of the databases,
|
|
returning a future that will indicate all metrics have been written
|
|
when that future is done.
|
|
|
|
:rtype: tornado.concurrent.Future
|
|
|
|
"""
|
|
global _writing
|
|
|
|
future = concurrent.TracebackFuture()
|
|
|
|
if _writing:
|
|
LOGGER.warning('Currently writing measurements, skipping write')
|
|
future.set_result(False)
|
|
elif not _pending_measurements():
|
|
LOGGER.debug('No pending measurements, skipping write')
|
|
future.set_result(True)
|
|
|
|
# Exit early if there's an error condition
|
|
if future.done():
|
|
return future
|
|
|
|
if not _http_client or _dirty:
|
|
_create_http_client()
|
|
|
|
# Keep track of the futures for each batch submission
|
|
futures = []
|
|
|
|
# Submit a batch for each database
|
|
for database in _measurements:
|
|
url = '{}?db={}'.format(_base_url, database)
|
|
|
|
# Get the measurements to submit
|
|
measurements = _measurements[database][:_max_batch_size]
|
|
|
|
# Pop them off the stack of pending measurements
|
|
_measurements[database] = _measurements[database][_max_batch_size:]
|
|
|
|
# Create the request future
|
|
request = _http_client.fetch(
|
|
url, method='POST', body='\n'.join(measurements).encode('utf-8'))
|
|
|
|
# Keep track of each request in our future stack
|
|
futures.append((request, database, measurements))
|
|
|
|
# Start the wait cycle for all the requests to complete
|
|
_writing = True
|
|
_futures_wait(future, futures)
|
|
|
|
return future
|
|
|
|
|
|
class Measurement(object):
|
|
"""The :cls:`Measurement` class represents what will become a single row in
|
|
an InfluxDB database.
|
|
|
|
:param str database: The database name to use when submitting
|
|
:param str name: The measurement name
|
|
|
|
"""
|
|
def __init__(self, database, name):
|
|
self.database = database
|
|
self.name = _escape_str(name)
|
|
self.fields = {}
|
|
self.tags = dict(_base_tags)
|
|
|
|
@contextlib.contextmanager
|
|
def duration(self, name):
|
|
"""Record the time it takes to run an arbitrary code block.
|
|
|
|
:param str name: The field name to record the timing in
|
|
|
|
This method returns a context manager that records the amount
|
|
of time spent inside of the context, adding the timing to the
|
|
measurement.
|
|
|
|
"""
|
|
start = time.time()
|
|
try:
|
|
yield
|
|
finally:
|
|
self.set_field(name, max(time.time(), start) - start)
|
|
|
|
def set_field(self, name, value):
|
|
"""Set the value of a field in the measurement.
|
|
|
|
:param str name: The name of the field to set the value for
|
|
:param int|float value: The value of the field
|
|
:raises: ValueError
|
|
|
|
"""
|
|
if not isinstance(value, int) and not isinstance(value, float):
|
|
raise ValueError('Value must be an integer or float')
|
|
self.fields[_escape_str(name)] = str(value)
|
|
|
|
def set_tag(self, name, value):
|
|
"""Set a tag on the measurement.
|
|
|
|
:param str name: name of the tag to set
|
|
:param str value: value to assign
|
|
|
|
This will overwrite the current value assigned to a tag
|
|
if one exists.
|
|
|
|
"""
|
|
self.tags[_escape_str(name)] = _escape_str(value)
|
|
|
|
def set_tags(self, tags):
|
|
"""Set multiple tags for the measurement.
|
|
|
|
:param dict tags: Tag key/value pairs to assign
|
|
|
|
This will overwrite the current value assigned to a tag
|
|
if one exists with the same name.
|
|
|
|
"""
|
|
for key, value in tags.items():
|
|
self.set_tag(key, value)
|