From f0d62cffee2f1f0c01f92f2b78e12e95248dc027 Mon Sep 17 00:00:00 2001 From: Braxton Date: Fri, 17 Dec 2021 22:06:46 -0700 Subject: [PATCH 1/4] Sampling (#77) initial rough draft --- README.md | 8 ++++++++ logdna/logdna.py | 7 +++++++ logdna/sampling.py | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+) create mode 100644 logdna/sampling.py diff --git a/README.md b/README.md index 5140281..ccd5145 100644 --- a/README.md +++ b/README.md @@ -250,6 +250,14 @@ A custom ingestion endpoint to stream log lines into. List of fields out of `record` object to include in the `meta` object. By default, `args`, `name`, `pathname`, and `lineno` will be included. +##### sampling_instance + +* _Optional_ +* Type: Sampling class instance +* Default: [`Sampling()`](logdna/sampling.py), which sends everything + +Instance of a "sampling class". Used to decide if a log should be sent via a random selection over some distribution. The default sends everything. However, the `UniformSampling` class is included and extension of `Sampling` is welcome. + ### log(line, [options]) #### line diff --git a/logdna/logdna.py b/logdna/logdna.py index 412ae7a..80b3325 100644 --- a/logdna/logdna.py +++ b/logdna/logdna.py @@ -9,6 +9,7 @@ from .configs import defaults from .utils import sanitize_meta, get_ip, normalize_list_option +from .sampling import Sampling class LogDNAHandler(logging.Handler): @@ -77,6 +78,9 @@ def __init__(self, key, options={}): self.setLevel(logging.DEBUG) self.lock = threading.RLock() + # Set up sampling (class instance). Defaults to send everything via base class + self.sampling_instance = options.get('sampling_instance', Sampling()) + def start_flusher(self): if not self.flusher: self.flusher = threading.Timer(self.flush_interval_secs, @@ -178,6 +182,9 @@ def try_request(self): self.exception_flag = True def send_request(self, data): + # If sampling function's send_check fails, drop the log. Naive approach, probably a better place to put this. 
+ if not self.sampling_instance.send_check(): return True + try: response = requests.post(url=self.url, json=data, diff --git a/logdna/sampling.py b/logdna/sampling.py new file mode 100644 index 0000000..e5b4a34 --- /dev/null +++ b/logdna/sampling.py @@ -0,0 +1,39 @@ +import random + +_DEV = False + +class Sampling: + """ + Sampling class boilerplate that returns True no matter what. + + send_check is required. It is the "decision interface". + """ + + def __init__(self): pass + + def send_check(self): + """ + Decide if a log should be sent using a random selection over some distribution. + """ + return True + + +class UniformSampling(Sampling): + """ + Uniform distribution + """ + + def __init__(self, send_ratio=1.0): + """ + send_ratio: percentage of logs to let through. Defaults to always send + """ + if send_ratio > 1.0 or send_ratio < 0.0: + send_ratio = 1.0 # set to 1.0 if out of bounds: [0,1] + self.send_ratio = send_ratio + + def send_check(self): + """ + Binary send decision based on a uniform distribution. + """ + # Send if random number is less than send_ratio + return True if random.uniform(0.0,1.0) < self.send_ratio else False From 46aa7c644ad3ecd01b08fe05c0dbea1fd5bd1c00 Mon Sep 17 00:00:00 2001 From: Braxton Date: Fri, 17 Dec 2021 22:32:49 -0700 Subject: [PATCH 2/4] Sampling Example (#78) added sampling usage example to repo --- README.md | 16 ++++++++++++++++ logdna/__init__.py | 1 + 2 files changed, 17 insertions(+) diff --git a/README.md b/README.md index ccd5145..02d0e98 100644 --- a/README.md +++ b/README.md @@ -258,6 +258,22 @@ List of fields out of `record` object to include in the `meta` object. By defaul Instance of a "sampling class". Used to decide if a log should be sent via a random selection over some distribution. The default sends everything. However, the `UniformSampling` class is included and extension of `Sampling` is welcome. 
+Example usage to send 75% of your logs +```python +import logging +from logdna import LogDNAHandler, UniformSampling + +log = logging.getLogger('logdna_sample') + +uniform_sampler = UniformSampling(send_ratio=.75) +options = {'hostname':'py_sample_test', 'sampling_instance':uniform_sampler} + +logdna_handler = LogDNAHandler("API_KEY",options) +log.addHandler(logdna_handler) + +log.info("Maybe I'll send, maybe I won't.") +``` + ### log(line, [options]) #### line diff --git a/logdna/__init__.py b/logdna/__init__.py index 27abc1a..f95c5ee 100644 --- a/logdna/__init__.py +++ b/logdna/__init__.py @@ -1,4 +1,5 @@ from .logdna import LogDNAHandler +from .sampling import Sampling, UniformSampling __all__ = ['LogDNAHandler'] # Publish this class to the "logging.handlers" module so that it can be use From a05def018eaa8eda8d44b8e68c8d702d11e63f75 Mon Sep 17 00:00:00 2001 From: Braxton Date: Fri, 17 Dec 2021 22:34:41 -0700 Subject: [PATCH 3/4] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 02d0e98..ef8d73c 100644 --- a/README.md +++ b/README.md @@ -258,7 +258,7 @@ List of fields out of `record` object to include in the `meta` object. By defaul Instance of a "sampling class". Used to decide if a log should be sent via a random selection over some distribution. The default sends everything. However, the `UniformSampling` class is included and extension of `Sampling` is welcome. 
-Example usage to send 75% of your logs +Example usage to uniformly send 75% of your logs ```python import logging from logdna import LogDNAHandler, UniformSampling From 4d9792437269c9077f7662e2d8e975c4adc6f9fb Mon Sep 17 00:00:00 2001 From: Braxton Date: Fri, 17 Dec 2021 22:35:26 -0700 Subject: [PATCH 4/4] Update sampling.py --- logdna/sampling.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/logdna/sampling.py b/logdna/sampling.py index e5b4a34..739e04b 100644 --- a/logdna/sampling.py +++ b/logdna/sampling.py @@ -1,7 +1,5 @@ import random -_DEV = False - class Sampling: """ Sampling class boilerplate that returns True no matter what.