From 9033104fb5f785e8cf60b0df2f1f7aca6d7d8ca0 Mon Sep 17 00:00:00 2001 From: Matthew Craig Date: Mon, 13 Aug 2018 19:58:06 -0500 Subject: [PATCH 01/18] Move calculation of image size into function --- ccdproc/combiner.py | 56 ++++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/ccdproc/combiner.py b/ccdproc/combiner.py index 38dcce6d..b9b96900 100644 --- a/ccdproc/combiner.py +++ b/ccdproc/combiner.py @@ -523,6 +523,36 @@ def _calculate_step_sizes(x_size, y_size, num_chunks): return xstep, ystep +def _calculate_size_of_image(ccd, + combine_uncertainty_function): + # If uncertainty_func is given for combine this will create an uncertainty + # even if the originals did not have one. In that case we need to create + # an empty placeholder. + if ccd.uncertainty is None and combine_uncertainty_function is not None: + ccd.uncertainty = StdDevUncertainty(np.zeros(ccd.data.shape)) + + size_of_an_img = ccd.data.nbytes + try: + size_of_an_img += ccd.uncertainty.array.nbytes + # In case uncertainty is None it has no "array" and in case the "array" is + # not a numpy array: + except AttributeError: + pass + # Mask is enforced to be a numpy.array across astropy versions + if ccd.mask is not None: + size_of_an_img += ccd.mask.nbytes + # flags is not necessarily a numpy array so do not fail with an + # AttributeError in case something was set! + # TODO: Flags are not taken into account in Combiner. This number is added + # nevertheless for future compatibility. + try: + size_of_an_img += ccd.flags.nbytes + except AttributeError: + pass + + return size_of_an_img + + def combine(img_list, output_file=None, method='average', weights=None, scale=None, mem_limit=16e9, clip_extrema=False, nlow=1, nhigh=1, @@ -662,12 +692,6 @@ def combine(img_list, output_file=None, # User has provided fits filenames to read from ccd = CCDData.read(img_list[0], **ccdkwargs) - # If uncertainty_func is given for combine this will create an uncertainty - # even if the originals did not have one. In that case we need to create - # an empty placeholder. - if ccd.uncertainty is None and combine_uncertainty_function is not None: - ccd.uncertainty = StdDevUncertainty(np.zeros(ccd.data.shape)) - if dtype is None: dtype = np.float64 @@ -677,24 +701,8 @@ def combine(img_list, output_file=None, if ccd.data.dtype != dtype: ccd.data = ccd.data.astype(dtype) - size_of_an_img = ccd.data.nbytes - try: - size_of_an_img += ccd.uncertainty.array.nbytes - # In case uncertainty is None it has no "array" and in case the "array" is - # not a numpy array: - except AttributeError: - pass - # Mask is enforced to be a numpy.array across astropy versions - if ccd.mask is not None: - size_of_an_img += ccd.mask.nbytes - # flags is not necessarily a numpy array so do not fail with an - # AttributeError in case something was set! - # TODO: Flags are not taken into account in Combiner. This number is added - # nevertheless for future compatibility. - try: - size_of_an_img += ccd.flags.nbytes - except AttributeError: - pass + size_of_an_img = _calculate_size_of_image(ccd, + combine_uncertainty_function) no_of_img = len(img_list) From e9e63308bdcf3363cd063a7c1f56bec967b97c90 Mon Sep 17 00:00:00 2001 From: Matthew Craig Date: Mon, 13 Aug 2018 20:00:03 -0500 Subject: [PATCH 02/18] Add profiling utilities --- ccdproc/tests/run_for_memory_profile.py | 150 ++++ ccdproc/tests/run_profile.ipynb | 1003 +++++++++++++++++++++++ 2 files changed, 1153 insertions(+) create mode 100644 ccdproc/tests/run_for_memory_profile.py create mode 100644 ccdproc/tests/run_profile.ipynb diff --git a/ccdproc/tests/run_for_memory_profile.py b/ccdproc/tests/run_for_memory_profile.py new file mode 100644 index 00000000..ae50ce66 --- /dev/null +++ b/ccdproc/tests/run_for_memory_profile.py @@ -0,0 +1,150 @@ +from argparse import ArgumentParser +from tempfile import TemporaryDirectory +from pathlib import Path +import sys +import gc + +import psutil +from memory_profiler import memory_usage + +import numpy as np +from astropy.io import fits +from astropy.stats import median_absolute_deviation +from astropy.nddata import CCDData + +# This bit of hackery ensures that we can see ccdproc from within +# the test suite +sys.path.append(str(Path().cwd())) +from ccdproc import combine, ImageFileCollection +from ccdproc.combiner import _calculate_size_of_image + +# Do not combine these into one statement. When all references are lost +# to a TemporaryDirectory the directory is automatically deleted. _TMPDIR +# creates a reference that will stick around. +_TMPDIR = TemporaryDirectory() +TMPPATH = Path(_TMPDIR.name) + + +def generate_fits_files(n_images, size=None, seed=1523): + if size is None: + use_size = (2024, 2031) + else: + use_size = (size, size) + + np.random.seed(seed) + + base_name = 'test-combine-{num:03d}.fits' + + for num in range(n_images): + data = np.random.normal(size=use_size) + # Now add some outlying pixels so there is something to clip + n_bad = 50000 + bad_x = np.random.randint(0, high=use_size[0] - 1, size=n_bad) + bad_y = np.random.randint(0, high=use_size[1] - 1, size=n_bad) + data[bad_x, bad_y] = (np.random.choice([-1, 1], size=n_bad) * + (10 + np.random.rand(n_bad))) + hdu = fits.PrimaryHDU(data=np.asarray(data, dtype='float32')) + hdu.header['for_prof'] = 'yes' + hdu.header['bunit'] = 'adu' + path = TMPPATH.resolve() / base_name.format(num=num) + hdu.writeto(path, overwrite=True) + + +def run_with_limit(n_files, sampling_interval, size=None, sigma_clip=False, + combine_method=None, memory_limit=None): + """ + Try opening a bunch of files with a relatively low limit on the number + of open files. + + Parameters + ---------- + + n_files : int + Number of files to combine. + + sampling_interval : float + Time, in seconds, between memory samples. + + size : int, optional + Size of one side of the image (the image is always square). + + sigma_clip : bool, optional + If true, sigma clip the data before combining. + + combine_method : str, optional + Should be one of the combine methods accepted by + ccdproc.combine + + memory_limit : int, optional + Cap on memory use during image combination. + """ + # Do a little input validation + if n_files <= 0: + raise ValueError("Argument 'n' must be a positive integer") + + proc = psutil.Process() + + print('Process ID is: ', proc.pid, flush=True) + ic = ImageFileCollection(TMPPATH) + files = ic.files_filtered(for_prof='yes', include_path=True) + + kwargs = {'method': combine_method} + + if sigma_clip: + kwargs.update( + {'sigma_clip_low_thresh': 5, + 'sigma_clip_high_thresh': 5, + 'sigma_clip_func': np.ma.median, + 'sigma_clip_dev_func': median_absolute_deviation} + ) + + ccd = CCDData.read(files[0]) + expected_img_size = _calculate_size_of_image(ccd, None) + + if memory_limit: + kwargs['mem_limit'] = memory_limit + + pre_mem_use = memory_usage(-1, interval=sampling_interval, timeout=1) + print(pre_mem_use) + mem_use = memory_usage((combine, (files,), kwargs), + interval=sampling_interval, timeout=None) + return mem_use, expected_img_size + + +if __name__ == '__main__': + parser = ArgumentParser() + parser.add_argument('number', type=int, + help='Number of files to combine.') + parser.add_argument('--size', type=int, action='store', + help='Size of one side of image to create. ' + 'All images are square, so only give ' + 'a single number for the size.') + parser.add_argument('--combine-method', '-c', + choices=('average', 'median'), + help='Method to use to combine images.') + parser.add_argument('--memory-limit', type=int, + help='Limit combination to this amount of memory') + parser.add_argument('--sigma-clip', action='store_true', + help='If set, sigma-clip before combining. Clipping ' + 'will be done with high/low limit of 5. ' + 'The central function is the median, the ' + 'deviation is the median_absolute_deviation.') + parser.add_argument('--sampling-freq', type=float, default=0.05, + help='Time, in seconds, between memory samples.') + parser.add_argument('--frequent-gc', action='store_true', + help='If set, perform garbage collection ' + 'much more frequently than the default.') + args = parser.parse_args() + + if args.frequent_gc: + gc.set_threshold(10, 10, 10) + + print("Garbage collection thresholds: ", gc.get_threshold()) + + mem_use = run_with_limit(args.number, args.sampling_freq, + size=args.size, + sigma_clip=args.sigma_clip, + combine_method=args.combine_method, + memory_limit=args.memory_limit) + print('Max memory usage (MB): ', np.max(mem_use)) + print('Baseline memory usage (MB): ', mem_use[0]) diff --git a/ccdproc/tests/run_profile.ipynb b/ccdproc/tests/run_profile.ipynb new file mode 100644 index 00000000..7a25eeb8 --- /dev/null +++ b/ccdproc/tests/run_profile.ipynb @@ -0,0 +1,1003 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import gc\n", + "\n", + "%matplotlib notebook \n", + "from matplotlib import pyplot as plt\n", + "import numpy as np\n", + "\n", + "from run_for_memory_profile import run_with_limit, generate_fits_files\n", + "\n", + "from ccdproc import __version__ as ccdp_version\n", + "from astropy import __version__ as apy_version" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "image_size = 4000\n", + "num_files = 10\n", + "sampling_interval = 0.01 # sec\n", + "memory_limit = 1000000000\n", + "combine_method = 'average'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "generate_fits_files(num_files, size=image_size)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "runs = {\n", + " combine_method: {\n", + " 'times': [],\n", + " 'memory': [],\n", + " 'image_size': 0.\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Process ID is: 62228\n", + "[477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375, 477.84375]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:astropy:splitting each image into 2 chunks to limit memory usage to 1000000000 bytes.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO: splitting each image into 2 chunks to limit memory usage to 1000000000 bytes. [ccdproc.combiner]\n", + "Process ID is: 62228\n", + "[894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375, 894.99609375]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:astropy:splitting each image into 2 chunks to limit memory usage to 1000000000 bytes.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO: splitting each image into 2 chunks to limit memory usage to 1000000000 bytes. [ccdproc.combiner]\n", + "Process ID is: 62228\n", + "[895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625, 895.06640625]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:astropy:splitting each image into 2 chunks to limit memory usage to 1000000000 bytes.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO: splitting each image into 2 chunks to limit memory usage to 1000000000 bytes. [ccdproc.combiner]\n", + "Process ID is: 62228\n", + "[895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625, 895.140625]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:astropy:splitting each image into 2 chunks to limit memory usage to 1000000000 bytes.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO: splitting each image into 2 chunks to limit memory usage to 1000000000 bytes. [ccdproc.combiner]\n" + ] + } + ], + "source": [ + "n_repetitions = 4\n", + "\n", + "for _ in range(n_repetitions):\n", + " mem_use, img_size = run_with_limit(num_files, sampling_interval, size=image_size, memory_limit=memory_limit, combine_method=combine_method)\n", + " gc.collect()\n", + " runs[combine_method]['times'].append(np.arange(len(mem_use)) * sampling_interval)\n", + " runs[combine_method]['memory'].append(mem_use)\n", + " runs[combine_method]['image_size'] = img_size\n", + " runs[combine_method]['memory_limit'] = memory_limit\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "/* Put everything inside the global mpl namespace */\n", + "window.mpl = {};\n", + "\n", + "\n", + "mpl.get_websocket_type = function() {\n", + " if (typeof(WebSocket) !== 'undefined') {\n", + " return WebSocket;\n", + " } else if (typeof(MozWebSocket) !== 'undefined') {\n", + " return MozWebSocket;\n", + " } else {\n", + " alert('Your browser does not have WebSocket support.' +\n", + " 'Please try Chrome, Safari or Firefox ≥ 6. ' +\n", + " 'Firefox 4 and 5 are also supported but you ' +\n", + " 'have to enable WebSockets in about:config.');\n", + " };\n", + "}\n", + "\n", + "mpl.figure = function(figure_id, websocket, ondownload, parent_element) {\n", + " this.id = figure_id;\n", + "\n", + " this.ws = websocket;\n", + "\n", + " this.supports_binary = (this.ws.binaryType != undefined);\n", + "\n", + " if (!this.supports_binary) {\n", + " var warnings = document.getElementById(\"mpl-warnings\");\n", + " if (warnings) {\n", + " warnings.style.display = 'block';\n", + " warnings.textContent = (\n", + " \"This browser does not support binary websocket messages. \" +\n", + " \"Performance may be slow.\");\n", + " }\n", + " }\n", + "\n", + " this.imageObj = new Image();\n", + "\n", + " this.context = undefined;\n", + " this.message = undefined;\n", + " this.canvas = undefined;\n", + " this.rubberband_canvas = undefined;\n", + " this.rubberband_context = undefined;\n", + " this.format_dropdown = undefined;\n", + "\n", + " this.image_mode = 'full';\n", + "\n", + " this.root = $('
');\n", + " this._root_extra_style(this.root)\n", + " this.root.attr('style', 'display: inline-block');\n", + "\n", + " $(parent_element).append(this.root);\n", + "\n", + " this._init_header(this);\n", + " this._init_canvas(this);\n", + " this._init_toolbar(this);\n", + "\n", + " var fig = this;\n", + "\n", + " this.waiting = false;\n", + "\n", + " this.ws.onopen = function () {\n", + " fig.send_message(\"supports_binary\", {value: fig.supports_binary});\n", + " fig.send_message(\"send_image_mode\", {});\n", + " if (mpl.ratio != 1) {\n", + " fig.send_message(\"set_dpi_ratio\", {'dpi_ratio': mpl.ratio});\n", + " }\n", + " fig.send_message(\"refresh\", {});\n", + " }\n", + "\n", + " this.imageObj.onload = function() {\n", + " if (fig.image_mode == 'full') {\n", + " // Full images could contain transparency (where diff images\n", + " // almost always do), so we need to clear the canvas so that\n", + " // there is no ghosting.\n", + " fig.context.clearRect(0, 0, fig.canvas.width, fig.canvas.height);\n", + " }\n", + " fig.context.drawImage(fig.imageObj, 0, 0);\n", + " };\n", + "\n", + " this.imageObj.onunload = function() {\n", + " fig.ws.close();\n", + " }\n", + "\n", + " this.ws.onmessage = this._make_on_message_function(this);\n", + "\n", + " this.ondownload = ondownload;\n", + "}\n", + "\n", + "mpl.figure.prototype._init_header = function() {\n", + " var titlebar = $(\n", + " '
');\n", + " var titletext = $(\n", + " '
');\n", + " titlebar.append(titletext)\n", + " this.root.append(titlebar);\n", + " this.header = titletext[0];\n", + "}\n", + "\n", + "\n", + "\n", + "mpl.figure.prototype._canvas_extra_style = function(canvas_div) {\n", + "\n", + "}\n", + "\n", + "\n", + "mpl.figure.prototype._root_extra_style = function(canvas_div) {\n", + "\n", + "}\n", + "\n", + "mpl.figure.prototype._init_canvas = function() {\n", + " var fig = this;\n", + "\n", + " var canvas_div = $('
');\n", + "\n", + " canvas_div.attr('style', 'position: relative; clear: both; outline: 0');\n", + "\n", + " function canvas_keyboard_event(event) {\n", + " return fig.key_event(event, event['data']);\n", + " }\n", + "\n", + " canvas_div.keydown('key_press', canvas_keyboard_event);\n", + " canvas_div.keyup('key_release', canvas_keyboard_event);\n", + " this.canvas_div = canvas_div\n", + " this._canvas_extra_style(canvas_div)\n", + " this.root.append(canvas_div);\n", + "\n", + " var canvas = $('');\n", + " canvas.addClass('mpl-canvas');\n", + " canvas.attr('style', \"left: 0; top: 0; z-index: 0; outline: 0\")\n", + "\n", + " this.canvas = canvas[0];\n", + " this.context = canvas[0].getContext(\"2d\");\n", + "\n", + " var backingStore = this.context.backingStorePixelRatio ||\n", + "\tthis.context.webkitBackingStorePixelRatio ||\n", + "\tthis.context.mozBackingStorePixelRatio ||\n", + "\tthis.context.msBackingStorePixelRatio ||\n", + "\tthis.context.oBackingStorePixelRatio ||\n", + "\tthis.context.backingStorePixelRatio || 1;\n", + "\n", + " mpl.ratio = (window.devicePixelRatio || 1) / backingStore;\n", + "\n", + " var rubberband = $('');\n", + " rubberband.attr('style', \"position: absolute; left: 0; top: 0; z-index: 1;\")\n", + "\n", + " var pass_mouse_events = true;\n", + "\n", + " canvas_div.resizable({\n", + " start: function(event, ui) {\n", + " pass_mouse_events = false;\n", + " },\n", + " resize: function(event, ui) {\n", + " fig.request_resize(ui.size.width, ui.size.height);\n", + " },\n", + " stop: function(event, ui) {\n", + " pass_mouse_events = true;\n", + " fig.request_resize(ui.size.width, ui.size.height);\n", + " },\n", + " });\n", + "\n", + " function mouse_event_fn(event) {\n", + " if (pass_mouse_events)\n", + " return fig.mouse_event(event, event['data']);\n", + " }\n", + "\n", + " rubberband.mousedown('button_press', mouse_event_fn);\n", + " rubberband.mouseup('button_release', mouse_event_fn);\n", + " // Throttle sequential mouse events to 1 every 20ms.\n", + " rubberband.mousemove('motion_notify', mouse_event_fn);\n", + "\n", + " rubberband.mouseenter('figure_enter', mouse_event_fn);\n", + " rubberband.mouseleave('figure_leave', mouse_event_fn);\n", + "\n", + " canvas_div.on(\"wheel\", function (event) {\n", + " event = event.originalEvent;\n", + " event['data'] = 'scroll'\n", + " if (event.deltaY < 0) {\n", + " event.step = 1;\n", + " } else {\n", + " event.step = -1;\n", + " }\n", + " mouse_event_fn(event);\n", + " });\n", + "\n", + " canvas_div.append(canvas);\n", + " canvas_div.append(rubberband);\n", + "\n", + " this.rubberband = rubberband;\n", + " this.rubberband_canvas = rubberband[0];\n", + " this.rubberband_context = rubberband[0].getContext(\"2d\");\n", + " this.rubberband_context.strokeStyle = \"#000000\";\n", + "\n", + " this._resize_canvas = function(width, height) {\n", + " // Keep the size of the canvas, canvas container, and rubber band\n", + " // canvas in synch.\n", + " canvas_div.css('width', width)\n", + " canvas_div.css('height', height)\n", + "\n", + " canvas.attr('width', width * mpl.ratio);\n", + " canvas.attr('height', height * mpl.ratio);\n", + " canvas.attr('style', 'width: ' + width + 'px; height: ' + height + 'px;');\n", + "\n", + " rubberband.attr('width', width);\n", + " rubberband.attr('height', height);\n", + " }\n", + "\n", + " // Set the figure to an initial 600x600px, this will subsequently be updated\n", + " // upon first draw.\n", + " this._resize_canvas(600, 600);\n", + "\n", + " // Disable right mouse context menu.\n", + " $(this.rubberband_canvas).bind(\"contextmenu\",function(e){\n", + " return false;\n", + " });\n", + "\n", + " function set_focus () {\n", + " canvas.focus();\n", + " canvas_div.focus();\n", + " }\n", + "\n", + " window.setTimeout(set_focus, 100);\n", + "}\n", + "\n", + "mpl.figure.prototype._init_toolbar = function() {\n", + " var fig = this;\n", + "\n", + " var nav_element = $('
')\n", + " nav_element.attr('style', 'width: 100%');\n", + " this.root.append(nav_element);\n", + "\n", + " // Define a callback function for later on.\n", + " function toolbar_event(event) {\n", + " return fig.toolbar_button_onclick(event['data']);\n", + " }\n", + " function toolbar_mouse_event(event) {\n", + " return fig.toolbar_button_onmouseover(event['data']);\n", + " }\n", + "\n", + " for(var toolbar_ind in mpl.toolbar_items) {\n", + " var name = mpl.toolbar_items[toolbar_ind][0];\n", + " var tooltip = mpl.toolbar_items[toolbar_ind][1];\n", + " var image = mpl.toolbar_items[toolbar_ind][2];\n", + " var method_name = mpl.toolbar_items[toolbar_ind][3];\n", + "\n", + " if (!name) {\n", + " // put a spacer in here.\n", + " continue;\n", + " }\n", + " var button = $('');\n", - " button.click(method_name, toolbar_event);\n", - " button.mouseover(tooltip, toolbar_mouse_event);\n", - " nav_element.append(button);\n", - " }\n", - "\n", - " // Add the status bar.\n", - " var status_bar = $('');\n", - " nav_element.append(status_bar);\n", - " this.message = status_bar[0];\n", - "\n", - " // Add the close button to the window.\n", - " var buttongrp = $('
');\n", - " var button = $('');\n", - " button.click(function (evt) { fig.handle_close(fig, {}); } );\n", - " button.mouseover('Stop Interaction', toolbar_mouse_event);\n", - " buttongrp.append(button);\n", - " var titlebar = this.root.find($('.ui-dialog-titlebar'));\n", - " titlebar.prepend(buttongrp);\n", - "}\n", - "\n", - "mpl.figure.prototype._root_extra_style = function(el){\n", - " var fig = this\n", - " el.on(\"remove\", function(){\n", - "\tfig.close_ws(fig, {});\n", - " });\n", - "}\n", - "\n", - "mpl.figure.prototype._canvas_extra_style = function(el){\n", - " // this is important to make the div 'focusable\n", - " el.attr('tabindex', 0)\n", - " // reach out to IPython and tell the keyboard manager to turn it's self\n", - " // off when our div gets focus\n", - "\n", - " // location in version 3\n", - " if (IPython.notebook.keyboard_manager) {\n", - " IPython.notebook.keyboard_manager.register_events(el);\n", - " }\n", - " else {\n", - " // location in version 2\n", - " IPython.keyboard_manager.register_events(el);\n", - " }\n", - "\n", - "}\n", - "\n", - "mpl.figure.prototype._key_event_extra = function(event, name) {\n", - " var manager = IPython.notebook.keyboard_manager;\n", - " if (!manager)\n", - " manager = IPython.keyboard_manager;\n", - "\n", - " // Check for shift+enter\n", - " if (event.shiftKey && event.which == 13) {\n", - " this.canvas_div.blur();\n", - " event.shiftKey = false;\n", - " // Send a \"J\" for go to next cell\n", - " event.which = 74;\n", - " event.keyCode = 74;\n", - " manager.command_mode();\n", - " manager.handle_keydown(event);\n", - " }\n", - "}\n", - "\n", - "mpl.figure.prototype.handle_save = function(fig, msg) {\n", - " fig.ondownload(fig, null);\n", - "}\n", - "\n", - "\n", - "mpl.find_output_cell = function(html_output) {\n", - " // Return the cell and output element which can be found *uniquely* in the notebook.\n", - " // Note - this is a bit hacky, but it is done because the \"notebook_saving.Notebook\"\n", - " // IPython event is triggered only after the cells have been serialised, which for\n", - " // our purposes (turning an active figure into a static one), is too late.\n", - " var cells = IPython.notebook.get_cells();\n", - " var ncells = cells.length;\n", - " for (var i=0; i= 3 moved mimebundle to data attribute of output\n", - " data = data.data;\n", - " }\n", - " if (data['text/html'] == html_output) {\n", - " return [cell, data, j];\n", - " }\n", - " }\n", - " }\n", - " }\n", - "}\n", - "\n", - "// Register the function which deals with the matplotlib target/channel.\n", - "// The kernel may be null if the page has been refreshed.\n", - "if (IPython.notebook.kernel != null) {\n", - " IPython.notebook.kernel.comm_manager.register_target('matplotlib', mpl.mpl_figure_comm);\n", - "}\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "for i, data in enumerate(zip(runs[combine_method]['times'], runs[combine_method]['memory'])):\n", - " time, mem_use = data\n", - " plt.plot(time, mem_use, label='Memory use in combine {}'.format(i))\n", + "def run_them(runs, clipping=False):\n", + " for combine_method in runs.keys():\n", + " for _ in range(n_repetitions):\n", + " mem_use, img_size = run_with_limit(num_files, sampling_interval, size=image_size, \n", + " memory_limit=memory_limit, combine_method=combine_method,\n", + " sigma_clip=clipping)\n", + " gc.collect()\n", + " runs[combine_method]['times'].append(np.arange(len(mem_use)) * sampling_interval)\n", + " runs[combine_method]['memory'].append(mem_use)\n", + " runs[combine_method]['image_size'] = img_size\n", + " runs[combine_method]['memory_limit'] = memory_limit\n", + " runs[combine_method]['clipping'] = clipping" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run_them(runs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "styles = ['solid', 'dashed', 'dotted']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(20, 10))\n", + "\n", + "for idx, method in enumerate(runs.keys()):\n", + " style = styles[idx % len(styles)]\n", + " for i, data in enumerate(zip(runs[method]['times'], runs[method]['memory'])):\n", + " time, mem_use = data \n", + " if i == 0:\n", + " label = 'Memory use in {} combine (repeated runs same style)'.format(method)\n", + " alpha = 1.0\n", + " else:\n", + " label = ''\n", + " alpha = 0.4\n", + " plt.plot(time, mem_use, linestyle=style, label=label, alpha=alpha)\n", "\n", "plt.vlines(-40 * sampling_interval, mem_use[0], mem_use[0] + memory_limit/1e6, colors='red', label='Memory use limit')\n", - "plt.vlines(-20 * sampling_interval, mem_use[0], mem_use[0] + img_size/1e6, label='size of one image')\n", + "plt.vlines(-20 * sampling_interval, mem_use[0], mem_use[0] + runs[method]['image_size']/1e6, label='size of one image')\n", "plt.grid()\n", - "plt.legend()" + "clipped = 'ON' if runs[method]['clipping'] else 'OFF'\n", + "plt.title('ccdproc commit {}; {} repetitions per method; sigma_clip {}'.format(commit, n_repetitions, clipped))\n", + "plt.legend()\n", + "plt.savefig('commit_{}_reps_{}_clip_{}.png'.format(commit, n_repetitions, clipped))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Memory profile with sigma clipping" ] }, { @@ -968,7 +183,7 @@ "metadata": {}, "outputs": [], "source": [ - "ccdp_version, apy_version" + "run_them(runs_clip, clipping=True)" ] }, { @@ -976,7 +191,29 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "plt.figure(figsize=(20, 10))\n", + "\n", + "for idx, method in enumerate(runs_clip.keys()):\n", + " style = styles[idx % len(styles)]\n", + " for i, data in enumerate(zip(runs_clip[method]['times'], runs_clip[method]['memory'])):\n", + " time, mem_use = data \n", + " if i == 0:\n", + " label = 'Memory use in {} combine (repeated runs same style)'.format(method)\n", + " alpha = 1.0\n", + " else:\n", + " label = ''\n", + " alpha = 0.4\n", + " plt.plot(time, mem_use, linestyle=style, label=label, alpha=alpha)\n", + "\n", + "plt.vlines(-40 * sampling_interval, mem_use[0], mem_use[0] + memory_limit/1e6, colors='red', label='Memory use limit')\n", + "plt.vlines(-20 * sampling_interval, mem_use[0], mem_use[0] + runs_clip[method]['image_size']/1e6, label='size of one image')\n", + "plt.grid()\n", + "clipped = 'ON' if runs_clip[method]['clipping'] else 'OFF'\n", + "plt.title('ccdproc commit {}; {} repetitions per method; sigma_clip {}'.format(commit, n_repetitions, clipped))\n", + "plt.legend()\n", + "plt.savefig('commit_{}_reps_{}_clip_{}.png'.format(commit, n_repetitions, clipped))" + ] } ], "metadata": { @@ -995,7 +232,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.6.6" } }, "nbformat": 4, From 38b9300bb357bbed8ea1b21730500f377b92b996 Mon Sep 17 00:00:00 2001 From: Matt Craig Date: Tue, 14 Aug 2018 00:07:18 -0500 Subject: [PATCH 05/18] Get the git hash from the install --- ccdproc/tests/run_profile.ipynb | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/ccdproc/tests/run_profile.ipynb b/ccdproc/tests/run_profile.ipynb index 4a56fad8..12ce780a 100644 --- a/ccdproc/tests/run_profile.ipynb +++ b/ccdproc/tests/run_profile.ipynb @@ -15,10 +15,19 @@ "\n", "from run_for_memory_profile import run_with_limit, generate_fits_files\n", "\n", - "from ccdproc import __version__ as ccdp_version\n", + "from ccdproc.version import get_git_devstr\n", "from astropy import __version__ as apy_version" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print('Astropy version: ', apy_version)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -30,7 +39,7 @@ "sampling_interval = 0.01 # sec\n", "memory_limit = 1000000000\n", "\n", - "commit = " + "commit = get_git_devstr(sha=True)[:7]" ] }, { From 1d446c8acc80bf5b84a993f00285623ebb9935af Mon Sep 17 00:00:00 2001 From: Matt Craig Date: Tue, 14 Aug 2018 00:07:55 -0500 Subject: [PATCH 06/18] Provide fallback for calculating image size --- ccdproc/tests/run_for_memory_profile.py | 34 ++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/ccdproc/tests/run_for_memory_profile.py b/ccdproc/tests/run_for_memory_profile.py index bdd62220..b656c398 100644 --- a/ccdproc/tests/run_for_memory_profile.py +++ b/ccdproc/tests/run_for_memory_profile.py @@ -16,7 +16,39 @@ # the test suite sys.path.append(str(Path().cwd())) from ccdproc import combine, ImageFileCollection -from ccdproc.combiner import _calculate_size_of_image + +try: + from ccdproc.combiner import _calculate_size_of_image +except ImportError: + def _calculate_size_of_image(ccd, + combine_uncertainty_function): + # If uncertainty_func is given for combine this will create an uncertainty + # even if the originals did not have one. In that case we need to create + # an empty placeholder. + if ccd.uncertainty is None and combine_uncertainty_function is not None: + ccd.uncertainty = StdDevUncertainty(np.zeros(ccd.data.shape)) + + size_of_an_img = ccd.data.nbytes + try: + size_of_an_img += ccd.uncertainty.array.nbytes + # In case uncertainty is None it has no "array" and in case the "array" is + # not a numpy array: + except AttributeError: + pass + # Mask is enforced to be a numpy.array across astropy versions + if ccd.mask is not None: + size_of_an_img += ccd.mask.nbytes + # flags is not necessarily a numpy array so do not fail with an + # AttributeError in case something was set! + # TODO: Flags are not taken into account in Combiner. This number is added + # nevertheless for future compatibility. + try: + size_of_an_img += ccd.flags.nbytes + except AttributeError: + pass + + return size_of_an_img + # Do not combine these into one statement. When all references are lost # to a TemporaryDirectory the directory is automatically deleted. _TMPDIR From 6876de7c69ff82768d650567565647637f09497c Mon Sep 17 00:00:00 2001 From: Matt Craig Date: Tue, 14 Aug 2018 00:09:55 -0500 Subject: [PATCH 07/18] Add a memory use factor to account for overhead in combining --- ccdproc/combiner.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ccdproc/combiner.py b/ccdproc/combiner.py index b9b96900..3e47a5ce 100644 --- a/ccdproc/combiner.py +++ b/ccdproc/combiner.py @@ -706,8 +706,14 @@ def combine(img_list, output_file=None, no_of_img = len(img_list) + # Set a memory use factor based on profiling + if method == 'median': + memory_factor = 3 + else: + memory_factor = 2 + # determine the number of chunks to split the images into - no_chunks = int((size_of_an_img * no_of_img) / mem_limit) + 1 + no_chunks = int((memory_factor * size_of_an_img * no_of_img) / mem_limit) + 1 if no_chunks > 1: log.info('splitting each image into {0} chunks to limit memory usage ' 'to {1} bytes.'.format(no_chunks, mem_limit)) From 8c04bb0b328e77dc4be566db8a14bb5a4b972d78 Mon Sep 17 00:00:00 2001 From: Matt Craig Date: Tue, 14 Aug 2018 00:20:24 -0500 Subject: [PATCH 08/18] Modify calculation of the memory factor --- ccdproc/combiner.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ccdproc/combiner.py b/ccdproc/combiner.py index 3e47a5ce..a5a9e095 100644 --- a/ccdproc/combiner.py +++ b/ccdproc/combiner.py @@ -712,6 +712,8 @@ def combine(img_list, output_file=None, else: memory_factor = 2 + memory_factor *= 1.5 + # determine the number of chunks to split the images into no_chunks = int((memory_factor * size_of_an_img * no_of_img) / mem_limit) + 1 if no_chunks > 1: From 4677436fbb556f10952bcb9de0ed1f92c74aa2e0 Mon Sep 17 00:00:00 2001 From: Matt Craig Date: Tue, 14 Aug 2018 12:27:56 -0500 Subject: [PATCH 09/18] Make a couple more improvements to plots in memory profile notebook --- ccdproc/tests/run_profile.ipynb | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/ccdproc/tests/run_profile.ipynb b/ccdproc/tests/run_profile.ipynb index 12ce780a..f70de9c5 100644 --- a/ccdproc/tests/run_profile.ipynb +++ b/ccdproc/tests/run_profile.ipynb @@ -34,12 +34,13 @@ "metadata": {}, "outputs": [], "source": [ - "image_size = 4000\n", + "image_size = 4000 # Square image, so 4000 x 4000\n", "num_files = 10\n", "sampling_interval = 0.01 # sec\n", - "memory_limit = 1000000000\n", + "memory_limit = 1000000000 # bytes, roughly 1GB\n", "\n", - "commit = get_git_devstr(sha=True)[:7]" + "commit = get_git_devstr(sha=True)[:7]\n", + "print(commit)" ] }, { @@ -172,11 +173,17 @@ "\n", "plt.vlines(-40 * sampling_interval, mem_use[0], mem_use[0] + memory_limit/1e6, colors='red', label='Memory use limit')\n", "plt.vlines(-20 * sampling_interval, mem_use[0], mem_use[0] + runs[method]['image_size']/1e6, label='size of one image')\n", + "\n", "plt.grid()\n", "clipped = 'ON' if runs[method]['clipping'] else 'OFF'\n", - "plt.title('ccdproc commit {}; {} repetitions per method; sigma_clip {}'.format(commit, n_repetitions, clipped))\n", - "plt.legend()\n", - "plt.savefig('commit_{}_reps_{}_clip_{}.png'.format(commit, n_repetitions, clipped))" + "\n", + "plt.title('ccdproc commit {}; {} repetitions per method; sigma_clip {}'.format(commit, n_repetitions, clipped),\n", + " fontsize=20)\n", + "plt.xlabel('Time (sec)', fontsize=20)\n", + "plt.ylabel('Memory use (MB)', fontsize=20)\n", + "\n", + "plt.legend(fontsize=20)\n", + "plt.savefig('commit_{}_reps_{}_clip_{}_memlim_{}GB.png'.format(commit, n_repetitions, clipped, memory_limit/1e9))" ] }, { @@ -217,11 +224,17 @@ "\n", "plt.vlines(-40 * sampling_interval, mem_use[0], mem_use[0] + memory_limit/1e6, colors='red', label='Memory use limit')\n", "plt.vlines(-20 * sampling_interval, mem_use[0], mem_use[0] + runs_clip[method]['image_size']/1e6, label='size of one image')\n", + "\n", "plt.grid()\n", "clipped = 'ON' if runs_clip[method]['clipping'] else 'OFF'\n", - "plt.title('ccdproc commit {}; {} repetitions per method; sigma_clip {}'.format(commit, n_repetitions, clipped))\n", - "plt.legend()\n", - "plt.savefig('commit_{}_reps_{}_clip_{}.png'.format(commit, n_repetitions, clipped))" + "\n", + "plt.title('ccdproc commit {}; {} repetitions per method; sigma_clip {}'.format(commit, n_repetitions, clipped),\n", + " fontsize=20)\n", + "plt.xlabel('Time (sec)', fontsize=20)\n", + "plt.ylabel('Memory use (MB)', fontsize=20)\n", + "\n", + "plt.legend(fontsize=20)\n", + "plt.savefig('commit_{}_reps_{}_clip_{}_memlim_{}GB.png'.format(commit, n_repetitions, clipped, memory_limit/1e9))" ] } ], From dca97330bf94866b59a968fa6dbee2b547ce396d Mon Sep 17 00:00:00 2001 From: Matt Craig Date: Tue, 14 Aug 2018 18:13:24 -0500 Subject: [PATCH 10/18] First draft of tests --- ccdproc/tests/run_for_memory_profile.py | 4 +- ccdproc/tests/test_memory_use.py | 52 +++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 ccdproc/tests/test_memory_use.py diff --git a/ccdproc/tests/run_for_memory_profile.py b/ccdproc/tests/run_for_memory_profile.py index b656c398..67c34d1c 100644 --- a/ccdproc/tests/run_for_memory_profile.py +++ b/ccdproc/tests/run_for_memory_profile.py @@ -82,8 +82,8 @@ def generate_fits_files(n_images, size=None, seed=1523): hdu.writeto(path, overwrite=True) -def run_with_limit(n_files, sampling_interval, size=None, sigma_clip=False, - combine_method=None, memory_limit=None): +def run_memory_profile(n_files, sampling_interval, size=None, sigma_clip=False, + combine_method=None, memory_limit=None): """ Try opening a bunch of files with a relatively low limit on the number of open files. diff --git a/ccdproc/tests/test_memory_use.py b/ccdproc/tests/test_memory_use.py new file mode 100644 index 00000000..1d66874f --- /dev/null +++ b/ccdproc/tests/test_memory_use.py @@ -0,0 +1,52 @@ +# Licensed under a 3-clause BSD style license - see LICENSE.rst + +import numpy as np + +from ..combiner import Combiner, combine +from .run_for_memory_profile import run_memory_profile, generate_fits_files + + +@pytest.mark.parametrize('combine_method', + ['average', 'sum', 'median']) +def test_memory_use_in_combine(combine_method): + # This is essentially a regression test for + # https://github.com/astropy/ccdproc/issues/638 + # + # Parameters are taken from profiling notebook + image_size = 2000 # Square image, so 4000 x 4000 + num_files = 10 + sampling_interval = 0.01 # sec + memory_limit = 500000000 # bytes, roughly 0.5GB + + generate_fits_files(num_files, size=image_size) + + mem_use, _ = run_memory_profile(num_files, sampling_interval, + size=image_size, memory_limit=memory_limit, + combine_method=combine_method) + + # We do not expect memory use to be strictly less than memory_limit + # throughout the combination. The factor below allows for that. + # It may need to be raised in the future...that is fine, there is a + # separate test for average memory use. + overhead_allowance = 1.5 + + # memory_profile reports in MB (no, this is not the correct conversion) + memory_limit_mb = memory_limit / 1e6 + + # Checks for TOO MUCH MEMORY USED + + # Check peak memory use + assert np.max(mem_use) <= overhead_allowance * memory_limit_mb + + # Also check average, which gets no allowance + assert np.mean(mem_use) < memory_limit_mb + + # Checks for NOT ENOUGH MEMORY USED; if these fail it means that + # memory_factor in the combine function should perhaps be modified + + # If the peak is coming in under the limit something need to be fixed + assert np.max(mem_use) >= 0.95 * memory_limit_mb + + # If the average is really low perhaps we should look at reducing peak + # usage. Nothing special, really, about the factor 0.5 below. + assert np.mean(mem_use) > 0.5 * memory_limit_mb From 6d1262057559e6eda7573b412c657651ea1af661 Mon Sep 17 00:00:00 2001 From: Matthew Craig Date: Tue, 14 Aug 2018 18:40:11 -0500 Subject: [PATCH 11/18] Add memory_profiler to test requirements --- .travis.yml | 2 +- appveyor.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index f0808f14..508136ae 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,7 +23,7 @@ env: # to repeat them for all configurations. - NUMPY_VERSION=stable - ASTROPY_VERSION=stable - - CONDA_DEPENDENCIES='scipy reproject psutil cython astroscrappy scikit-image' + - CONDA_DEPENDENCIES='scipy reproject psutil cython astroscrappy scikit-image memory_profiler' - PIP_DEPENDENCIES='' - MAIN_CMD='python setup.py' - SETUP_CMD='test' diff --git a/appveyor.yml b/appveyor.yml index f11e6bc4..6b9ea9ca 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -10,7 +10,7 @@ environment: PYTHON_ARCH: "64" # needs to be set for CMD_IN_ENV to succeed. If a mix # of 32 bit and 64 bit builds are needed, move this # to the matrix section. - CONDA_DEPENDENCIES: "scipy reproject cython astroscrappy" + CONDA_DEPENDENCIES: "scipy reproject cython astroscrappy scikit-image memory_profiler" # Need the latest scikit-image (0.14.2 or higher), which is not in # anaconda yet. PIP_DEPENDENCIES: "scikit-image" From bd4a92845ee44a63821ed3c6843433712f716eae Mon Sep 17 00:00:00 2001 From: Matthew Craig Date: Tue, 14 Aug 2018 18:51:12 -0500 Subject: [PATCH 12/18] Only create test files once (and fix an import)(and try to close open files) --- ccdproc/tests/test_memory_use.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/ccdproc/tests/test_memory_use.py b/ccdproc/tests/test_memory_use.py index 1d66874f..5b6e87fb 100644 --- a/ccdproc/tests/test_memory_use.py +++ b/ccdproc/tests/test_memory_use.py @@ -2,8 +2,21 @@ import numpy as np -from ..combiner import Combiner, combine -from .run_for_memory_profile import run_memory_profile, generate_fits_files +import pytest + +from .run_for_memory_profile import run_memory_profile, generate_fits_files, TMPPATH + +image_size = 2000 # Square image, so 4000 x 4000 +num_files = 10 + + +def setup_module(): + generate_fits_files(num_files, size=image_size) + + +def teardown_module(): + for fil in TMPPATH.glob('*.fit'): + fil.unlink() @pytest.mark.parametrize('combine_method', @@ -12,14 +25,9 @@ def test_memory_use_in_combine(combine_method): # This is essentially a regression test for # https://github.com/astropy/ccdproc/issues/638 # - # Parameters are taken from profiling notebook - image_size = 2000 # Square image, so 4000 x 4000 - num_files = 10 sampling_interval = 0.01 # sec memory_limit = 500000000 # bytes, roughly 0.5GB - generate_fits_files(num_files, size=image_size) - mem_use, _ = run_memory_profile(num_files, sampling_interval, size=image_size, memory_limit=memory_limit, combine_method=combine_method) From 2f724d35908a3a4cb29df2b3133975eb57f0a255 Mon Sep 17 00:00:00 2001 From: Matt Craig Date: Fri, 17 Aug 2018 20:42:10 -0500 Subject: [PATCH 13/18] Wrap Path in str to fix breaking tests on some python versions --- ccdproc/tests/run_for_memory_profile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ccdproc/tests/run_for_memory_profile.py b/ccdproc/tests/run_for_memory_profile.py index 67c34d1c..c51512ec 100644 --- a/ccdproc/tests/run_for_memory_profile.py +++ b/ccdproc/tests/run_for_memory_profile.py @@ -117,7 +117,7 @@ def run_memory_profile(n_files, sampling_interval, size=None, sigma_clip=False, proc = psutil.Process() print('Process ID is: ', proc.pid, flush=True) - ic = ImageFileCollection(TMPPATH) + ic = ImageFileCollection(str(TMPPATH)) files = ic.files_filtered(for_prof='yes', include_path=True) kwargs = {'method': combine_method} From 8b25fb9307b37537ade7157ea91a1ff6b9239a64 Mon Sep 17 00:00:00 2001 From: Matt Craig Date: Fri, 17 Aug 2018 20:42:30 -0500 Subject: [PATCH 14/18] Fix error in sigma clip setup --- ccdproc/tests/run_for_memory_profile.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ccdproc/tests/run_for_memory_profile.py b/ccdproc/tests/run_for_memory_profile.py index c51512ec..20cbaecb 100644 --- a/ccdproc/tests/run_for_memory_profile.py +++ b/ccdproc/tests/run_for_memory_profile.py @@ -124,7 +124,8 @@ def run_memory_profile(n_files, sampling_interval, size=None, sigma_clip=False, if sigma_clip: kwargs.update( - {'sigma_clip_low_thresh': 5, + {'sigma_clip': True, + 'sigma_clip_low_thresh': 5, 'sigma_clip_high_thresh': 5, 'sigma_clip_func': np.ma.median, 'sigma_clip_dev_func': median_absolute_deviation} From b3b5bdc99c6eca787dc3e8df4e7b0a416f0687be Mon Sep 17 00:00:00 2001 From: Matt Craig Date: Fri, 17 Aug 2018 21:08:26 -0500 Subject: [PATCH 15/18] Raise memory overhead allowance in memory test because apparently numpy 1.13 can be a little more memory-intensive than others --- ccdproc/tests/test_memory_use.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ccdproc/tests/test_memory_use.py b/ccdproc/tests/test_memory_use.py index 5b6e87fb..140eda7d 100644 --- a/ccdproc/tests/test_memory_use.py +++ b/ccdproc/tests/test_memory_use.py @@ -36,7 +36,7 @@ def test_memory_use_in_combine(combine_method): # throughout the combination. The factor below allows for that. # It may need to be raised in the future...that is fine, there is a # separate test for average memory use. - overhead_allowance = 1.5 + overhead_allowance = 1.75 # memory_profile reports in MB (no, this is not the correct conversion) memory_limit_mb = memory_limit / 1e6 From 04867794ee362687f22a61803d855d25d63a1a66 Mon Sep 17 00:00:00 2001 From: Matthew Craig Date: Thu, 25 Jul 2019 11:15:20 -0400 Subject: [PATCH 16/18] Add changelog --- CHANGES.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index c4fcb862..4174aebd 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -26,6 +26,9 @@ Bug Fixes - Function ``combine`` avoids keeping files open unnecessarily. [#629, #630] +- Function ``combine`` more accurately estimates memory use + when deciding how to chunk files. [#638, #642] + 1.3.0 (2017-11-1) ----------------- From 5a3dc2062e8ae7019e05e380e344b1ec82af6a32 Mon Sep 17 00:00:00 2001 From: Matthew Craig Date: Thu, 25 Jul 2019 16:36:47 -0400 Subject: [PATCH 17/18] Skip appropriate tests if memory_profiler is not installed --- ccdproc/tests/test_memory_use.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/ccdproc/tests/test_memory_use.py b/ccdproc/tests/test_memory_use.py index 140eda7d..e0e3fb98 100644 --- a/ccdproc/tests/test_memory_use.py +++ b/ccdproc/tests/test_memory_use.py @@ -4,21 +4,30 @@ import pytest -from .run_for_memory_profile import run_memory_profile, generate_fits_files, TMPPATH +try: + from .run_for_memory_profile import run_memory_profile, generate_fits_files, TMPPATH +except ImportError: + memory_profile_present = False +else: + memory_profile_present = True image_size = 2000 # Square image, so 4000 x 4000 num_files = 10 def setup_module(): - generate_fits_files(num_files, size=image_size) + if memory_profile_present: + generate_fits_files(num_files, size=image_size) def teardown_module(): - for fil in TMPPATH.glob('*.fit'): - fil.unlink() + if memory_profile_present: + for fil in TMPPATH.glob('*.fit'): + fil.unlink() +@pytest.mark.skipif(not memory_profile_present, + reason='memory_profiler not installed') @pytest.mark.parametrize('combine_method', ['average', 'sum', 'median']) def test_memory_use_in_combine(combine_method): @@ -56,5 +65,5 @@ def test_memory_use_in_combine(combine_method): assert np.max(mem_use) >= 0.95 * memory_limit_mb # If the average is really low perhaps we should look at reducing peak - # usage. Nothing special, really, about the factor 0.5 below. - assert np.mean(mem_use) > 0.5 * memory_limit_mb + # usage. Nothing special, really, about the factor 0.4 below. + assert np.mean(mem_use) > 0.4 * memory_limit_mb From 848e73ddd8b663e39a7801eba59ed5ec554b7f7a Mon Sep 17 00:00:00 2001 From: Matthew Craig Date: Thu, 25 Jul 2019 16:37:14 -0400 Subject: [PATCH 18/18] Give a more helpful error message if memory_profiler is not installed --- ccdproc/tests/run_profile.ipynb | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/ccdproc/tests/run_profile.ipynb b/ccdproc/tests/run_profile.ipynb index f70de9c5..136a9468 100644 --- a/ccdproc/tests/run_profile.ipynb +++ b/ccdproc/tests/run_profile.ipynb @@ -13,7 +13,10 @@ "from matplotlib import pyplot as plt\n", "import numpy as np\n", "\n", - "from run_for_memory_profile import run_with_limit, generate_fits_files\n", + "try:\n", + " from run_for_memory_profile import run_memory_profile, generate_fits_files\n", + "except ImportError:\n", + " raise ImportError('Please install memory_profiler before running this notebook.')\n", "\n", "from ccdproc.version import get_git_devstr\n", "from astropy import __version__ as apy_version" @@ -93,8 +96,8 @@ "metadata": {}, "outputs": [], "source": [ - "_, _ = run_with_limit(num_files, sampling_interval, size=image_size, \n", - " memory_limit=memory_limit, combine_method='average')" + "_, _ = run_memory_profile(num_files, sampling_interval, size=image_size, \n", + " memory_limit=memory_limit, combine_method='average')" ] }, { @@ -122,9 +125,9 @@ "def run_them(runs, clipping=False):\n", " for combine_method in runs.keys():\n", " for _ in range(n_repetitions):\n", - " mem_use, img_size = run_with_limit(num_files, sampling_interval, size=image_size, \n", - " memory_limit=memory_limit, combine_method=combine_method,\n", - " sigma_clip=clipping)\n", + " mem_use, img_size = run_memory_profile(num_files, sampling_interval, size=image_size, \n", + " memory_limit=memory_limit, combine_method=combine_method,\n", + " sigma_clip=clipping)\n", " gc.collect()\n", " runs[combine_method]['times'].append(np.arange(len(mem_use)) * sampling_interval)\n", " runs[combine_method]['memory'].append(mem_use)\n", @@ -254,7 +257,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.6" + "version": "3.6.7" } }, "nbformat": 4,