在 Linux 服务器上运行 Django 项目

Question

Ivan Mukhin

Asked: 2024-03-01 04:46:57 +0000 UTC

Python 数据集迭代器中的内存泄漏

772

需要为目录中的图像实现一个迭代器。

class DatasetReader(Reader):
    """Iterate over the files of a directory as preprocessed image batches.

    Every iteration step reads ``self.batch`` images with OpenCV, resizes them
    to ``self.image_size``, stacks them into one array and passes the stack
    through ``Transformer.transform_images``.

    Configuration comes from ``self.args``; it is presumably populated by the
    ``Reader`` base class, which is not visible here -- confirm against it.
    """

    def __init__(self, log):
        super().__init__(log)

    @memory_profiler.profile
    def _get_arguments(self):
        """Copy the reader configuration from ``self.args`` into attributes.

        Raises:
            ValueError: if ``BatchSize`` is smaller than 1.
        """
        self.dataset_name = self.args['DatasetName']
        self.dataset_path = self.args['DatasetPath']

        # The channel order is used as an index array by the transformer, and
        # NumPy only accepts integer (or boolean) arrays as indices.
        self.channel_swap = (np.asarray(ast.literal_eval(self.args['ChannelSwap']), dtype=np.intp)
                             if self.args['ChannelSwap'] is not None else [2, 1, 0])

        self.norm = (ast.literal_eval(self.args['Normalization'])
                     if self.args['Normalization'] is not None else False)

        self.layout = self.args['Layout']

        self.mean = (np.asarray(ast.literal_eval(self.args['Mean']), dtype=np.float32)
                     if self.args['Mean'] is not None else [0., 0., 0.])

        self.std = (np.asarray(ast.literal_eval(self.args['Std']), dtype=np.float32)
                    if self.args['Std'] is not None else [1., 1., 1.])

        self.image_size = ast.literal_eval(self.args['ImageSize'])

        self.batch = int(self.args['BatchSize'])
        if self.batch < 1:
            # A non-positive batch size can never produce a batch.
            raise ValueError(f'BatchSize must be a positive integer, got {self.batch}')

        # List the directory once and iterate over that snapshot, so that the
        # reported length and the iterated files cannot diverge.
        self.dataset = list(Path(self.dataset_path).glob('*'))
        self.max = len(self.dataset)
        self.gg = iter(self.dataset)
        self.n = 0
        self.transformer = Transformer(self.dict_for_transformer())

    def dict_for_transformer(self):
        """Return the preprocessing settings in the mapping ``Transformer`` reads."""
        dictionary = {
            'channel_swap': self.channel_swap,
            'mean': self.mean,
            'std': self.std,
            'norm': self.norm,
            'layout': self.layout,
        }
        return dictionary

    @memory_profiler.profile
    def _preprocess_image(self, image_path):
        """Read the image at ``image_path`` and resize it to ``self.image_size``.

        Raises:
            ValueError: if OpenCV cannot decode the file.
        """
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError(f'Failed to read image: {image_path}')
        return cv2.resize(image, self.image_size)

    def __iter__(self):
        """Restart iteration from the first file and return ``self``."""
        self.n = 0
        # The file iterator must be reset together with the counter; otherwise
        # a second pass over the reader would yield nothing.
        self.gg = iter(self.dataset)
        return self

    # @memory_profiler.profile  (enable to profile memory per batch)
    def __next__(self):
        """Return the next transformed batch of ``self.batch`` images.

        Only full batches are produced: when fewer than ``self.batch`` files
        remain, iteration stops and the remainder is dropped.

        Raises:
            StopIteration: when no full batch is left.
        """
        if self.n + self.batch > self.max:
            raise StopIteration

        batch_images = []
        for _ in range(self.batch):
            image_path = next(self.gg)
            self.n += 1
            batch_images.append(self._preprocess_image(str(image_path.absolute())))

        stacked = np.array(batch_images)
        return self.transformer.transform_images(stacked, None, np.float32, 'data')

该对象会被传递给外部框架的函数。然而，每次迭代之后，内存消耗都会大幅增加（我插入 del 是为了尝试解决这个问题）。下面是若干次迭代的内存分析输出示例。

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
    67    615.1 MiB    615.1 MiB           1       @memory_profiler.profile
    68                                             def _preprocess_image(self, image_path):
    69    618.5 MiB      3.4 MiB           1           image = cv2.imread(image_path)
    70    618.8 MiB      0.4 MiB           1           image_resize = cv2.resize(image, self.image_size)
    71    618.8 MiB      0.0 MiB           1           del image
    72    618.8 MiB      0.0 MiB           1           return image_resize


Filename: /home/vanya/projects/dl-benchmark/src/quantization/tvm/parameters.py

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
    67    882.0 MiB    882.0 MiB           1       @memory_profiler.profile
    68                                             def _preprocess_image(self, image_path):
    69    883.1 MiB      1.1 MiB           1           image = cv2.imread(image_path)
    70    883.1 MiB      0.0 MiB           1           image_resize = cv2.resize(image, self.image_size)
    71    883.1 MiB      0.0 MiB           1           del image
    72    883.1 MiB      0.0 MiB           1           return image_resize


Filename: /home/vanya/projects/dl-benchmark/src/quantization/tvm/parameters.py

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
    67   1001.2 MiB   1001.2 MiB           1       @memory_profiler.profile
    68                                             def _preprocess_image(self, image_path):
    69   1002.1 MiB      0.9 MiB           1           image = cv2.imread(image_path)
    70   1002.1 MiB      0.0 MiB           1           image_resize = cv2.resize(image, self.image_size)
    71   1002.1 MiB      0.0 MiB           1           del image
    72   1002.1 MiB      0.0 MiB           1           return image_resize


Filename: /home/vanya/projects/dl-benchmark/src/quantization/tvm/parameters.py

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
    67   1121.3 MiB   1121.3 MiB           1       @memory_profiler.profile
    68                                             def _preprocess_image(self, image_path):
    69   1122.5 MiB      1.1 MiB           1           image = cv2.imread(image_path)
    70   1122.5 MiB      0.0 MiB           1           image_resize = cv2.resize(image, self.image_size)
    71   1122.5 MiB      0.0 MiB           1           del image
    72   1122.5 MiB      0.0 MiB           1           return image_resize


Filename: /home/vanya/projects/dl-benchmark/src/quantization/tvm/parameters.py

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
    67   1241.6 MiB   1241.6 MiB           1       @memory_profiler.profile
    68                                             def _preprocess_image(self, image_path):
    69   1243.7 MiB      2.1 MiB           1           image = cv2.imread(image_path)
    70   1243.7 MiB      0.0 MiB           1           image_resize = cv2.resize(image, self.image_size)
    71   1243.7 MiB      0.0 MiB           1           del image
    72   1243.7 MiB      0.0 MiB           1           return image_resize


Filename: /home/vanya/projects/dl-benchmark/src/quantization/tvm/parameters.py

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
    67   1361.7 MiB   1361.7 MiB           1       @memory_profiler.profile
    68                                             def _preprocess_image(self, image_path):
    69   1362.7 MiB      1.0 MiB           1           image = cv2.imread(image_path)
    70   1362.7 MiB      0.0 MiB           1           image_resize = cv2.resize(image, self.image_size)
    71   1362.7 MiB      0.0 MiB           1           del image
    72   1362.7 MiB      0.0 MiB           1           return image_resize


Filename: /home/vanya/projects/dl-benchmark/src/quantization/tvm/parameters.py

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
    67   1482.0 MiB   1482.0 MiB           1       @memory_profiler.profile
    68                                             def _preprocess_image(self, image_path):
    69   1482.3 MiB      0.4 MiB           1           image = cv2.imread(image_path)
    70   1482.3 MiB      0.0 MiB           1           image_resize = cv2.resize(image, self.image_size)
    71   1482.3 MiB      0.0 MiB           1           del image
    72   1482.3 MiB      0.0 MiB           1           return image_resize


Filename: /home/vanya/projects/dl-benchmark/src/quantization/tvm/parameters.py

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
    67   1602.2 MiB   1602.2 MiB           1       @memory_profiler.profile
    68                                             def _preprocess_image(self, image_path):
    69   1603.2 MiB      1.0 MiB           1           image = cv2.imread(image_path)
    70   1603.2 MiB      0.0 MiB           1           image_resize = cv2.resize(image, self.image_size)
    71   1603.2 MiB      0.0 MiB           1           del image
    72   1603.2 MiB      0.0 MiB           1           return image_resize


Filename: /home/vanya/projects/dl-benchmark/src/quantization/tvm/parameters.py

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
    67   1722.3 MiB   1722.3 MiB           1       @memory_profiler.profile
    68                                             def _preprocess_image(self, image_path):
    69   1723.5 MiB      1.1 MiB           1           image = cv2.imread(image_path)
    70   1723.5 MiB      0.0 MiB           1           image_resize = cv2.resize(image, self.image_size)
    71   1723.5 MiB      0.0 MiB           1           del image
    72   1723.5 MiB      0.0 MiB           1           return image_resize


Filename: /home/vanya/projects/dl-benchmark/src/quantization/tvm/parameters.py

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
    67   1842.6 MiB   1842.6 MiB           1       @memory_profiler.profile
    68                                             def _preprocess_image(self, image_path):
    69   1843.7 MiB      1.1 MiB           1           image = cv2.imread(image_path)
    70   1843.7 MiB      0.0 MiB           1           image_resize = cv2.resize(image, self.image_size)
    71   1843.7 MiB      0.0 MiB           1           del image
    72   1843.7 MiB      0.0 MiB           1           return image_resize


Filename: /home/vanya/projects/dl-benchmark/src/quantization/tvm/parameters.py

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
    67   1962.8 MiB   1962.8 MiB           1       @memory_profiler.profile
    68                                             def _preprocess_image(self, image_path):
    69   1964.3 MiB      1.5 MiB           1           image = cv2.imread(image_path)
    70   1964.3 MiB      0.0 MiB           1           image_resize = cv2.resize(image, self.image_size)
    71   1964.3 MiB      0.0 MiB           1           del image
    72   1964.3 MiB      0.0 MiB           1           return image_resize

这个实现可能存在什么问题？只需要迭代 32 张图像，结果却把内存（RAM）耗尽了。Transformer 的代码：

class Transformer:
    """Apply the configured preprocessing steps to a batch of images.

    ``converting`` is a mapping with the keys ``channel_swap``, ``mean``,
    ``std``, ``norm`` and ``layout``.  A value of ``None`` disables the
    corresponding step (``norm`` is tested for truthiness instead).
    """

    def __init__(self, converting):
        self._converting = converting

    def get_shape_in_chw_order(self, shape, input_name):
        """Return a three-part slice of ``shape`` after reordering it by layout.

        ``shape`` is reordered with the argsort of the permutation stored in
        ``LAYER_LAYOUT_TO_IMAGE`` for the configured layout.  For 4- and
        5-element shapes the result is ``(shape[-1], shape[-3], shape[-2])``;
        otherwise it is everything after the first element.  ``input_name`` is
        unused.
        """
        layout = self._converting['layout']
        sort = np.argsort(LAYER_LAYOUT_TO_IMAGE[layout])
        shape = np.array(shape)[sort]
        chw = shape[1:]
        if len(shape) in [4, 5]:
            chw = shape[-1], shape[-3], shape[-2]
        return chw

    def __set_channel_swap(self, image, input_name):
        """Return ``image`` with its last axis reordered by ``channel_swap``.

        Indexing creates a new array, so the result has to be returned; merely
        rebinding the local name would silently discard the swap.
        """
        channel_swap = self._converting['channel_swap']
        if channel_swap is None:
            return image
        # Index arrays must be integers; the configuration may supply floats.
        order = np.asarray(channel_swap).astype(np.intp)
        return image[..., order]

    def __set_norm(self, image, input_name):
        """Scale ``image`` in place by 1/255."""
        # A scalar divisor works for any number of channels.
        image /= 255.0

    def __set_mean(self, image, input_name):
        """Subtract the configured mean from ``image`` in place."""
        mean = self._converting['mean']
        if mean is not None:
            image -= mean

    def __set_input_scale(self, image, input_name):
        """Divide ``image`` in place by the configured ``std``."""
        input_scale = self._converting['std']
        if input_scale is not None:
            image /= input_scale

    def __set_layout_order(self, image, input_name):
        """Return ``image`` transposed to the configured layout, if any."""
        layout = self._converting['layout']
        if layout is not None:
            layout = LAYER_LAYOUT_TO_IMAGE[layout]
            image = image.transpose(layout)
        return image

    def _transform(self, image, input_name):
        """Run all preprocessing steps on a float64 copy of ``image``.

        The input array itself is never modified.
        """
        # np.array with a dtype copies and converts in one step, avoiding the
        # extra full-size temporary of np.copy(...).astype(...).
        transformed_image = np.array(image, dtype=np.float64)
        transformed_image = self.__set_channel_swap(transformed_image, input_name)
        if self._converting['norm']:
            self.__set_norm(transformed_image, input_name)
        self.__set_mean(transformed_image, input_name)
        self.__set_input_scale(transformed_image, input_name)
        transformed_image = self.__set_layout_order(transformed_image, input_name)
        return transformed_image

    def transform_images(self, images, shape, element_type, input_name):
        """Preprocess ``images`` and return the result cast to ``element_type``.

        ``shape`` is accepted for interface compatibility and is not used.
        """
        transformed_images = self._transform(images, input_name)
        return transformed_images.astype(element_type)

下面的代码基于该迭代器创建生成器，并把它传给外部框架（quantization_tvm 是执行量化的函数，calibrate_dataset 是生成器）：

class QuantizationProcess:
    """Quantize a model with ``tvm.relay.quantize`` using a calibration dataset.

    Attributes:
        log: logger used for progress messages.
        quant_model: result of the quantization; ``None`` until
            ``quantization_tvm`` has run.
        model: object exposing ``mod`` and ``params``, which are passed to TVM.
        dataset: iterable of batches that also exposes a ``batch`` attribute.
        quant_params: settings object; ``calib_mode``, ``calib_samples``,
            ``weights_scale`` and ``global_scale`` are read from it.
    """

    def __init__(self, log, model, dataset, quant_params):
        self.log = log
        self.quant_model = None
        self.model = model
        self.dataset = dataset
        self.quant_params = quant_params

    @memory_profiler.profile
    def calibrate_dataset(self):
        """Yield ``{'data': batch}`` dicts until ``calib_samples`` are covered.

        Batch ``i`` is yielded while ``i * self.dataset.batch`` is below
        ``calib_samples``.  The limit is checked before the next batch is
        requested, so no batch is loaded only to be thrown away.
        """
        calib_samples = self.quant_params.calib_samples
        batches = iter(self.dataset)
        served = 0
        while served < calib_samples:
            try:
                data = next(batches)
            except StopIteration:
                # The dataset ran out first.  Inside a generator a leaking
                # StopIteration would become a RuntimeError, so end explicitly.
                return
            yield {'data': data}
            served += self.dataset.batch

    @memory_profiler.profile
    def quantization_tvm(self):
        """Quantize ``self.model`` and store the result in ``self.quant_model``.

        Raises:
            ValueError: if the calibration mode is neither ``kl_divergence``
                nor ``global_scale`` (compared case-insensitively).
        """
        self.log.info(f'Starting quantization with calibration mode {self.quant_params.calib_mode}')
        calib_mode = self.quant_params.calib_mode.lower()

        if calib_mode == 'kl_divergence':
            with tvm.relay.quantize.qconfig(calibrate_mode=calib_mode,
                                            weight_scale=self.quant_params.weights_scale.lower(),
                                            dtype_input='int32', dtype_weight='int32', dtype_activation='int32',
                                            partition_conversions='enabled'):

                self.quant_model = tvm.relay.quantize.quantize(self.model.mod,
                                                               self.model.params,
                                                               dataset=self.calibrate_dataset())

        elif calib_mode == 'global_scale':
            with tvm.relay.quantize.qconfig(calibrate_mode=calib_mode,
                                            global_scale=self.quant_params.global_scale,
                                            dtype_input='int32', dtype_weight='int32', dtype_activation='int32'):

                self.quant_model = tvm.relay.quantize.quantize(self.model.mod,
                                                               self.model.params)

        else:
            # The two literals are concatenated, so the separating space matters.
            raise ValueError('Wrong calibration mode parameter. '
                             'Supported modes: kl_divergence, global_scale')

https://github.com/apache/tvm/blob/main/python/tvm/relay/quantize/_calibrate.py 中的 _kl_scale 函数会遍历该数据集。

1 个回答

Voted

Ivan Mukhin · Answer 1 · 2024-03-06T22:03:57Z

经过一番研究（直接在普通循环中运行该迭代器），结果发现问题与迭代器本身无关，内存泄漏发生在外部框架内部。把上面贴出的内存占用与下面普通循环中的内存占用对比就可以看到：在普通循环中，即使处理完目录中的所有图像，每次迭代的内存占用也不会增长。

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
    66    284.9 MiB    284.9 MiB           1       @profile
    67                                             def _preprocess_image(self, image_path):
    68    286.2 MiB      1.2 MiB           1           image = cv2.imread(image_path)
    69    286.5 MiB      0.4 MiB           1           image_resize = cv2.resize(image, self.image_size)
    70    286.5 MiB      0.0 MiB           1           return image_resize


Filename: /home/vanya/projects/dl-benchmark/src/quantization/tvm/parameters.py

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
    76    284.9 MiB    284.9 MiB           1       @profile
    77                                             def __next__(self):
    78    284.9 MiB      0.0 MiB           1           if self.n <= self.max:
    79    284.9 MiB      0.0 MiB           1               res = []
    80    286.5 MiB      0.0 MiB           2               for _ in range(self.batch):
    81    284.9 MiB      0.0 MiB           1                   self.n += 1
    82    286.5 MiB      1.6 MiB           1                   image = self._preprocess_image(str(next(self.gg).absolute()))
    83    286.5 MiB      0.0 MiB           1                   res.append(image)
    84    286.5 MiB      0.0 MiB           1               res = np.array(res)
    85    286.7 MiB      0.1 MiB           1               result = self.transformer.transform_images(res, None, np.float32, 'data')
    86    286.7 MiB      0.0 MiB           1               return result
    87                                                 else:
    88                                                     raise StopIteration


(1, 224, 224, 3)
Filename: /home/vanya/projects/dl-benchmark/src/quantization/tvm/parameters.py

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
    66    286.7 MiB    286.7 MiB           1       @profile
    67                                             def _preprocess_image(self, image_path):
    68    286.7 MiB      0.0 MiB           1           image = cv2.imread(image_path)
    69    286.7 MiB      0.0 MiB           1           image_resize = cv2.resize(image, self.image_size)
    70    286.7 MiB      0.0 MiB           1           return image_resize


Filename: /home/vanya/projects/dl-benchmark/src/quantization/tvm/parameters.py

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
    76    286.7 MiB    286.7 MiB           1       @profile
    77                                             def __next__(self):
    78    286.7 MiB      0.0 MiB           1           if self.n <= self.max:
    79    286.7 MiB      0.0 MiB           1               res = []
    80    286.7 MiB      0.0 MiB           2               for _ in range(self.batch):
    81    286.7 MiB      0.0 MiB           1                   self.n += 1
    82    286.7 MiB      0.0 MiB           1                   image = self._preprocess_image(str(next(self.gg).absolute()))
    83    286.7 MiB      0.0 MiB           1                   res.append(image)
    84    286.7 MiB      0.0 MiB           1               res = np.array(res)
    85    286.7 MiB      0.0 MiB           1               result = self.transformer.transform_images(res, None, np.float32, 'data')
    86    286.7 MiB      0.0 MiB           1               return result
    87                                                 else:
    88                                                     raise StopIteration


(1, 224, 224, 3)
Filename: /home/vanya/projects/dl-benchmark/src/quantization/tvm/parameters.py

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
    66    286.7 MiB    286.7 MiB           1       @profile
    67                                             def _preprocess_image(self, image_path):
    68    286.7 MiB      0.0 MiB           1           image = cv2.imread(image_path)
    69    286.7 MiB      0.0 MiB           1           image_resize = cv2.resize(image, self.image_size)
    70    286.7 MiB      0.0 MiB           1           return image_resize


Filename: /home/vanya/projects/dl-benchmark/src/quantization/tvm/parameters.py

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
    76    286.7 MiB    286.7 MiB           1       @profile
    77                                             def __next__(self):
    78    286.7 MiB      0.0 MiB           1           if self.n <= self.max:
    79    286.7 MiB      0.0 MiB           1               res = []
    80    286.7 MiB      0.0 MiB           2               for _ in range(self.batch):
    81    286.7 MiB      0.0 MiB           1                   self.n += 1
    82    286.7 MiB      0.0 MiB           1                   image = self._preprocess_image(str(next(self.gg).absolute()))
    83    286.7 MiB      0.0 MiB           1                   res.append(image)
    84    286.7 MiB      0.0 MiB           1               res = np.array(res)
    85    286.7 MiB      0.0 MiB           1               result = self.transformer.transform_images(res, None, np.float32, 'data')
    86    286.7 MiB      0.0 MiB           1               return result
    87                                                 else:
    88                                                     raise StopIteration


(1, 224, 224, 3)
Filename: /home/vanya/projects/dl-benchmark/src/quantization/tvm/parameters.py

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
    66    286.7 MiB    286.7 MiB           1       @profile
    67                                             def _preprocess_image(self, image_path):
    68    286.7 MiB      0.0 MiB           1           image = cv2.imread(image_path)
    69    286.7 MiB      0.0 MiB           1           image_resize = cv2.resize(image, self.image_size)
    70    286.7 MiB      0.0 MiB           1           return image_resize


Filename: /home/vanya/projects/dl-benchmark/src/quantization/tvm/parameters.py

Python 数据集迭代器中的内存泄漏

我看不懂措辞

请求的模块“del”不提供名为“default”的导出

"!+tab" 在 HTML 的 vs 代码中不起作用

我正在尝试解决“猜词”的问题。Python

可以使用哪些命令将当前指针移动到指定的提交而不更改工作目录中的文件？

Python解析野莓

问题：“警告：检查最新版本的 pip 时出错。”

帮助编写一个用值填充变量的循环。解决这个问题

尽管依赖数组为空，但在渲染上调用了 2 次 useEffect

数据不通过 Telegram.WebApp.sendData 发送

Python 数据集迭代器中的内存泄漏

1 个回答

相关问题