microproduct/tool/algorithm/block/blockprocess.py

# -*- coding: UTF-8 -*-
"""
@Project : microproduct
@File : blockprocess.py
@Function : tif、tiff图像分块处理拼接功能
@Contact : https://blog.csdn.net/qq_38308388/article/details/102978755
@Author:SHJ
@Date:2021/9/6
@Version:1.0.0
"""
import glob
import zipfile

from osgeo import osr, gdal
import numpy as np
import os
from PIL import Image
# import time
# from skimage import io
from tool.algorithm.image.ImageHandle import ImageHandler

class BlockProcess:
    def __init__(self):
        pass

    @staticmethod
    def get_block_size(rows, cols):
        block_size = 1024
        if rows <= 2048 or cols <= 2048:
            block_size = 512
        if (rows // 1024) <= 5 or (cols // 1024) <= 5:
            block_size = 1024
        elif 5 < (rows // 1024) and 5 < (cols // 1024):
            block_size = 2048

        return block_size

    # def get_block_size(rows, cols, block_size_config):
    #     block_size = 512 if block_size_config < 512 else block_size_config
    #     if rows > 2048 and cols > 2048:
    #         block_size = block_size_config
    #     return block_size

    @staticmethod
    def get_suffix(path_name):
        name = path_name
        suffix = '_' + name.split('_')[-4] + '_' + name.split('_')[-3] + '_' + name.split('_')[-2] + '_' + \
                 name.split('_')[-1]
        return suffix

    @staticmethod
    def get_file_names(data_dir, file_type=['tif', 'tiff']):
        """
        获取data_dir文件夹下file_type类型的文件路径
        """
        result_dir = []
        result_name = []
        for maindir, subdir, file_name_list in os.walk(data_dir):
            for filename in file_name_list:
                apath = os.path.join(maindir, filename)
                ext = apath.split('.')[-1]
                if ext in file_type:
                    result_dir.append(apath)
                    result_name.append(filename)
                else:
                    pass
        return result_dir, result_name

    @staticmethod
    def unzip_file(zip_file_path, out_path):  # ):
        # 获取压缩文件所在的目录
        # extract_folder = os.path.dirname(zip_file_path)
        if zip_file_path.endswith("dem.zip"):
            out_path_merg = os.path.join(out_path)
            # os.mkdir(out_path_merg)

            para_value_list = zip_file_path.split(";")
            for n in para_value_list:
                with zipfile.ZipFile(n, 'r') as zip_ref:
                    # 解压到和压缩文件同名的文件夹中
                    zip_ref.extractall(out_path_merg)

            return out_path

        else:
            basename = os.path.splitext(os.path.basename(zip_file_path))[0]
            extract_folder = os.path.join(out_path, basename)

            with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
                # 解压到和压缩文件同名的文件夹中
                zip_ref.extractall(extract_folder)

            files = list(glob.glob(os.path.join(extract_folder, '*')))
            for file in files:
                if basename in os.path.basename(file):
                    if not file.endswith(".xml"):
                        unzipped_folder_path = file

            return unzipped_folder_path

    @staticmethod
    def get_same_img(img_dir, img_name):
        """
        在img_dir路径下，用img_name的子图像路径集合，将集合以字典输出
        """
        result = {}
        for idx, name in enumerate(img_name):
            temp_name = ''
            for idx2, item in enumerate(name.split('_')[:-4]):
                if idx2 == 0:
                    temp_name = temp_name + item
                else:
                    temp_name = temp_name + '_' + item

            if temp_name in result:
                result[temp_name].append(img_dir[idx])
            else:
                result[temp_name] = []
                result[temp_name].append(img_dir[idx])
        return result

    @staticmethod
    def assign_spatial_reference_byfile(src_path, dst_path):
        """
        将src_path的地理信息，输入到dst_path图像中
        """
        src_ds = gdal.Open(src_path, gdal.GA_ReadOnly)
        if src_ds is None:
            return False
        sr = osr.SpatialReference()
        sr.ImportFromWkt(src_ds.GetProjectionRef())
        geo_transform = src_ds.GetGeoTransform()

        dst_ds = gdal.Open(dst_path, gdal.GA_Update)
        if dst_ds is None:
            return False
        dst_ds.SetProjection(sr.ExportToWkt())
        dst_ds.SetGeoTransform(geo_transform)
        del dst_ds
        del src_ds
        return True

    @staticmethod
    def assign_spatial_reference_bypoint(row_begin, col_begin, src_proj, src_geo, img_path):
        """
        将src_path的地理信息，输入到dst_path图像中
        """

        sr = osr.SpatialReference()
        sr.ImportFromWkt(src_proj)
        geo_transform = src_geo
        geo_transform[0] = src_geo[0] + col_begin * src_geo[1] + row_begin * src_geo[2]
        geo_transform[3] = src_geo[3] + col_begin * src_geo[4] + row_begin * src_geo[5]
        dst_ds = gdal.Open(img_path, gdal.GA_Update)
        if dst_ds is None:
            return False
        dst_ds.SetProjection(sr.ExportToWkt())
        dst_ds.SetGeoTransform(geo_transform)
        del dst_ds
        return True

    @staticmethod
    def __get_band_array(filename, num):
        """
        :param filename: tif路径
        :param num: 波段序号
        :return: 对应波段的矩阵数据
        """
        gdal.AllRegister()
        dataset = gdal.Open(filename)
        if dataset is None:
            return None
        bands = dataset.GetRasterBand(num)
        array = bands.ReadAsArray(0, 0, bands.XSize, bands.YSize)
        del dataset
        return array

    @staticmethod
    def get_data(filename):
        """
        :param filename: tif路径
        :return: 获取所有波段的数据
        """
        gdal.AllRegister()
        dataset = gdal.Open(filename)
        if dataset is None:
            return None
        im_width = dataset.RasterXSize
        im_height = dataset.RasterYSize
        im_data = dataset.ReadAsArray(0, 0, im_width, im_height)
        del dataset
        return im_data

    def get_tif_dtype(self, filename):
        """
        :param filename: tif路径
        :return: tif数据类型
        """
        image = self.__get_band_array(filename, 1)
        return image.dtype.name

    def cut(self, in_dir, out_dir, file_type=['tif', 'tiff'], out_type='tif', out_size=2048):
        """
        :param in_dir:存放待裁剪的影像文件夹，不用指定到tif文件
        :param out_dir:存放裁剪结果的影像文件夹
        :param file_type:待裁剪的影像文件类型tif、tiff、bmp、jpg、png等等
        :param out_type:裁剪结果影像文件类型
        :param out_size:裁剪尺寸，裁剪为n*n的方形
        :return: True or Flase
        """
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        data_dir_list, _ = self.get_file_names(in_dir, file_type)
        count = 0

        for each_dir in data_dir_list:

            name_suffix = os.path.basename(each_dir)
            img_name = os.path.splitext(name_suffix)[0]

            # gdal读取方法
            image = self.__get_band_array(each_dir, 1)

            cut_factor_row = int(np.ceil(image.shape[0] / out_size))
            cut_factor_clo = int(np.ceil(image.shape[1] / out_size))
            for i in range(cut_factor_row):
                for j in range(cut_factor_clo):

                    if i == cut_factor_row - 1:
                        i = image.shape[0] / out_size - 1
                    else:
                        pass

                        if j == cut_factor_clo - 1:
                            j = image.shape[1] / out_size - 1
                        else:
                            pass

                    start_x = int(np.rint(i * out_size))
                    start_y = int(np.rint(j * out_size))
                    end_x = int(np.rint((i + 1) * out_size))
                    end_y = int(np.rint((j + 1) * out_size))
                    out_dir_images = os.path.join(out_dir, img_name + '_' + str(start_x) + '_' + str(end_x) + '_' + str(start_y) + '_' + str(
                        end_y) + '.' + out_type)
                        #              + '/' + img_name \
                        #              + '_' + str(start_x) + '_' + str(end_x) + '_' + str(start_y) + '_' + str(
                        # end_y) + '.' + out_type

                    # temp_image = image[start_x:end_x, start_y:end_y]
                    # out_image = Image.fromarray(temp_data)
                    # out_image = Image.fromarray(temp_image)
                    # out_image.save(out_dir_images)

                    data = ImageHandler.get_data(each_dir)
                    if ImageHandler.get_bands(each_dir) > 1:
                        temp_data = data[:,start_x:end_x, start_y:end_y]
                    else:
                        temp_data = data[start_x:end_x, start_y:end_y]
                    ImageHandler.write_img(out_dir_images, '', [0, 0, 0, 0, 0, 0], temp_data)
            count += 1
        return True

    def cut_new(self, in_dir, out_dir, file_type=['tif', 'tiff'], out_type='tif', out_size=2048):
        """
        :param in_dir:存放待裁剪的影像文件夹，不用指定到tif文件
        :param out_dir:存放裁剪结果的影像文件夹
        :param file_type:待裁剪的影像文件类型tif、tiff、bmp、jpg、png等等
        :param out_type:裁剪结果影像文件类型
        :param out_size:裁剪尺寸，裁剪为n*n的方形
        :return: True or Flase
        20230831修改 ----tjx
        """
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        data_dir_list, _ = self.get_file_names(in_dir, file_type)
        count = 0

        for each_dir in data_dir_list:

            name_suffix = os.path.basename(each_dir)
            img_name = os.path.splitext(name_suffix)[0]

            # gdal读取方法
            image = self.__get_band_array(each_dir, 1)

            block_x = int(np.ceil(image.shape[1] / out_size))
            block_y = int(np.ceil(image.shape[0] / out_size))           # todo 修改分块
            for i in range(block_y):
                for j in range(block_x):
                    start_x = j * out_size
                    start_y = i * out_size
                    end_x = image.shape[1] if (j + 1) * out_size > image.shape[1] else (j + 1) * out_size
                    end_y = image.shape[0] if (i + 1) * out_size > image.shape[0] else (i + 1) * out_size

                    out_dir_images = os.path.join(out_dir, img_name + '_' + str(start_x) + '_' + str(end_x) + '_' + str(start_y) + '_' + str(
                        end_y) + '.' + out_type)
                    # print(out_dir_images)

                    data = ImageHandler.get_data(each_dir)
                    if ImageHandler.get_bands(each_dir) > 1:
                        # temp_data = data[:,start_x:end_x, start_y:end_y]
                        temp_data = data[:,start_y:end_y, start_x:end_x]
                    else:
                        # temp_data = data[start_x:end_x, start_y:end_y]
                        temp_data = data[start_y:end_y, start_x:end_x]
                    ImageHandler.write_img(out_dir_images, '', [0, 0, 0, 0, 0, 0], temp_data)
            count += 1
        return True

    def combine(self, data_dir, w, h, out_dir, out_type='tif', file_type=['tif', 'tiff'], datetype='float16'):
        """
        :param data_dir: 存放待裁剪的影像文件夹，不用指定到tif文件
        :param  w  拼接影像的宽度，
        :param  h  拼接影像的高度
        :param out_dir: 存放裁剪结果的影像文件夹
        :param out_type: 裁剪结果影像文件类型
        :param file_type: 待裁剪的影像文件类型
        :param datetype:数据类型 int8，int16，float16，float32 等
        :return: True or Flase
        """
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        img_dir, img_name = self.get_file_names(data_dir, file_type)

        dir_dict = self.get_same_img(img_dir, img_name)
        count = 0
        for key in dir_dict.keys():
            temp_label = np.zeros(shape=(h, w), dtype=datetype)
            dir_list = dir_dict[key]
            for item in dir_list:
                name_split = item.split('_')
                x_start = int(name_split[-4])
                x_end = int(name_split[-3])
                y_start = int(name_split[-2])
                y_end = int(name_split[-1].split('.')[0])
                # img = Image.open(item)
                img = ImageHandler.get_band_array(item, 1)
                img = np.array(img)

                temp_label[x_start:x_end, y_start:y_end] = img

            img_name = key + '.' + out_type
            new_out_dir = os.path.join(out_dir, img_name)
            ImageHandler.write_img(new_out_dir, '', [0, 0, 0, 0, 0, 0], temp_label)
            # label = Image.fromarray(temp_label)
            # label.save(new_out_dir)

            count += 1
        return True

    # todo 20230901 修改分块同步修改合并代码
    def combine_new(self, data_dir, w, h, out_dir, out_type='tif', file_type=['tif', 'tiff'], datetype='float16'):
        """
        :param data_dir: 存放待裁剪的影像文件夹，不用指定到tif文件
        :param  w  拼接影像的宽度，
        :param  h  拼接影像的高度
        :param out_dir: 存放裁剪结果的影像文件夹
        :param out_type: 裁剪结果影像文件类型
        :param file_type: 待裁剪的影像文件类型
        :param datetype:数据类型 int8，int16，float16，float32 等
        :return: True or Flase
        """
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        img_dir, img_name = self.get_file_names(data_dir, file_type)

        dir_dict = self.get_same_img(img_dir, img_name)
        count = 0
        for key in dir_dict.keys():
            dir_list = dir_dict[key]
            bands = ImageHandler.get_bands(dir_list[0])
            if bands > 1:
                temp_label = np.zeros(shape=(bands, h, w), dtype=datetype)
                for item in dir_list:
                    name_split = item.split('_')
                    x_start = int(name_split[-4])
                    x_end = int(name_split[-3])
                    y_start = int(name_split[-2])
                    y_end = int(name_split[-1].split('.')[0])
                    # img = Image.open(item)
                    img = ImageHandler.get_band_array(item, 1)
                    img = np.array(img)

                    temp_label[:, y_start:y_end, x_start:x_end] = img

                img_name = key + '.' + out_type
                new_out_dir = os.path.join(out_dir, img_name)
                ImageHandler.write_img(new_out_dir, '', [0, 0, 0, 0, 0, 0], temp_label)
                # label = Image.fromarray(temp_label)
                # label.save(new_out_dir)
                count += 1
            else:
                temp_label = np.zeros(shape=(h, w), dtype=datetype)
                for item in dir_list:
                    name_split = item.split('_')
                    x_start = int(name_split[-4])
                    x_end = int(name_split[-3])
                    y_start = int(name_split[-2])
                    y_end = int(name_split[-1].split('.')[0])
                    # img = Image.open(item)
                    img = ImageHandler.get_band_array(item, 1)
                    img = np.array(img)

                    temp_label[y_start:y_end, x_start:x_end] = img

                img_name = key + '.' + out_type
                new_out_dir = os.path.join(out_dir, img_name)
                ImageHandler.write_img(new_out_dir, '', [0, 0, 0, 0, 0, 0], temp_label)
                # label = Image.fromarray(temp_label)
                # label.save(new_out_dir)

                count += 1
        return True

    def combine_Tif(self, data_dir, w, h, out_dir, proj, geo, out_type='tif', file_type=['tif', 'tiff'],
                datetype='float16'):
        """
        将文件夹下的tif拼接成一个大的tif
        :param data_dir: 存放待裁剪的影像文件夹，不用指定到tif文件
        :param  w  拼接影像的宽度，
        :param  h  拼接影像的高度
        :param out_dir: 存放裁剪结果的影像文件夹
        :param proj: 指定投影系
        :param geo: 指定变换参数
        :param out_type: 裁剪结果影像文件类型
        :param file_type: 待裁剪的影像文件类型
        :param datetype:数据类型 int8，int16，float16，float32 等
        :return: True or Flase
        """
        image_handler = ImageHandler()
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        img_dir, img_name = self.get_file_names(data_dir, file_type)

        dir_dict = self.get_same_img(img_dir, img_name)
        count = 0
        for key in dir_dict.keys():
            temp_label = np.zeros(shape=(h, w), dtype=datetype)
            dir_list = dir_dict[key]
            for item in dir_list:
                name_split = item.split('_')
                x_start = int(name_split[-4])
                x_end = int(name_split[-3])
                y_start = int(name_split[-2])
                y_end = int(name_split[-1].split('.')[0])
                img = image_handler.get_data(item)
                temp_label[x_start:x_end, y_start:y_end] = img

            img_name = key + '.' + out_type
            new_out_dir = os.path.join(out_dir,img_name)
            image_handler.write_img(new_out_dir, proj, geo, temp_label)
            count += 1
        return True

if __name__ == '__main__':
    bp = BlockProcess()
#     # # cut
#     data_dir = r"D:\micro\WorkSpace\LandCover\Temporary\processing\feature_tif\cut"
#     out_dir = r"D:\micro\WorkSpace\LandCover\Temporary\processing\feature_tif\combine"
#     file_type = ['tif']
#     out_type = 'tif'
#     cut_size = 1024
# #
#     bp.cut_new(data_dir, out_dir, file_type, out_type, cut_size)
#     # # combine
    data_dir=r"D:\micro\WorkSpace\VegetationPhenology\Temporary\processing\GF3C_MYC_QPSI_006270_E100.4_N27.0_20230615_L1A_AHV_L10000158764-ortho\features_geo\feature"
    w= 9548
    h= 9613
    out_dirs=r"D:\micro\WorkSpace\VegetationPhenology\Temporary\processing\GF3C_MYC_QPSI_006270_E100.4_N27.0_20230615_L1A_AHV_L10000158764-ortho\features_geo"
    out_type='tif'
    file_type=['tif']
    datetype = 'float'
    # src_path = r"D:\Workspace\SoilMoisture\Temporary\preprocessed\HH_preprocessed.tif"
    # datetype = bp.get_tif_dtype(src_path)
    bp.combine(data_dir, w, h, out_dirs, out_type, file_type, datetype)

    #
    # # 添加地理信息
    # new_out_dir =r"D:\DATA\testdata1\combine\TEST_20200429_NDVI.tif"
    # bp.assign_spatial_reference_byfile(src_path, new_out_dir)

    # fn = r'D:\Workspace\SoilMoisture\Temporary\combine\soil_moisture.tif'
    # product_path = r'D:\Workspace\SoilMoisture\Temporary\combine\soil_moisture_1.tif'
    #
    # proj, geos, img = ImageHandler.read_img(fn)
    # img[img>1] = 1
    # img[img<0] = 0
    # ImageHandler.write_img(product_path, proj, geos, img)