Home > AI > Uncategorized

multiprocessing

I ran an experiment on the PANDA dataset in which I extracted the middle-resolution level of each image and cropped away the white area.

Without multiprocessing, the run took 1 hour 30 minutes.

With multiprocessing, the run took 30 minutes.

Here is the core code

The complete code is here:


from multiprocessing import Pool
import multiprocessing
import numpy as np
import pandas as pd
import time
import skimage.io
from tqdm import tqdm
from PIL import Image


# Root folder of the competition data, relative to the working directory.
COMP_DIR = '../../../input/prostate-cancer-grade-assessment/'
# Folder the per-image .tiff files are read from.
TRAIN_DIR = f'{COMP_DIR}train_images/'
# Metadata table; its image_id column supplies the list of images to process.
df = pd.read_csv(f'{COMP_DIR}train.csv')
# Bare expression: the preview is only shown in an interactive session and
# is discarded when the file runs as a script.
df.head()



def crop_white(image: np.ndarray, value: int = 255) -> np.ndarray:
    """Trim the background border from an H x W x 3 uint8 image.

    A pixel counts as content when at least one of its channels is strictly
    below ``value``. The result is the tightest rectangle containing every
    content pixel, returned as a slice of ``image``. If no pixel qualifies,
    ``image`` itself is returned unchanged.
    """
    assert image.shape[2] == 3
    assert image.dtype == np.uint8

    # Darkest channel of every pixel; a pixel is background only if this
    # value reaches the threshold.
    darkest = image.min(axis=2)
    content_rows = np.flatnonzero(darkest.min(axis=1) < value)
    content_cols = np.flatnonzero(darkest.min(axis=0) < value)

    if content_rows.size == 0 or content_cols.size == 0:
        return image

    # flatnonzero yields ascending indices, so the ends are the extremes.
    top, bottom = content_rows[0], content_rows[-1]
    left, right = content_cols[0], content_cols[-1]
    return image[top:bottom + 1, left:right + 1]



def crop_all_img(img_id):
    """Crop one training image and write it out as a JPEG.

    Reads ``TRAIN_DIR + img_id + '.tiff'``, takes the image at index 1 of
    the multi-image file (presumably the middle resolution level -- confirm
    against the dataset), trims its white border with ``crop_white`` and
    saves the result as ``<img_id>.jpg`` in the current working directory
    with JPEG quality 90. Returns nothing.
    """
    source_path = TRAIN_DIR + img_id + '.tiff'
    level = skimage.io.MultiImage(source_path)[1]

    trimmed = crop_white(level)

    Image.fromarray(trimmed).save(img_id + '.jpg', quality=90)
    




    
if __name__ == '__main__':
    # Entry point: crop every image listed in the metadata table using one
    # worker process per CPU core, then report the elapsed wall-clock time.
    # The __main__ guard keeps worker processes that re-import this module
    # from starting pools of their own.
    start = time.time()

    img_ids = df.image_id.values

    # map() blocks until every image has been processed, so it is safe for
    # the context manager to shut the workers down on exit; previously the
    # pool was never closed and its processes were left to interpreter
    # teardown.
    with Pool(processes=multiprocessing.cpu_count()) as pool:
        pool.map(crop_all_img, img_ids)

    delta = time.time() - start
    print(f'Used time: {delta}')
Related posts:
Relevant tags:

Leave a Reply