Commit 845a5113 authored by mathpluscode

eval pipeline works

parent a75f79d2
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
from yfmil3id2019.ui.eval_command_line import eval
if __name__ == "__main__":
sys.exit(eval(sys.argv[1:]))
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
from yfmil3id2019.ui.preprocess_command_line import cut_image_mask
if __name__ == "__main__":
sys.exit(cut_image_mask(sys.argv[1:]))
import argparse
import logging
import os
import matplotlib
import numpy as np
import tensorflow as tf
import yaml
from yfmil3id2019.src.model.metric import seg_metric_np
from yfmil3id2019.src.util import make_dir
......
......@@ -113,6 +113,7 @@ def apply_affine_transform(images, scale, return_fn):
batch_size = sh[0]
size = sh[1:3]
A = get_affine_transform_batch(size, scale, batch_size) # shape = [batch_size, 2, 3]
A = tf.convert_to_tensor(A, dtype=tf.float32)
A = flatten(A) # shape = [batch_size, 6]
A = tf.concat([A, tf.zeros([batch_size, 2], tf.float32)], axis=1) # shape = [batch_size, 8]
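# tf.contrib.image.transform expects 8 projective transform parameters
# [a0, a1, a2, b0, b1, b2, c0, c1] per image; appending two zeros keeps the warp purely affine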
images = tf.contrib.image.transform(images=images, transforms=tf.convert_to_tensor(A))
......
......@@ -10,11 +10,10 @@ import tensorflow as tf
from yfmil3id2019.src.wrapper.layer import resize_image
def get_labeled_folders(cwd, config, training):
def get_labeled_folders(cwd, config):
"""get the paths of folders containing labeled data
:param cwd: current working directory
:param config: dict of the config file
:param training: false if it is preprocessing
:return: paths of folders
"""
data_dir = cwd + config["dir"]["data"] + 'img/labeled'
......
import os
from multiprocessing import cpu_count
import numpy as np
import tensorflow as tf
from tqdm import tqdm
import ray
import cv2
import matplotlib
matplotlib.use('agg')
from matplotlib.image import imsave, imread
from yfmil3id2019.src.cv import get_folder_name_from_paths, split_train_eval
from yfmil3id2019.src.data.load import build_dataset, extract_image_mask_fnames, get_labeled_folders, get_unlabeled_folders
from yfmil3id2019.src.model.metric import seg_metric_np
from yfmil3id2019.src.model.model import model_fn
from yfmil3id2019.src.util import init_log_dir, make_dir, set_tf_logger, verify_dirs
from yfmil3id2019.src.wrapper.util import ConfigProto
from yfmil3id2019.src.eval import get_folder_and_id
NUM_CPUS = cpu_count()
ray.init(num_cpus=NUM_CPUS)
def save_predict_results(results, imgs, dir_run, all, name, preprocess):
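"""save the predictions of one run under <dir_run>/preds/<name>/
for each sample this writes the normalised input image, the ground truth mask, the
probability map, the thresholded prediction and the prediction resized back to the
original size with the side borders removed
"""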
dir_pred = dir_run + '/preds/%s/' % name
make_dir(dir_pred)
height, width, border_width = preprocess['shape']['orig'][0], preprocess['shape']['orig'][1], preprocess['shape']['border']
adjusted_width = width - 2 * border_width
orig_size = (adjusted_width, height)
preds = list(results) # iterator to list, to get the predictions
for sample_id, pred_dict in enumerate(preds):
# get predictions
image = pred_dict['images']
mask = pred_dict['masks']
pred = pred_dict['preds']
pred_orig = cv2.resize(pred, dsize=orig_size)
# output
image = (image - np.min(image)) / (np.max(image) - np.min(image))
imsave(dir_pred + '/%d_image.png' % sample_id, image)
imsave(dir_pred + '/%d_mask.png' % sample_id, mask, vmin=0, vmax=1, cmap='gray')
imsave(dir_pred + '/%d_prob.png' % sample_id, pred, vmin=0, vmax=1, cmap='gray')
imsave(dir_pred + '/%d_pred.png' % sample_id, np.round(pred), vmin=0, vmax=1, cmap='gray')
imsave(dir_pred + '/%d_pred_orig.png' % sample_id, np.round(pred_orig), vmin=0, vmax=1, cmap='gray')
@ray.remote
def eval_image(fname, dir_cut, return_hd, all):
"""calculate the metrics for one image
:param fname: prediction file name prefix, e.g. if all == False, /LR01/preds/final/9
:param dir_cut: directory containing the cut ground truth masks
:param return_hd: if True, also calculate the Hausdorff distance
:param all: if True, the predictions cover all labeled data instead of the eval split only
:return: (sample_id, string of metrics)
"""
tokens = fname.split('/')
i = tokens[-1]
if all:
folder_name, img_index = get_folder_and_id(int(i))
mask = imread(dir_cut + '%s/%d' % (folder_name, img_index) + '_mask_cut.png')[:, :, 0]
else:
mask = imread(dir_cut + tokens[-4] + '/%s' % i + '_mask_cut.png')[:, :, 0]
pred = imread(fname + '_pred_orig.png')[:, :, 0]
metrics = seg_metric_np(pred, mask, return_hd=return_hd, fname=fname)
line = '%s|' % i
for k, v in metrics.items():
line += k + '=' + '%f,' % v
line += '\n'
return int(i), line
def batch_run(func, inputs):
"""execute the function with given inputs
:param func: the ray remote function to execute
:param inputs: a list of inputs, each element is a dictionary of keyword arguments for func
:return: a list of results
"""
jobs = []
results = []
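# keep at most NUM_CPUS ray tasks in flight: once the limit is reached, wait for one
# task to finish and collect its result before submitting the next one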
for inpt in tqdm(inputs):
if len(jobs) == NUM_CPUS:
ready_ids, jobs = ray.wait(jobs, num_returns=1, timeout=None) # timeout is ms
for res in ray.get(ready_ids):
results.append(res)
jobs.append(func.remote(**inpt))
for res in ray.get(jobs): # wait for the remaining jobs
results.append(res)
return results
def calculate_metrics(fnames, dir_cut, return_hd, all):
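"""compute the segmentation metrics of every prediction in parallel with ray and write
them to <prediction folder>/metric.log, one line per image in the form 'id|k=v,k=v,...'
"""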
dir_pred = '/'.join(fnames[0].split('/')[:-1])
inputs = [dict(fname=fname, dir_cut=dir_cut, return_hd=return_hd, all=all) for fname in fnames]
results = batch_run(eval_image, inputs)
results = sorted(results, key=lambda x: x[0])
with open(dir_pred + '/metric.log', 'w+') as f:
for i, s in results:
f.write(s)
def eval_app(config, path, best, all, eval, return_hd):
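"""restore the trained model of each cross-validation run, predict on the eval folders
(or on all labeled folders if all is True), save the predictions and calculate the metrics
:param config: dict of the config file
:param path: path of the log folder containing one sub-folder per cross-validation run
:param best: use the exported best model instead of the last checkpoint
:param all: evaluate on all labeled data instead of the eval split only
:param eval: if True, generate the predictions before computing the metrics
:param return_hd: if True, also calculate the Hausdorff distance
"""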
# create log folder
app_name = 'eval-' + path.split('/')[-1]
cwd = os.getcwd() + '/'
dir_log = init_log_dir(cwd + config['dir']['log'], app_name)
set_tf_logger(dir_log, app_name)
# path of the cut copies of the labeled data (must have been generated by the cut_image_mask preprocessing step)
dir_cut = config['dir']['data'] + 'img/cut/'
# get folders for labeled and unlabeled data
fs_lbl = get_labeled_folders(cwd, config)
fs_unlbl = get_unlabeled_folders(fs_lbl, config)
verify_dirs(fs_unlbl)
runs = split_train_eval(fs_lbl, fs_unlbl, config['data']['cv'])
for run in runs:
# get run name, i.e. eval folder names
run_name = get_folder_name_from_paths(run.folders_lbl_eval)
# get cross validation folder
dir_run = path + '/' + run_name
# init configs
session_config = ConfigProto(device_count={'GPU': 0})
run_config = tf.estimator.RunConfig(session_config=session_config, **config['tf']['run'])
# eval, generate predictions
if best:
best_dir = dir_run + '/export/best/'
files = [f.path for f in os.scandir(best_dir)]
ckpt_to_initialize_from = '.'.join(files[0].split('.')[:-1])
save_name = 'best'
else:
final_dir = dir_run + '/'
files = [f.path for f in os.scandir(final_dir)]
files = [x for x in files if 'ckpt' in x and x.endswith('.index')]
files = sorted(files, key=lambda x: int(x.split('.')[-2].split('-')[-1]), reverse=True) # sort checkpoints by step number, newest first
ckpt_to_initialize_from = '.'.join(files[0].split('.')[:-1]) # use the last one
save_name = 'final'
if all:
save_name += '_all'
if eval:
# get mean and std
mean_std_path = [dir_run + '/mean.png', dir_run + '/std.png']
if all:
eval_folders = fs_lbl
else:
eval_folders = run.folders_lbl_eval
def eval_input_fn():
return build_dataset(eval_folders, None, mean_std_path, training=False, config=config)
print('evaluate for %s - %s' % (run_name, save_name))
warm_start_from = tf.estimator.WarmStartSettings(ckpt_to_initialize_from=ckpt_to_initialize_from)
model = tf.estimator.Estimator(model_fn=model_fn, warm_start_from=warm_start_from, config=run_config, params=config)
results = model.predict(input_fn=eval_input_fn)
imgs_eval = extract_image_mask_fnames(folders=eval_folders, has_mask=True, keep_ratio=-1, skip=1)
save_predict_results(results=results, imgs=imgs_eval, dir_run=dir_run, all=all, name=save_name,
preprocess=config['data']['preprocess'])
# calculate metrics
print('calculate metric for %s - %s' % (run_name, save_name))
save_dir = dir_run + '/preds/%s/' % save_name
img_fnames = [f.path for f in os.scandir(save_dir)]
img_fnames = [x[:-9] for x in img_fnames if x.endswith('_pred.png')]
img_fnames = sorted(img_fnames, key=lambda x: int(x.split('/')[-1]))
calculate_metrics(fnames=img_fnames, dir_cut=dir_cut, return_hd=return_hd, all=all)
import argparse
import logging
import os
import yaml
from yfmil3id2019.ui.eval_app import eval_app
def eval(args=None):
logging.getLogger("tensorflow").setLevel(logging.FATAL)
# parse args
parser = argparse.ArgumentParser(description='yfmil3id2019_eval')
parser.add_argument('-p',
'--path',
required=True,
help='Path of log folder')
parser.add_argument('-g',
'--gpu',
required=True,
help='GPU ID')
parser.add_argument('-b',
'--bs',
default=64,
help='batch size')
parser.add_argument('--best',
dest='best',
action='store_true',
help='use the best exported model instead of the last checkpoint')
parser.add_argument('--eval',
dest='eval',
action='store_true',
help='evaluate models on test set')
parser.add_argument('--all',
dest='all',
action='store_true',
help='evaluate models on all data')
parser.add_argument('--hd',
dest='return_hd',
action='store_true',
help='calculate hausdorff distance')
parser.set_defaults(best=False)
parser.set_defaults(eval=False)
parser.set_defaults(all=False)
parser.set_defaults(return_hd=False)
args = parser.parse_args(args)
# load config
path = args.path
if path[-1] == '/':
path = path[:-1]
folders_cv = [f.path for f in os.scandir(path) if f.is_dir()]
config_path = folders_cv[0] + '/config_backup.yaml'
with open(config_path) as file:
config = yaml.load(file)
# modify config
config['model']['opt']['batch_size'] = args.bs
config['tf']['gpu'] = args.gpu
os.environ['CUDA_VISIBLE_DEVICES'] = config['tf']['gpu']
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
eval_app(config=config, path=path, best=args.best, all=args.all, eval=args.eval, return_hd=args.return_hd)
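# a hedged usage sketch (the console script name and the log path are assumptions, not
# taken from this commit):
#   yfmil3id2019_eval -p logs/<run_folder> -g 0 --best --eval --hd
# this reads config_backup.yaml from the first cross-validation sub-folder of --path,
# restricts TensorFlow to the requested GPU via CUDA_VISIBLE_DEVICES and runs eval_app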
......@@ -3,18 +3,20 @@
import os
import yaml
import numpy as np
from tqdm import tqdm
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
from matplotlib.image import imsave
from matplotlib.image import imsave, imread
from yfmil3id2019.src.cv import get_folder_name_from_paths, split_train_eval
from yfmil3id2019.src.data.load import get_labeled_folders, extract_image_mask_fnames
from yfmil3id2019.src.util import make_dir
def mean_std(img_fnames, chunk_size=200):
def calc_mean_std(img_fnames, chunk_size=200):
"""
calculate mean and std for the given images
if there are too many images, load them in chunks of chunk_size to limit memory use
......@@ -59,7 +61,7 @@ def gen_mean_std_app(config):
# file names
dir_mean_std = config['dir']['data'] + 'img/mean_std/'
folders = get_labeled_folders(cwd, config, training=False)
folders = get_labeled_folders(cwd, config)
folder_names = [x.split('/')[-1] for x in folders]
# calculate the mean of each folder
......@@ -70,7 +72,7 @@ def gen_mean_std_app(config):
print('Preprocessing the %d-th folder %s' % (i, folder_names[i]))
img_fnames = extract_image_mask_fnames(folders=[folders[i]], has_mask=True, keep_ratio=1, skip=1)
N = len(img_fnames) # number of imgs
mean, std = mean_std(img_fnames)
mean, std = calc_mean_std(img_fnames)
print('Calculating train mean & std for the %d-th folder %s' % (i, folder_names[i]))
img_fnames = extract_image_mask_fnames([folders[i]], has_mask=True, keep_ratio=1, skip=1)
......@@ -84,11 +86,10 @@ def gen_mean_std_app(config):
f.write('%d\n' % N)
def gen_unlabel_app(FPS):
def gen_unlabel_app(dir_data, fps):
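"""extract frames from every .264 video under <dir_data>/video/ with ffmpeg at the given
fps and save them as PNG files under <dir_data>/img/unlabeled/fps<fps>/
"""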
REMOVE = True # remove the existing frames
dir_data = 'data/'
dir_video = dir_data + 'video/'
dir_frame = dir_data + 'img/unlabeled/fps%s/' % FPS
dir_frame = dir_data + 'img/unlabeled/fps%s/' % fps
make_dir(dir_frame, REMOVE)
patient_folders = os.listdir(dir_video)
......@@ -98,6 +99,35 @@ def gen_unlabel_app(FPS):
video_fname = [f.path for f in os.scandir(dir_video + p_folder + '/' + v_folder) if f.path.endswith('.264')][0]
frame_path = dir_frame + p_folder + '/' + v_folder
make_dir(frame_path)
cmd = "ffmpeg -i %s -vf fps=%s %s/%%08d.png" % (video_fname, FPS, frame_path)
cmd = "ffmpeg -i %s -vf fps=%s %s/%%08d.png" % (video_fname, fps, frame_path)
print('Extracting frames from %s into %s' % (video_fname, frame_path))
os.system(cmd)
def cut_image_mask_app(config):
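"""remove the side borders of every labeled image and mask of each cross-validation
eval fold and save the cut copies under <data dir>/img/cut/<run_name>/
the cut keeps the columns in [border_width, width - border_width)
"""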
width = config['data']['preprocess']['shape']['orig'][1]
border_width = config['data']['preprocess']['shape']['border']
cwd = os.getcwd() + '/'
runs = split_train_eval(folders_labeled=get_labeled_folders(cwd, config),
folders_unlabeled=None,
param=config['data']['cv'])
dir_cut = config['dir']['data'] + 'img/cut/'
for i, run in enumerate(runs):
# get run name, i.e. eval folder names
run_name = get_folder_name_from_paths(run.folders_lbl_eval)
print('Preprocessing the %d-th folder %s' % (i, run_name))
dir_cut_run = dir_cut + run_name
make_dir(dir_cut_run)
imgs = extract_image_mask_fnames(folders=run.folders_lbl_eval,
has_mask=True, keep_ratio=-1, skip=1)
for sample_id in tqdm(range(len(imgs))):
image_orig = imread(imgs[sample_id] + ".png")[:, :, :3]
mask_orig = imread(imgs[sample_id] + "Mask.png")
image_orig = image_orig[:, border_width:(width - border_width), :]
mask_orig = mask_orig[:, border_width:(width - border_width)]
imsave(dir_cut_run + '/%d_image_cut.png' % sample_id, image_orig)
imsave(dir_cut_run + '/%d_mask_cut.png' % sample_id, mask_orig, vmin=0, vmax=1, cmap='gray')
import argparse
import yaml
from yfmil3id2019.ui.preprocess_app import gen_mean_std_app, gen_unlabel_app
from yfmil3id2019.ui.preprocess_app import gen_mean_std_app, gen_unlabel_app, cut_image_mask_app
def gen_mean_std(args=None):
parser = argparse.ArgumentParser(description='yfmil3id2019_train')
parser = argparse.ArgumentParser(description='yfmil3id2019_mean_std')
parser.add_argument('-p',
'--path',
......@@ -22,8 +22,12 @@ def gen_mean_std(args=None):
def gen_unlabel(args=None):
parser = argparse.ArgumentParser(description='yfmil3id2019_train')
parser = argparse.ArgumentParser(description='yfmil3id2019_unlabel')
parser.add_argument('-p',
'--path',
required=True,
help='Path of config file')
parser.add_argument('-f',
'--fps',
required=True,
......@@ -31,4 +35,26 @@ def gen_unlabel(args=None):
args = parser.parse_args(args)
gen_unlabel_app(FPS=args.fps)
# load config
with open(args.path) as file:
config = yaml.load(file)
gen_unlabel_app(dir_data=config['dir']['data'],
fps=args.fps)
def cut_image_mask(args=None):
parser = argparse.ArgumentParser(description='yfmil3id2019_cut_image_mask')
parser.add_argument('-p',
'--path',
required=True,
help='Path of config file')
args = parser.parse_args(args)
# load config
with open(args.path) as file:
config = yaml.load(file)
cut_image_mask_app(config=config)
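# a hedged usage sketch (the console script name and the config path are assumptions):
#   yfmil3id2019_cut -p config/config.yaml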
......@@ -13,16 +13,13 @@ from yfmil3id2019.src.wrapper.util import ConfigProto
def train_app(config, app_name):
""" Run the application """
# create log folder
cwd = os.getcwd() + '/'
dir_log = init_log_dir(cwd + config['dir']['log'], app_name)
set_tf_logger(dir_log, app_name)
# get folders for labeled and unlabeled data
fs_lbl = get_labeled_folders(cwd, config, training=True)
fs_lbl = get_labeled_folders(cwd, config)
fs_unlbl = get_unlabeled_folders(fs_lbl, config)
verify_dirs(fs_unlbl)
......