Commit ee487243 authored by mathpluscode's avatar mathpluscode

make package installable and update readme

parent 2f8b468e
Pipeline #3310 failed with stages
in 6 minutes
......@@ -39,27 +39,37 @@ For images, the labelled data is stored in ``data/img/labeled/``, inside which e
For videos, each folder of ``data/video/`` corresponds to a patient, inside which each subfolder corresponds to a video. Compared to the data on the server, the folders are selected and renamed manually to match the labeled data. Often the folder name contains only a number, e.g. ``LS09/556/`` corresponds to the folder of the labeled image ``2016.04.28_10-08-03-556``. Videos are stored in ``.264`` format.
**The code has not been tested with any other data set; therefore, it is highly possible that the pipeline doesn't work with other data sets.**
Example Usage
-------------
To use the latest version, please clone the repository, switch to the ``dev`` branch, and install it via pip as follows:
::
pip install -e .
The examples of using predefined API are presented below.
Preprocess
^^^^^^^^^^
::
python preprocess_mean_std.py -p config.yaml
yfmil3id2019_gen_mean_std -p config.yaml
This code calculates the mean and std on channel level for the image data of each patient. The output is stored in ``data/img/mean_std``. These files are required for training.
::
python preprocess_unlabel.py -f 4
yfmil3id2019_gen_unlabel -f 4
This code extracts the unlabelled images from videos. The output is stored in ``data/img/unlabeled``. These files are required for semi-supervised training.
::
python preprocess_cut.py -p config.yaml
yfmil3id2019_cut_image_mask -p config.yaml
This code calculates the images without black border. The output is stored in ``data/img/cut``. These files are required for evaluation.
......@@ -69,7 +79,7 @@ Train
::
python train.py -p config.yaml -g 0
yfmil3id2019_train -p config.yaml -g 0
This code trains models with parameters defined in config.yaml file and uses GPU number 0.
......@@ -78,13 +88,13 @@ Evaluate
::
python eval.py -p log/20190902225640-config -g 0 --eval
yfmil3id2019_eval -p log/20190902225640-config -g 0 --eval
This code evaluates the models in the log folder. It calculates the prediction of each model of cross validation on the corresponding test set. The predictions of ``LR01`` will be saved in ``log/20190902225640-config/LR01/preds/final``.
::
python eval.py -p log/20190902225640-config -g 0 --eval --all
yfmil3id2019_eval -p log/20190902225640-config -g 0 --eval --all
This code calculates the prediction of each model of cross validation on all data, including training set and test set. The predictions of ``LR01`` will be saved in ``log/20190902225640-config/LR01/preds/final_all``.
......@@ -93,7 +103,7 @@ Analyse
::
python analyse.py -p log/ --std
yfmil3id2019_analyse -p log/ --std
This code
......@@ -103,27 +113,39 @@ This code
::
python analyse.py -p log/ --std --all
yfmil3id2019_analyse -p log/ --std --all
This code
- summarises the statistics of metrics
- generates the performance vs foreground proportion curves, one for training and another one for test.
But it requires to evaluate with ``--all``
But it requires evaluating with ``--all``. Moreover, it assumes that all folders under ``log/`` have been evaluated using the same mode; otherwise the code would raise an error.
Test
^^^^
::
python test.py -p log/20190902225640-config/LR01 -d demo -g 0
yfmil3id2019_test -p log/20190902225640-config/LR01 -d demo -g 0
This code tests the model in the folder ``log/20190902225640-config/LR01`` on images inside ``demo/``.
Developing
----------
Linting
^^^^^^^
This code conforms to the PEP8 standard. Pylint can be used to analyse the code:
::
pip install pylint
pylint --rcfile=tests/pylintrc yfmil3id2019
Contributing
^^^^^^^^^^^^
......
......@@ -22,5 +22,5 @@ opencv-python
tqdm
seaborn
ray
plotly==4.1.0
plotly>=4.1.0
tensorflow-gpu
......@@ -10,5 +10,5 @@ opencv-python
tqdm
seaborn
ray
plotly==4.1.0
plotly>=4.1.0
tensorflow-gpu
\ No newline at end of file
......@@ -29,12 +29,9 @@ setup(
'Intended Audience :: Information Technology',
'Intended Audience :: Science/Research',
'License :: OSI Approved :: BSD License',
'Programming Language :: Python',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 3',
'Topic :: Scientific/Engineering :: Information Analysis',
......@@ -53,12 +50,36 @@ setup(
install_requires=[
'six>=1.10',
'numpy>=1.11',
'numpy',
'scipy',
'matplotlib',
'pyyaml',
'opencv-python',
'tqdm',
'seaborn',
'ray',
'plotly>=4.1.0',
'tensorflow-gpu',
],
entry_points={
"console_scripts": [
"yfmil3id2019_train=yfmil3id2019.ui.train_command_line:main",
"yfmil3id2019_test=yfmil3id2019.ui.test_command_line:main",
"yfmil3id2019_gen_mean_std="
"yfmil3id2019.ui.preprocess_command_line:gen_mean_std",
"yfmil3id2019_gen_unlabel="
"yfmil3id2019.ui.preprocess_command_line:gen_unlabel",
"yfmil3id2019_cut_image_mask="
"yfmil3id2019.ui.preprocess_command_line:cut_image_mask",
"yfmil3id2019_train=yfmil3id2019.ui.train_command_line:train",
"yfmil3id2019_eval=yfmil3id2019.ui.eval_command_line:evaluate",
"yfmil3id2019_analyse=yfmil3id2019.ui.analyse_command_line:analyse",
"yfmil3id2019_test=yfmil3id2019.ui.test_command_line:test",
]
},
)
......@@ -6,17 +6,21 @@ from yfmil3id2019.src.model.metric import calculate_stats_np
# 13 total patients; the lists below are index-aligned: entry i of
# NUM_SAMPLES is the number of labelled images for FOLDER_NAMES[i].
NUM_PATIENTS = 13
FOLDER_NAMES = ['LR01', 'LR02', 'LR04', 'LR05',
                'LR06', 'LR07', 'LS03', 'LS04',
                'LS05', 'LS06', 'LS07', 'LS08', 'LS09']
NUM_SAMPLES = [67, 156, 148, 168, 246, 180, 140,
               260, 198, 178, 166, 144, 158]
# Cumulative sample counts, used to map a global sample index to a patient.
CUMSUM_SAMPLES = np.cumsum(NUM_SAMPLES)
def get_folder_and_id(sample_id):
    """Return the patient folder name and the sample index within that folder.

    :param sample_id: global sample index between 0 and 2208, as in total
        there are 2209 labelled images across all patients
    :return: tuple of (patient folder name, index of the sample inside
        that patient's folder)
    """
    # First patient whose cumulative sample count exceeds sample_id.
    patient_id = np.where(CUMSUM_SAMPLES > sample_id)[0][0]
    # Local index: subtract the samples belonging to earlier patients.
    return FOLDER_NAMES[patient_id], \
        sample_id - np.sum(NUM_SAMPLES[:patient_id])
def read_log(fname):
......@@ -36,21 +40,22 @@ def read_log(fname):
def form_msg(values, name):
    """Format one markdown table row of summary statistics for a metric.

    :param values: array of metric values for one metric
    :param name: metric name shown in the first column of the row
    :return: formatted markdown table row as a string
    """
    # mean, std and the 10/25/50/75/90th percentiles of the values
    v_mean, v_std, v_10, v_25, v_50, v_75, v_90 = calculate_stats_np(values)
    msg = '| %9s| %7.4f | %7.4f | %7.4f | %7.4f | %7.4f | %7.4f | %7.4f |' % (
        name, v_mean, v_std, v_10, v_25, v_50, v_75, v_90)
    return msg
def read_folder(dir_path, best, all, total):
def read_folder(dir_path, best, eval_train, total):
folder_name = 'best' if best else 'final'
if all:
if eval_train:
folder_name += '_all'
log_fname = '/preds/%s/metric.log' % folder_name
folder_paths = [f.path for f in os.scandir(dir_path) if f.is_dir()]
folder_paths = [x for x in folder_paths if os.path.exists(x + log_fname)]
log_paths = [x + log_fname for x in folder_paths]
logs = sorted([(fname.split('/')[-4], read_log(fname)) for fname in log_paths])
logs = sorted([(fname.split('/')[-4], read_log(fname))
for fname in log_paths])
metrics_total = dict()
metrics_sep = dict()
......@@ -60,7 +65,8 @@ def read_folder(dir_path, best, all, total):
metrics_sep[m_name] = dict()
for x in logs:
msgs.append('| %5s | mean | std | 10%% | 25%% | 50%% | 75%% | 90%% |' % x[0])
msgs.append('| %5s | mean | std | 10%% | 25%% | 50%% | 75%% | 90%% |'
% x[0])
msgs.append('| --- | --- | --- | --- | --- | --- | --- | --- |')
for k, v in x[1].items():
msgs.append(form_msg(v, k))
......@@ -69,7 +75,8 @@ def read_folder(dir_path, best, all, total):
msgs.append('\n')
if not total:
msgs = []
msgs.append('| %5s | mean | std | 10%% | 25%% | 50%% | 75%% | 90%% |' % 'Total')
msgs.append('| %5s | mean | std | 10%% | 25%% | 50%% | 75%% | 90%% |'
% 'Total')
msgs.append('| --- | --- | --- | --- | --- | --- | --- | --- |')
for k, v in metrics_total.items():
msgs.append(form_msg(v, k))
......@@ -81,22 +88,26 @@ def read_folder_all(dir_path, best):
log_fname = '/preds/%s/metric.log' % ('best_all' if best else 'final_all')
metrics = []
for folder in PATIENT_FOLDER_NAMES:
for folder in FOLDER_NAMES:
metrics.append(read_log(dir_path + '/' + folder + log_fname))
metrics_sep = dict()
for m_name in metrics[0].keys():
values = np.array([metrics[patient_id][m_name] for patient_id in range(NUM_PATIENTS)])
values = np.array([metrics[patient_id][m_name]
for patient_id in range(NUM_PATIENTS)])
test_values = []
train_values = []
for patient_id in range(NUM_PATIENTS):
index_start = CUMSUM_SAMPLES[patient_id] - NUM_SAMPLES[patient_id]
index_end = CUMSUM_SAMPLES[patient_id]
# each element is of shape (num_samples,)
test_values.append(values[patient_id, (PATIENT_CUMSUM_SAMPLES[patient_id] - PATIENT_NUM_SAMPLES[patient_id]):PATIENT_CUMSUM_SAMPLES[patient_id]])
test_values.append(values[patient_id, index_start:index_end])
# each element is of shape (NUM_PATIENTS-1, num_samples)
train_values.append(
np.concatenate([values[:patient_id, (PATIENT_CUMSUM_SAMPLES[patient_id] - PATIENT_NUM_SAMPLES[patient_id]):PATIENT_CUMSUM_SAMPLES[patient_id]],
values[(patient_id + 1):, (PATIENT_CUMSUM_SAMPLES[patient_id] - PATIENT_NUM_SAMPLES[patient_id]):PATIENT_CUMSUM_SAMPLES[patient_id]]],
axis=0))
np.concatenate([
values[:patient_id, index_start:index_end],
values[(patient_id + 1):, index_start:index_end]],
axis=0))
test_values = np.concatenate(test_values, axis=0)
train_values = np.concatenate(train_values, axis=1)
......
......@@ -34,7 +34,7 @@ def analyse_stats(log_path, best, eval_train, total=False):
for path in folder_paths:
name = ''.join(path.split('-')[1:])
m_total, m_sep, msg = read_folder(path, best=best, total=total,
all=eval_train)
eval_train=eval_train)
names.append(name)
metrics_total[name] = m_total
metrics_sep[name] = m_sep
......@@ -71,10 +71,16 @@ def analyse_foreground(log_path, best, std):
metrics_total = dict()
for path in folder_paths:
name = ''.join(path.split('-')[1:])
m_total, _, _ = read_folder(path, best=best, total=False, all=False)
m_total, _, _ = read_folder(path, best=best, total=False,
eval_train=False)
if name in names:
print("folder name %s is repeated!" % name)
name += "x"
names.append(name)
metrics_total[name] = m_total
print(names)
for metric_name in ['f1', 'hd_p95']:
if metric_name not in metrics_total[names[0]].keys():
print('metric %s not found' % metric_name)
......@@ -124,7 +130,7 @@ def analyse_foreground_all(log_path, best, std):
vals = metrics[metric_name][mode]
if mode == 'train':
vals = np.mean(vals, axis=0)
fig = plot_prop_val(props, vals, std, fig, name)
fig = plot_prop_val(props, vals, std, fig, name + '-' + mode)
fig.layout.update(autosize=False,
width=800,
......@@ -156,36 +162,39 @@ def plot_prop_val(props, vals, std, fig, name, num_bins=30):
val_mean.append(np.mean([x[1] for x in chunk]))
val_std.append(np.std([x[1] for x in chunk]))
fig.append_trace(
go.Scatter(x=prop_mean,
y=val_mean,
mode='lines+markers',
marker=dict(color=DEFAULT_PLOTLY_COLORS[i % 10]),
showlegend=False,
name=name,
),
row=row, col=col)
if std:
fig.add_trace(
go.Scatter(x=prop_mean,
y=val_mean - val_std,
fill=None,
mode='lines',
line_color=rgb2rgba(
DEFAULT_PLOTLY_COLORS[col - 1], 0.4),
showlegend=False,
name='mean-std'),
row=row, col=col)
prop_mean = np.asarray(prop_mean)
val_mean = np.asarray(val_mean)
val_std = np.asarray(val_std)
fig.append_trace(
go.Scatter(x=prop_mean,
y=val_mean,
mode='lines+markers',
marker=dict(color=DEFAULT_PLOTLY_COLORS[i % 10]),
showlegend=False,
name=name,
),
row=row, col=col)
if std:
fig.add_trace(
go.Scatter(x=prop_mean,
y=val_mean + val_std,
fill='tonexty',
y=val_mean - val_std,
fill=None,
mode='lines',
line_color=rgb2rgba(
DEFAULT_PLOTLY_COLORS[col - 1], 0.4),
showlegend=False, name='mean+std'),
showlegend=False,
name='mean-std'),
row=row, col=col)
fig.update_xaxes(range=[0, 1],
title=dict(text='Liver Proportion'),
row=row, col=col)
fig.add_trace(
go.Scatter(x=prop_mean,
y=val_mean + val_std,
fill='tonexty',
mode='lines',
line_color=rgb2rgba(
DEFAULT_PLOTLY_COLORS[col - 1], 0.4),
showlegend=False, name='mean+std'),
row=row, col=col)
fig.update_xaxes(range=[0, 1],
title=dict(text='Liver Proportion'),
row=row, col=col)
return fig
......@@ -141,7 +141,7 @@ def calculate_metrics(fnames, dir_cut, eval_train, return_hd):
f.write(res)
def _eval(run, config, path, best,
def _eval(run, config, dir_run, best,
eval_train: bool, predict: bool, return_hd: bool):
"""
evaluate for a model trained on one cross validation fold
......@@ -154,13 +154,9 @@ def _eval(run, config, path, best,
:param return_hd: if calculate hausdorff distance
:return:
"""
# get run name, i.e. eval folder names
run_name = get_folder_name_from_paths(run.folders_lbl_eval)
# original data without border
dir_cut = config['dir']['data'] + 'img/cut/'
# get cross validation folder
dir_run = path + '/' + run_name
# init configs
session_config = ConfigProto(device_count={'GPU': 0})
......@@ -199,7 +195,7 @@ def _eval(run, config, path, best,
training=False,
config=config)
print('evaluate for %s - %s' % (run_name, save_name))
print('evaluate for %s - %s' % (dir_run, save_name))
warm_start_from = tf.estimator.WarmStartSettings(
ckpt_to_initialize_from=ckpt_to_initialize_from)
model = tf.estimator.Estimator(model_fn=model_fn,
......@@ -213,7 +209,7 @@ def _eval(run, config, path, best,
preprocess=config['data']['preprocess'])
# calculate metrics
print('calculate metric for %s - %s' % (run_name, save_name))
print('calculate metric for %s - %s' % (dir_run, save_name))
save_dir = dir_run + '/preds/%s/' % save_name
img_fnames = [f.path for f in os.scandir(save_dir)]
img_fnames = [x[:-9] for x in img_fnames if x.endswith('_pred.png')]
......@@ -239,6 +235,9 @@ def eval_app(config, path, best, eval_train, predict, return_hd):
folders_unlabeled=None,
param=config['data']['cv'])
for run in runs:
# get run name, i.e. eval folder names
run_name = get_folder_name_from_paths(run.folders_lbl_eval)
dir_run = path + '/' + run_name
if eval_train:
run.folders_lbl_eval = fs_lbl
_eval(run, config, path, best, eval_train, predict, return_hd)
_eval(run, config, dir_run, best, eval_train, predict, return_hd)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment