Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Yunguan Fu
COVID-Net
Commits
ad90243e
Commit
ad90243e
authored
Mar 28, 2020
by
lindawangg
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
removed archive folder
parent
304b4cfa
Changes
7
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
5 additions
and
6494 deletions
+5
-6494
.gitignore
.gitignore
+1
-0
README.md
README.md
+4
-4
archive/create_COVIDx.ipynb
archive/create_COVIDx.ipynb
+0
-266
archive/data_augmentation.py
archive/data_augmentation.py
+0
-127
archive/preprocessing.ipynb
archive/preprocessing.ipynb
+0
-148
archive/test_split_v1.txt
archive/test_split_v1.txt
+0
-639
archive/train_split_v1.txt
archive/train_split_v1.txt
+0
-5310
No files found.
.gitignore
View file @
ad90243e
...
...
@@ -16,3 +16,4 @@ data.py
export_to_meta.py
model.py
train_tf.py
archive/
README.md
View file @
ad90243e
...
...
@@ -191,7 +191,7 @@ These are the final results for COVID-Net Small and COVID-Net Large.
## Pretrained Models
| Type | COVID-19 Sensitivity | # Params (M) | Model |
|:-----:|:--------------------:|:------------:|:-------------------:|
| ckpt | 80.0 |
116
|
[
COVID-Net Small
](
https://drive.google.com/file/d/1djqWcxzRehtyJV9EQsppj1YdgsP2JRQy/view?usp=sharing
)
|
| ckpt | 90.0 |
126
|
[
COVID-Net Large
](
https://drive.google.com/file/d/1xrxK9swFVlFI-WAYcccIgm0tt9RgawXD/view?usp=sharing
)
|
| Type | COVID-19 Sensitivity | # Params (M) |
MACs (G) |
Model |
|:-----:|:--------------------:|:------------:|:--------
:|:--------
-----------:|
| ckpt | 80.0 | 116
.6 | 2.26
|
[
COVID-Net Small
](
https://drive.google.com/file/d/1djqWcxzRehtyJV9EQsppj1YdgsP2JRQy/view?usp=sharing
)
|
| ckpt | 90.0 | 126
.6 | 3.59
|
[
COVID-Net Large
](
https://drive.google.com/file/d/1xrxK9swFVlFI-WAYcccIgm0tt9RgawXD/view?usp=sharing
)
|
archive/create_COVIDx.ipynb
deleted
100644 → 0
View file @
304b4cfa
{
"cells": [
{
"cell_type": "code",
"execution_count": 161,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import os\n",
"import random \n",
"from shutil import copyfile"
]
},
{
"cell_type": "code",
"execution_count": 215,
"metadata": {},
"outputs": [],
"source": [
"# set parameters here\n",
"savepath = 'data'\n",
"seed = 0\n",
"np.random.seed(seed). # Reset the seed so all runs are the same.\n",
"random.seed(seed)\n",
"MAXVAL = 255 # Range [0 255]\n",
"\n",
"# path to covid-19 dataset from https://github.com/ieee8023/covid-chestxray-dataset\n",
"imgpath = '../covid-chestxray-dataset/images' \n",
"csvpath = '../covid-chestxray-dataset/metadata.csv'\n",
"\n",
"# path to kaggle chest xray data from https://www.kaggle.com/paultimothymooney/chest-xray-pneumonia\n",
"data_path = 'chest_xray'\n",
"\n",
"# parameters for COVIDx dataset\n",
"train = []\n",
"test = []\n",
"split = 0.1 # train/test split\n",
"test_count = {'normal': 0, 'viral': 0, 'bacteria': 0, 'COVID-19': 0}\n",
"train_count = {'normal': 0, 'viral': 0, 'bacteria': 0, 'COVID-19': 0}"
]
},
{
"cell_type": "code",
"execution_count": 162,
"metadata": {},
"outputs": [],
"source": [
"# adapted from https://github.com/mlmed/torchxrayvision/blob/master/torchxrayvision/datasets.py#L814\n",
"csv = pd.read_csv(csvpath, nrows=None)\n",
"idx_pa = csv[\"view\"] == \"PA\" # Keep only the PA view\n",
"csv = csv[idx_pa]\n",
"\n",
"pneumonias = [\"COVID-19\", \"SARS\", \"MERS\", \"ARDS\", \"Streptococcus\"]\n",
"pathologies = [\"Pneumonia\",\"Viral Pneumonia\", \"Bacterial Pneumonia\", \"No Finding\"] + pneumonias\n",
"pathologies = sorted(pathologies)\n",
"\n",
"mapping = dict()\n",
"mapping['COVID-19'] = 'COVID-19'\n",
"mapping['SARS'] = 'viral'\n",
"mapping['MERS'] = 'viral'\n",
"mapping['Streptococcus'] = 'bacteria'"
]
},
{
"cell_type": "code",
"execution_count": 218,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'normal': 0, 'viral': 11, 'bacteria': 6, 'COVID-19': 68}\n",
"68\n"
]
}
],
"source": [
"# get non-COVID19 viral, bacteria, and COVID-19 infections from covid-chestxray-dataset\n",
"# stored as patient id, image filename and label\n",
"filename_label = {'normal': [], 'viral': [], 'bacteria': [], 'COVID-19': []}\n",
"count = {'normal': 0, 'viral': 0, 'bacteria': 0, 'COVID-19': 0}\n",
"for index, row in csv.iterrows():\n",
" f = row['finding']\n",
" if f in mapping:\n",
" count[mapping[f]] += 1\n",
" entry = [int(row['Patientid']), row['filename'], mapping[f]]\n",
" filename_label[mapping[f]].append(entry)\n",
"\n",
"print('Data distribution from covid-chestxray-dataset:')\n",
"print(count)"
]
},
{
"cell_type": "code",
"execution_count": 243,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Key: viral\n",
"Test patients: ['8']\n",
"Key: bacteria\n",
"Test patients: ['31']\n",
"Key: COVID-19\n",
"Test patients: ['36', '42', '19', '20']\n",
"test count: {'normal': 0, 'viral': 1, 'bacteria': 4, 'COVID-19': 8}\n",
"train count: {'normal': 0, 'viral': 10, 'bacteria': 2, 'COVID-19': 60}\n"
]
}
],
"source": [
"# add covid-chestxray-dataset into COVIDx dataset\n",
"# since covid-chestxray-dataset doesn't have test dataset\n",
"# split into train/test by patientid\n",
"# for COVIDx:\n",
"# patient 8 is used as non-COVID19 viral test\n",
"# patient 31 is used as bacterial test\n",
"# patients 19, 20, 36, 42 are used as COVID-19 viral test\n",
"\n",
"for key in filename_label.keys():\n",
" arr = np.array(filename_label[key])\n",
" if arr.size == 0:\n",
" continue\n",
" # split by patients\n",
" num_diff_patients = len(np.unique(arr[:,0]))\n",
" num_test = max(1, round(split*num_diff_patients))\n",
" # select num_test number of random patients\n",
" test_patients = random.sample(list(arr[:,0]), num_test)\n",
" print('Key: ', key)\n",
" print('Test patients: ', test_patients)\n",
" # go through all the patients\n",
" for patient in arr:\n",
" if patient[0] in test_patients:\n",
" copyfile(os.path.join(imgpath, patient[1]), os.path.join(savepath, 'test', patient[1]))\n",
" test.append(patient)\n",
" test_count[patient[2]] += 1\n",
" else:\n",
" copyfile(os.path.join(imgpath, patient[1]), os.path.join(savepath, 'train', patient[1]))\n",
" train.append(patient)\n",
" train_count[patient[2]] += 1\n",
"\n",
"print('test count: ', test_count)\n",
"print('train count: ', train_count)"
]
},
{
"cell_type": "code",
"execution_count": 244,
"metadata": {},
"outputs": [],
"source": [
"# add kaggle chest xray data into COVID19\n",
"folders = ['train', 'val', 'test']\n",
"\n",
"# train, val, test normal data\n",
"for folder in folders: \n",
" for img in os.listdir(os.path.join(data_path, folder, 'NORMAL')):\n",
" if '.jp' in img:\n",
" new_img = img.strip('IM-')\n",
" new_img = new_img.strip('NORMAL2-IM-')\n",
" # add to current dataset\n",
" patientid = '1000' + new_img.split('-')[0] # add 1000 in front of kaggle patient ids\n",
" if folder == 'train' or folder == 'val':\n",
" # copy files to data folder\n",
" copyfile(os.path.join(data_path, folder, 'NORMAL', img), os.path.join(savepath, 'train', img))\n",
" train.append([patientid, img, 'normal'])\n",
" train_count['normal'] += 1\n",
" else:\n",
" copyfile(os.path.join(data_path, folder, 'NORMAL', img), os.path.join(savepath, 'test', img))\n",
" test.append([patientid, img, 'normal'])\n",
" test_count['normal'] += 1\n",
"\n",
"# train, val, test pneumonia data\n",
" for img in os.listdir(os.path.join(data_path, folder, 'PNEUMONIA')):\n",
" if '.jp' in img:\n",
" new_img = img.strip('person')\n",
" patientid = '1000' + new_img.split('_')[0]\n",
" p_type = 'bacteria' if 'bacteria' in new_img else 'viral'\n",
" if folder == 'train' or folder == 'val':\n",
" copyfile(os.path.join(data_path, folder, 'PNEUMONIA', img), os.path.join(savepath, 'train', img))\n",
" train.append([patientid, img, p_type])\n",
" train_count[p_type] += 1\n",
" else:\n",
" copyfile(os.path.join(data_path, folder, 'PNEUMONIA', img), os.path.join(savepath, 'test', img))\n",
" test.append([patientid, img, p_type])\n",
" test_count[p_type] += 1\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 245,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Final stats\n",
"Train count: {'normal': 1349, 'viral': 1355, 'bacteria': 2540, 'COVID-19': 60}\n",
"Test count: {'normal': 234, 'viral': 149, 'bacteria': 246, 'COVID-19': 8}\n",
"Total length of train: 5304\n",
"Total length of test: 637\n"
]
}
],
"source": [
"# final stats\n",
"print('Final stats')\n",
"print('Train count: ', train_count)\n",
"print('Test count: ', test_count)\n",
"print('Total length of train: ', len(train))\n",
"print('Total length of test: ', len(test))"
]
},
{
"cell_type": "code",
"execution_count": 246,
"metadata": {},
"outputs": [],
"source": [
"# export to train and test csv\n",
"# format as patientid, filename, label, separated by a space\n",
"train_file = open(\"train_split.txt\",\"a\") \n",
"for sample in train:\n",
" info = str(sample[0]) + ' ' + sample[1] + ' ' + sample[2] + '\\n'\n",
" train_file.write(info)\n",
"\n",
"train_file.close()\n",
"\n",
"test_file = open(\"test_split.txt\", \"a\")\n",
"for sample in test:\n",
" info = str(sample[0]) + ' ' + sample[1] + ' ' + sample[2] + '\\n'\n",
" test_file.write(info)\n",
"\n",
"test_file.close()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (covid)",
"language": "python",
"name": "covid"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
archive/data_augmentation.py
deleted
100644 → 0
View file @
304b4cfa
import
cv2
import
numpy
as
np
import
matplotlib.pyplot
as
plt
import
os
# Seed NumPy's global RNG once at import time so the random draws made later
# in this script (image sampling and augmentation parameters) repeat exactly
# from run to run.
np.random.seed(0)
def rotate_image(image, angle):
    """Rotate ``image`` by ``angle`` degrees on a canvas resized to fit it.

    The angle is negated before it is passed to
    ``cv2.getRotationMatrix2D`` (the original author's note: "applying the
    negative of the angle to rotate clockwise").  The output canvas is
    resized to the bounding box of the rotated image and the transform is
    re-centred on that canvas before warping.

    Args:
        image: Array whose first two dimensions are (height, width).
        angle: Rotation angle in degrees.

    Returns:
        The rotated image as returned by ``cv2.warpAffine``.
    """
    height, width = image.shape[:2]
    center_x = width // 2
    center_y = height // 2

    # 2x3 affine matrix for a rotation about the image centre, scale 1.0.
    rotation = cv2.getRotationMatrix2D((center_x, center_y), -angle, 1.0)
    abs_cos = np.abs(rotation[0, 0])
    abs_sin = np.abs(rotation[0, 1])

    # Bounding box of the rotated image.
    new_width = int((height * abs_sin) + (width * abs_cos))
    new_height = int((height * abs_cos) + (width * abs_sin))

    # Shift the transform so the old centre lands on the new canvas centre.
    rotation[0, 2] += (new_width / 2) - center_x
    rotation[1, 2] += (new_height / 2) - center_y

    return cv2.warpAffine(image, rotation, (new_width, new_height))
def horizontal_flip(image):
    """Return ``image`` flipped by ``cv2.flip`` with flip code ``1``.

    Args:
        image: Image array accepted by ``cv2.flip``.

    Returns:
        The flipped image produced by OpenCV.
    """
    # A positive flip code asks OpenCV to mirror around the vertical axis,
    # i.e. swap left and right.
    flip_code = 1
    return cv2.flip(image, flip_code)
def shift_image(image, lr_pixels, tb_pixels):
    """Translate ``image`` with an affine warp, keeping the original size.

    Args:
        image: Array whose first two dimensions are (rows, cols).
        lr_pixels: Translation along the x axis (left/right), in pixels.
        tb_pixels: Translation along the y axis (top/bottom), in pixels.

    Returns:
        The translated image from ``cv2.warpAffine``; the output size is the
        input's ``(num_cols, num_rows)``.
    """
    num_rows, num_cols = image.shape[:2]
    # Pure translation: identity linear part plus the (x, y) offsets.
    translation_matrix = np.float32([[1, 0, lr_pixels], [0, 1, tb_pixels]])
    # Warp the ``image`` argument.  The previous code warped ``img``, a name
    # that is not a parameter here, so the function only worked when a
    # module-level ``img`` happened to exist and ignored its own input.
    return cv2.warpAffine(image, translation_matrix, (num_cols, num_rows))
# ---------------------------------------------------------------------------
# Parameters
# ---------------------------------------------------------------------------
INPUT_SIZE = (224, 224)  # kept for compatibility; not used below
mapping = {'normal': 0, 'bacteria': 1, 'viral': 2, 'COVID-19': 3}
train_filepath = 'train_split.txt'
test_filepath = 'test_split.txt'
num_samples = 3000  # target number of training images per class


def _remaining_to_augment(originals, augmented, target):
    """Return, per class, how many images the next pass should augment.

    Every pass samples without replacement from a class's original images,
    so one pass can add at most ``len(originals[label])`` images.  The count
    is clamped at zero: a class that already holds ``target`` or more images
    would otherwise yield a negative sample size, which
    ``np.random.choice`` rejects.
    """
    return {
        label: max(
            0,
            min(target - (len(names) + len(augmented[label])), len(names)),
        )
        for label, names in originals.items()
    }


# ---------------------------------------------------------------------------
# Load the train and test split files
# ---------------------------------------------------------------------------
# ``with`` closes the handles; previously both files were left open.
with open(train_filepath, 'r') as split_file:
    trainfiles = split_file.readlines()
with open(test_filepath, 'r') as split_file:
    testfiles = split_file.readlines()

# Group file names by class label.  In each whitespace-separated line,
# field 1 is used as the image file name and field 2 as the class label.
classes = {label: [] for label in mapping}
img_aug = {label: [] for label in mapping}  # augmented file names per class
classes_test = {label: [] for label in mapping}

for line in trainfiles:
    fields = line.split()
    classes[fields[2]].append(fields[1])

for line in testfiles:
    fields = line.split()
    classes_test[fields[2]].append(fields[1])

for key, names in classes.items():
    print('{}: {}'.format(key, len(names)))

# ---------------------------------------------------------------------------
# Augment every training class up to ``num_samples`` images
# ---------------------------------------------------------------------------
# The first computation previously used ``img_aug['normal']`` for every
# class; the shared helper now uses each class's own augmented list.
num_to_augment = _remaining_to_augment(classes, img_aug, num_samples)
print('num_to_augment 1:', num_to_augment)

to_augment = sum(num_to_augment.values())
print(to_augment)

while to_augment:
    for key, aug_class in classes.items():
        # sample which images to augment
        sample_indexes = np.random.choice(
            len(aug_class), num_to_augment[key], replace=False)
        for i in sample_indexes:
            # Randomly draw the parameters of each augmentation.  Rotation
            # and shift are currently disabled (only the flip is applied),
            # but their values are still drawn and encoded in the output
            # file name, so the draws are kept to preserve the RNG sequence
            # and the naming scheme.
            rot = np.random.uniform(-5, 5)
            do_flip = np.random.randint(0, 2)
            shift_vert = np.random.randint(0, 2)
            shift = np.random.uniform(-10, 10)

            # read in image and apply augmentation
            src_path = os.path.join('data', 'train', aug_class[i])
            img = cv2.imread(src_path)
            if img is None:
                # cv2.imread signals an unreadable file by returning None;
                # fail here with the path instead of deep inside OpenCV.
                raise FileNotFoundError(
                    'could not read image: {}'.format(src_path))
            if do_flip:
                img = horizontal_flip(img)

            # append filename and class to img_aug, save as png
            stem = aug_class[i].split('.')[0]
            imgname = '{}.png'.format(
                stem + '_aug_r' + str(round(rot)) + '_' + str(do_flip)
                + '_s' + str(shift_vert) + str(round(shift)))
            img_aug[key].append(imgname)
            cv2.imwrite(os.path.join('data', 'train', imgname), img)

    # update num_to_augment numbers
    num_to_augment = _remaining_to_augment(classes, img_aug, num_samples)
    to_augment = sum(num_to_augment.values())
    print(num_to_augment)

# ---------------------------------------------------------------------------
# Export "<file name> <class index>" lists
# ---------------------------------------------------------------------------
# NOTE(review): both files are opened in append mode, as before, so running
# the script again adds duplicate lines -- confirm whether 'w' was intended.
with open("train_augment.txt", "a") as train_file:
    for key in classes:
        for imgname in classes[key]:
            train_file.write(imgname + ' ' + str(mapping[key]) + '\n')
        for imgname in img_aug[key]:
            train_file.write(imgname + ' ' + str(mapping[key]) + '\n')

with open("test.txt", "a") as test_file:
    for key in classes_test:
        for imgname in classes_test[key]:
            test_file.write(imgname + ' ' + str(mapping[key]) + '\n')
archive/preprocessing.ipynb
deleted
100644 → 0
View file @
304b4cfa
{
"cells": [
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"import keras\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [],
"source": [
"# Set parameters here \n",
"INPUT_SIZE = (224, 224)\n",
"mapping = {'normal': 0, 'bacteria': 1, 'viral': 2, 'COVID-19': 3}\n",
"train_filepath = 'train_split.txt'\n",
"test_filepath = 'test_split.txt'"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# load in the train and test files\n",
"file = open(train_filepath, 'r') \n",
"trainfiles = file.readlines() \n",
"file = open(test_filepath, 'r')\n",
"testfiles = file.readlines()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5304\n",
"637\n"
]
}
],
"source": [
"print('Total samples for train: ', len(trainfiles))\n",
"print('Total samples for test: ', len(testfiles))"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(224, 224, 3)\n",
"(224, 224, 3)\n"
]
}
],
"source": [
"# load in images\n",
"# resize to input size and normalize to 0 - 1\n",
"x_train = []\n",
"x_test = []\n",
"y_train = []\n",
"y_test = []\n",
"\n",
"for i in range(len(testfiles)):\n",
" test_i = testfiles[i].split()\n",
" imgpath = test_i[1]\n",
" img = cv2.imread(os.path.join('data', 'test', imgpath))\n",
" img = cv2.resize(img, INPUT_SIZE) # resize\n",
" img = img.astype('float32') / 255.0\n",
" x_test.append(img)\n",
" y_test.append(mapping[test_i[2]])\n",
"\n",
"print('Shape of test images: ', x_test[0].shape)\n",
"\n",
"for i in range(len(trainfiles)):\n",
" train_i = trainfiles[i].split()\n",
" imgpath = train_i[1]\n",
" img = cv2.imread(os.path.join('data', 'train', imgpath))\n",
" img = cv2.resize(img, INPUT_SIZE) # resize\n",
" img = img.astype('float32') / 255.0\n",
" x_train.append(img)\n",
" y_train.append(mapping[train_i[2]])\n",
"\n",
"print('Shape of train images: ', x_train[0].shape)"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [],
"source": [
"# export to npy to load in for training\n",
"np.save('data/x_train.npy', x_train)\n",
"np.save('data/y_train.npy', y_train)\n",
"np.save('data/x_test.npy', x_test)\n",
"np.save('data/y_test.npy', y_test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (tf1.15)",
"language": "python",
"name": "tf1.15"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
archive/test_split_v1.txt
deleted
100644 → 0
View file @
304b4cfa
This diff is collapsed.
Click to expand it.
archive/train_split_v1.txt
deleted
100644 → 0
View file @
304b4cfa
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment