ICPR 2020 Paper Deep Dive

1. Paper Deep Dive

EfficientNet: the paper builds its detectors on EfficientNetB4 (the --net value used in the training command below).

2. Running the Code

Dataset Initialization

!python index_dfdc.py --source "/home/jovyan/work/icpr2020/dfdc"
import argparse
from pathlib import Path


def parse_args(argv):
    parser = argparse.ArgumentParser()
    # --source should be the root directory containing the 50 dfdc_train_part_xx folders
    parser.add_argument('--source', type=Path, help='Source dir', required=True)
    parser.add_argument('--videodataset', type=Path, default='data/dfdc_videos.pkl',
                        help='Path to save the videos DataFrame')
    parser.add_argument('--batch', type=int, help='Batch size', default=64)

    return parser.parse_args(argv)
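
Once indexing completes, the saved DataFrame can be sanity-checked before moving on. A minimal sketch, assuming the default --videodataset path and the folder/label columns added by the indexing loop shown in Section 3:

import pandas as pd

df_videos = pd.read_pickle('data/dfdc_videos.pkl')
print(df_videos.shape)
# Videos per dfdc_train_part_xx (the 'folder' column is added during indexing)
print(df_videos['folder'].value_counts().sort_index())
# REAL vs FAKE counts (the 'label' column comes from metadata.json)
print(df_videos['label'].value_counts())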

Face Extraction

!python extract_faces.py \
--source "/home/jovyan/work/icpr2020/dfdc" \
--videodf "/home/jovyan/work/icpr2020/data/dfdc_videos.pkl" \
--facesfolder "/home/jovyan/work/icpr2020/faces/output/directory" \
--facesdf "/home/jovyan/work/icpr2020/faces/df/output/directory" \
--checkpoint "/home/jovyan/work/icpr2020/tmp/outputs"
import argparse
from pathlib import Path

import torch


def parse_args(argv):
    parser = argparse.ArgumentParser()
    # --source should be the root directory containing the 50 dfdc_train_part_xx folders
    parser.add_argument('--source', type=Path, help='Videos root directory', required=True)
    # the dfdc_videos.pkl file written by index_dfdc.py
    parser.add_argument('--videodf', type=Path, help='Path to read the videos DataFrame', required=True)
    # output directory for the extracted face crops
    parser.add_argument('--facesfolder', type=Path, help='Faces output root directory', required=True)
    # output directory for the faces DataFrame
    parser.add_argument('--facesdf', type=Path, help='Path to save the output DataFrame of faces', required=True)
    parser.add_argument('--checkpoint', type=Path, help='Path to save the temporary per-video outputs', required=True)

    parser.add_argument('--fpv', type=int, default=32, help='Frames per video')
    parser.add_argument('--device', type=torch.device,
                        default=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'),
                        help='Device to use for face extraction')
    parser.add_argument('--collateonly', help='Only perform collation of pre-existing results', action='store_true')
    parser.add_argument('--noindex', help='Do not rebuild the index', action='store_false')
    parser.add_argument('--batch', type=int, help='Batch size', default=16)
    parser.add_argument('--threads', type=int, help='Number of threads', default=8)
    parser.add_argument('--offset', type=int, help='Offset to start extraction', default=0)
    parser.add_argument('--num', type=int, help='Number of videos to process', default=0)
    parser.add_argument('--lazycheck', action='store_true', help='Lazy check of existing video indexes')
    parser.add_argument('--deepcheck', action='store_true', help='Try to open every image')

    return parser.parse_args(argv)
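
Before launching training, it is worth verifying the extraction output. A minimal sketch, assuming extraction wrote faces_df.pkl under the --facesdf directory (the same file the training command below passes to --dfdc_faces_df_path):

import pandas as pd

df_faces = pd.read_pickle('/home/jovyan/work/icpr2020/faces/df/output/directory/faces_df.pkl')
print(len(df_faces), 'face crops indexed')
print(df_faces.columns.tolist())  # inspect which per-face attributes were saved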

Training

# dfdc-35-5-10 means: 35 parts for train, 5 for val, 10 for test. To change the split, see split.py.
!python train_binclass.py \
--net EfficientNetB4 \
--traindb dfdc-35-5-10 \
--valdb dfdc-35-5-10 \
--dfdc_faces_df_path "/home/jovyan/work/icpr2020/faces/df/output/directory/faces_df.pkl" \
--dfdc_faces_dir "/home/jovyan/work/icpr2020/faces/output/directory" \
--face scale \
--size 224 \
--batch 32 \
--lr 1e-5 \
--valint 500 \
--patience 10 \
--maxiter 30000 \
--seed 41 \
--attention
def main():
    # Args
    # "split" below refers to the split.py module discussed in Section 3
    parser = argparse.ArgumentParser()
    parser.add_argument('--net', type=str, help='Net model class', required=True)
    parser.add_argument('--traindb', type=str, help='Training datasets', nargs='+', choices=split.available_datasets,
                        required=True)
    parser.add_argument('--valdb', type=str, help='Validation datasets', nargs='+', choices=split.available_datasets,
                        required=True)
    parser.add_argument('--dfdc_faces_df_path', type=str, action='store',
                        help='Path to the Pandas Dataframe obtained from extract_faces.py on the DFDC dataset. '
                             'Required for training/validating on the DFDC dataset.')
    parser.add_argument('--dfdc_faces_dir', type=str, action='store',
                        help='Path to the directory containing the faces extracted from the DFDC dataset. '
                             'Required for training/validating on the DFDC dataset.')
    parser.add_argument('--ffpp_faces_df_path', type=str, action='store',
                        help='Path to the Pandas Dataframe obtained from extract_faces.py on the FF++ dataset. '
                             'Required for training/validating on the FF++ dataset.')
    parser.add_argument('--ffpp_faces_dir', type=str, action='store',
                        help='Path to the directory containing the faces extracted from the FF++ dataset. '
                             'Required for training/validating on the FF++ dataset.')
    parser.add_argument('--face', type=str, help='Face crop or scale', required=True,
                        choices=['scale', 'tight'])
    parser.add_argument('--size', type=int, help='Train patch size', required=True)

    parser.add_argument('--batch', type=int, help='Batch size to fit in GPU memory', default=32)
    parser.add_argument('--lr', type=float, default=1e-5, help='Learning rate')
    parser.add_argument('--valint', type=int, help='Validation interval (iterations)', default=500)
    parser.add_argument('--patience', type=int, help='Patience before dropping the LR [validation intervals]',
                        default=10)
    parser.add_argument('--maxiter', type=int, help='Maximum number of iterations', default=20000)
    parser.add_argument('--init', type=str, help='Weight initialization file')
    parser.add_argument('--scratch', action='store_true', help='Train from scratch')

    parser.add_argument('--trainsamples', type=int, help='Limit the number of train samples per epoch', default=-1)
    parser.add_argument('--valsamples', type=int, help='Limit the number of validation samples per epoch',
                        default=6000)

    parser.add_argument('--logint', type=int, help='Training log interval (iterations)', default=100)
    parser.add_argument('--workers', type=int, help='Num workers for data loaders', default=6)
    parser.add_argument('--device', type=int, help='GPU device id', default=0)
    parser.add_argument('--seed', type=int, help='Random seed', default=0)

    parser.add_argument('--debug', action='store_true', help='Activate debug')
    parser.add_argument('--suffix', type=str, help='Suffix to default tag')

    parser.add_argument('--attention', action='store_true',
                        help='Enable Tensorboard log of attention masks')
    parser.add_argument('--log_dir', type=str, help='Directory for saving the training logs',
                        default='runs/binclass/')
    parser.add_argument('--models_dir', type=str, help='Directory for saving the models weights',
                        default='weights/binclass/')

    args = parser.parse_args()
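
The help texts above pin down the training schedule: validate every --valint iterations, and drop the learning rate after --patience validation rounds without improvement. A toy, runnable sketch of that pattern using ReduceLROnPlateau (an assumption about the scheduler; the actual loop in train_binclass.py may differ in details):

from torch import nn, optim

# Toy model and optimizer, only to illustrate --lr / --valint / --patience.
model = nn.Linear(10, 1)
optimizer = optim.Adam(model.parameters(), lr=1e-5)  # --lr 1e-5
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=10)  # --patience 10

maxiter, valint = 30000, 500  # --maxiter / --valint
for it in range(1, maxiter + 1):
    # ... one training step per iteration would go here ...
    if it % valint == 0:
        val_loss = 1.0  # constant loss never improves, so the LR eventually drops
        scheduler.step(val_loss)
print('final lr:', optimizer.param_groups[0]['lr'])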

3. Code Walkthrough

Dataset Initialization: index_dfdc.py

Merging the metadata.json files

DFDC is split into 50 parts, dfdc_train_part_00 through dfdc_train_part_49, and each part ships its own metadata.json. The following code merges all 50 metadata.json files into a single DataFrame.

import pandas as pd
from tqdm import tqdm

# Index: source_dir is the --source root directory (a pathlib.Path)
df_train_list = list()
for idx, json_path in enumerate(tqdm(sorted(source_dir.rglob('metadata.json')), desc='Indexing')):
    df_tmp = pd.read_json(json_path, orient='index')
    # relative path of each video w.r.t. the dataset root, e.g. dfdc_train_part_03/xyz.mp4
    df_tmp['path'] = df_tmp.index.map(
        lambda x: str(json_path.parent.relative_to(source_dir).joinpath(x)))
    # part number parsed from the folder name, e.g. dfdc_train_part_03 -> 3
    df_tmp['folder'] = int(str(json_path.parts[-2]).split('_')[-1])
    df_train_list.append(df_tmp)
df_videos = pd.concat(df_train_list, axis=0, verify_integrity=True)
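
For intuition about what each per-part DataFrame looks like, here is a self-contained toy example (hypothetical filenames; label/split/original are the keys DFDC uses in metadata.json):

import io
import pandas as pd

# A tiny stand-in for one dfdc_train_part_xx/metadata.json
metadata = '''{
"aaa.mp4": {"label": "FAKE", "split": "train", "original": "bbb.mp4"},
"bbb.mp4": {"label": "REAL", "split": "train"}
}'''

# orient='index' makes each video filename a row index, as in the loop above
df_tmp = pd.read_json(io.StringIO(metadata), orient='index')
print(df_tmp)
# index: video filename; columns: label, split, original (NaN for REAL videos)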

Face Extraction: extract_faces.py

Training: train_binclass.py

split.py

This code controls how many DFDC parts go into the training, validation, and test sets; edit the ranges below to change the split.

available_datasets = [
    'dfdc-35-5-10',
    'ff-c23-720-140-140',
    'ff-c23-720-140-140-5fpv',
    'ff-c23-720-140-140-10fpv',
    'ff-c23-720-140-140-15fpv',
    'ff-c23-720-140-140-20fpv',
    'ff-c23-720-140-140-25fpv',
    'celebdf',  # just for convenience, not used in the original paper
]


def get_split_df(df: pd.DataFrame, dataset: str, split: str) -> pd.DataFrame:
    if dataset == 'dfdc-35-5-10':
        if split == 'train':
            split_df = df[df['folder'].isin(range(35))]
        elif split == 'val':
            split_df = df[df['folder'].isin(range(35, 40))]
        elif split == 'test':
            split_df = df[df['folder'].isin(range(40, 50))]
        else:
            raise NotImplementedError('Unknown split: {}'.format(split))
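
The excerpt above only shows the dfdc-35-5-10 branch (the full function also handles the FF++ tags and presumably returns split_df at the end). Usage is then a one-liner per split; a sketch, assuming df_videos is the DataFrame built by index_dfdc.py, which carries the 'folder' column:

train_df = get_split_df(df_videos, dataset='dfdc-35-5-10', split='train')  # parts 0-34
val_df = get_split_df(df_videos, dataset='dfdc-35-5-10', split='val')      # parts 35-39
test_df = get_split_df(df_videos, dataset='dfdc-35-5-10', split='test')    # parts 40-49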

4. Metrics
