ICPR 2020 Paper Deep Dive

1. Paper Deep Dive

EfficientNet: the paper builds its detectors on EfficientNetB4 (the --net value used in the training command below).

2. Running the Code

Dataset Initialization

!python index_dfdc.py --source "/home/jovyan/work/icpr2020/dfdc"
import argparse
from pathlib import Path


def parse_args(argv):
    parser = argparse.ArgumentParser()
    # --source should be the root directory containing the 50 dfdc_train_part_xx folders
    parser.add_argument('--source', type=Path, help='Source dir', required=True)
    parser.add_argument('--videodataset', type=Path, default='data/dfdc_videos.pkl',
                        help='Path to save the videos DataFrame')
    parser.add_argument('--batch', type=int, help='Batch size', default=64)

    return parser.parse_args(argv)
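
Once indexing completes, the saved DataFrame can be sanity-checked before moving on. A minimal sketch, assuming the default --videodataset path and the folder/label columns added by the indexing loop shown in Section 3:

import pandas as pd

df_videos = pd.read_pickle('data/dfdc_videos.pkl')
print(df_videos.shape)
# Videos per dfdc_train_part_xx (the 'folder' column is added during indexing)
print(df_videos['folder'].value_counts().sort_index())
# REAL vs FAKE counts (the 'label' column comes from metadata.json)
print(df_videos['label'].value_counts())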

Face Extraction

!python extract_faces.py \
--source "/home/jovyan/work/icpr2020/dfdc" \
--videodf "/home/jovyan/work/icpr2020/data/dfdc_videos.pkl" \
--facesfolder "/home/jovyan/work/icpr2020/faces/output/directory" \
--facesdf "/home/jovyan/work/icpr2020/faces/df/output/directory" \
--checkpoint "/home/jovyan/work/icpr2020/tmp/outputs"
import argparse
from pathlib import Path

import torch


def parse_args(argv):
    parser = argparse.ArgumentParser()
    # --source should be the root directory containing the 50 dfdc_train_part_xx folders
    parser.add_argument('--source', type=Path, help='Videos root directory', required=True)
    # the dfdc_videos.pkl file written by index_dfdc.py
    parser.add_argument('--videodf', type=Path, help='Path to read the videos DataFrame', required=True)
    # output directory for the extracted face crops
    parser.add_argument('--facesfolder', type=Path, help='Faces output root directory', required=True)
    # output directory for the faces DataFrame
    parser.add_argument('--facesdf', type=Path, help='Path to save the output DataFrame of faces', required=True)
    parser.add_argument('--checkpoint', type=Path, help='Path to save the temporary per-video outputs', required=True)

    parser.add_argument('--fpv', type=int, default=32, help='Frames per video')
    parser.add_argument('--device', type=torch.device,
                        default=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'),
                        help='Device to use for face extraction')
    parser.add_argument('--collateonly', help='Only perform collation of pre-existing results', action='store_true')
    parser.add_argument('--noindex', help='Do not rebuild the index', action='store_false')
    parser.add_argument('--batch', type=int, help='Batch size', default=16)
    parser.add_argument('--threads', type=int, help='Number of threads', default=8)
    parser.add_argument('--offset', type=int, help='Offset to start extraction', default=0)
    parser.add_argument('--num', type=int, help='Number of videos to process', default=0)
    parser.add_argument('--lazycheck', action='store_true', help='Lazy check of existing video indexes')
    parser.add_argument('--deepcheck', action='store_true', help='Try to open every image')

    return parser.parse_args(argv)
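
Before launching training, it is worth verifying the extraction output. A minimal sketch, assuming extraction wrote faces_df.pkl under the --facesdf directory (the same file the training command below passes to --dfdc_faces_df_path):

import pandas as pd

df_faces = pd.read_pickle('/home/jovyan/work/icpr2020/faces/df/output/directory/faces_df.pkl')
print(len(df_faces), 'face crops indexed')
print(df_faces.columns.tolist())  # inspect which per-face attributes were saved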

Training

# dfdc-35-5-10 means: 35 parts for train, 5 for val, 10 for test. To change the split, see split.py.
!python train_binclass.py \
--net EfficientNetB4 \
--traindb dfdc-35-5-10 \
--valdb dfdc-35-5-10 \
--dfdc_faces_df_path "/home/jovyan/work/icpr2020/faces/df/output/directory/faces_df.pkl" \
--dfdc_faces_dir "/home/jovyan/work/icpr2020/faces/output/directory" \
--face scale \
--size 224 \
--batch 32 \
--lr 1e-5 \
--valint 500 \
--patience 10 \
--maxiter 30000 \
--seed 41 \
--attention
def main():
    # Args
    # "split" below refers to the split.py module discussed in Section 3
    parser = argparse.ArgumentParser()
    parser.add_argument('--net', type=str, help='Net model class', required=True)
    parser.add_argument('--traindb', type=str, help='Training datasets', nargs='+', choices=split.available_datasets,
                        required=True)
    parser.add_argument('--valdb', type=str, help='Validation datasets', nargs='+', choices=split.available_datasets,
                        required=True)
    parser.add_argument('--dfdc_faces_df_path', type=str, action='store',
                        help='Path to the Pandas Dataframe obtained from extract_faces.py on the DFDC dataset. '
                             'Required for training/validating on the DFDC dataset.')
    parser.add_argument('--dfdc_faces_dir', type=str, action='store',
                        help='Path to the directory containing the faces extracted from the DFDC dataset. '
                             'Required for training/validating on the DFDC dataset.')
    parser.add_argument('--ffpp_faces_df_path', type=str, action='store',
                        help='Path to the Pandas Dataframe obtained from extract_faces.py on the FF++ dataset. '
                             'Required for training/validating on the FF++ dataset.')
    parser.add_argument('--ffpp_faces_dir', type=str, action='store',
                        help='Path to the directory containing the faces extracted from the FF++ dataset. '
                             'Required for training/validating on the FF++ dataset.')
    parser.add_argument('--face', type=str, help='Face crop or scale', required=True,
                        choices=['scale', 'tight'])
    parser.add_argument('--size', type=int, help='Train patch size', required=True)

    parser.add_argument('--batch', type=int, help='Batch size to fit in GPU memory', default=32)
    parser.add_argument('--lr', type=float, default=1e-5, help='Learning rate')
    parser.add_argument('--valint', type=int, help='Validation interval (iterations)', default=500)
    parser.add_argument('--patience', type=int, help='Patience before dropping the LR [validation intervals]',
                        default=10)
    parser.add_argument('--maxiter', type=int, help='Maximum number of iterations', default=20000)
    parser.add_argument('--init', type=str, help='Weight initialization file')
    parser.add_argument('--scratch', action='store_true', help='Train from scratch')

    parser.add_argument('--trainsamples', type=int, help='Limit the number of train samples per epoch', default=-1)
    parser.add_argument('--valsamples', type=int, help='Limit the number of validation samples per epoch',
                        default=6000)

    parser.add_argument('--logint', type=int, help='Training log interval (iterations)', default=100)
    parser.add_argument('--workers', type=int, help='Num workers for data loaders', default=6)
    parser.add_argument('--device', type=int, help='GPU device id', default=0)
    parser.add_argument('--seed', type=int, help='Random seed', default=0)

    parser.add_argument('--debug', action='store_true', help='Activate debug')
    parser.add_argument('--suffix', type=str, help='Suffix to default tag')

    parser.add_argument('--attention', action='store_true',
                        help='Enable Tensorboard log of attention masks')
    parser.add_argument('--log_dir', type=str, help='Directory for saving the training logs',
                        default='runs/binclass/')
    parser.add_argument('--models_dir', type=str, help='Directory for saving the models weights',
                        default='weights/binclass/')

    args = parser.parse_args()
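
The help texts above pin down the training schedule: validate every --valint iterations, and drop the learning rate after --patience validation rounds without improvement. A toy, runnable sketch of that pattern using ReduceLROnPlateau (an assumption about the scheduler; the actual loop in train_binclass.py may differ in details):

from torch import nn, optim

# Toy model and optimizer, only to illustrate --lr / --valint / --patience.
model = nn.Linear(10, 1)
optimizer = optim.Adam(model.parameters(), lr=1e-5)  # --lr 1e-5
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=10)  # --patience 10

maxiter, valint = 30000, 500  # --maxiter / --valint
for it in range(1, maxiter + 1):
    # ... one training step per iteration would go here ...
    if it % valint == 0:
        val_loss = 1.0  # constant loss never improves, so the LR eventually drops
        scheduler.step(val_loss)
print('final lr:', optimizer.param_groups[0]['lr'])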

3. Code Walkthrough

Dataset Initialization: index_dfdc.py

Merging the metadata.json files

DFDC is split into 50 parts, dfdc_train_part_00 through dfdc_train_part_49, and each part ships its own metadata.json. The following code merges all 50 metadata.json files into a single DataFrame.

import pandas as pd
from tqdm import tqdm

# Index: source_dir is the --source root directory (a pathlib.Path)
df_train_list = list()
for idx, json_path in enumerate(tqdm(sorted(source_dir.rglob('metadata.json')), desc='Indexing')):
    df_tmp = pd.read_json(json_path, orient='index')
    # relative path of each video w.r.t. the dataset root, e.g. dfdc_train_part_03/xyz.mp4
    df_tmp['path'] = df_tmp.index.map(
        lambda x: str(json_path.parent.relative_to(source_dir).joinpath(x)))
    # part number parsed from the folder name, e.g. dfdc_train_part_03 -> 3
    df_tmp['folder'] = int(str(json_path.parts[-2]).split('_')[-1])
    df_train_list.append(df_tmp)
df_videos = pd.concat(df_train_list, axis=0, verify_integrity=True)
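
For intuition about what each per-part DataFrame looks like, here is a self-contained toy example (hypothetical filenames; label/split/original are the keys DFDC uses in metadata.json):

import io
import pandas as pd

# A tiny stand-in for one dfdc_train_part_xx/metadata.json
metadata = '''{
"aaa.mp4": {"label": "FAKE", "split": "train", "original": "bbb.mp4"},
"bbb.mp4": {"label": "REAL", "split": "train"}
}'''

# orient='index' makes each video filename a row index, as in the loop above
df_tmp = pd.read_json(io.StringIO(metadata), orient='index')
print(df_tmp)
# index: video filename; columns: label, split, original (NaN for REAL videos)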

Face Extraction: extract_faces.py

Training: train_binclass.py

split.py

This code controls how many DFDC parts go into the training, validation, and test sets; edit the ranges below to change the split.

available_datasets = [
    'dfdc-35-5-10',
    'ff-c23-720-140-140',
    'ff-c23-720-140-140-5fpv',
    'ff-c23-720-140-140-10fpv',
    'ff-c23-720-140-140-15fpv',
    'ff-c23-720-140-140-20fpv',
    'ff-c23-720-140-140-25fpv',
    'celebdf',  # just for convenience, not used in the original paper
]


def get_split_df(df: pd.DataFrame, dataset: str, split: str) -> pd.DataFrame:
    if dataset == 'dfdc-35-5-10':
        if split == 'train':
            split_df = df[df['folder'].isin(range(35))]
        elif split == 'val':
            split_df = df[df['folder'].isin(range(35, 40))]
        elif split == 'test':
            split_df = df[df['folder'].isin(range(40, 50))]
        else:
            raise NotImplementedError('Unknown split: {}'.format(split))
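
The excerpt above only shows the dfdc-35-5-10 branch (the full function also handles the FF++ tags and presumably returns split_df at the end). Usage is then a one-liner per split; a sketch, assuming df_videos is the DataFrame built by index_dfdc.py, which carries the 'folder' column:

train_df = get_split_df(df_videos, dataset='dfdc-35-5-10', split='train')  # parts 0-34
val_df = get_split_df(df_videos, dataset='dfdc-35-5-10', split='val')      # parts 35-39
test_df = get_split_df(df_videos, dataset='dfdc-35-5-10', split='test')    # parts 40-49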

4. Metrics
