Unverified Commit a3c31c0c authored by Nikolaos Passalis, committed by GitHub

Merge pull request #213 from opendr-eu/merge-master-into-develop

Merge `master` branch into `develop` branch
parents ddfa8012 a5c65a9e
Showing with 1263 additions and 6 deletions
name: Publisher
# Trigger on new github release, a tag with format vX.Y.Z is expected (used to tag the docker)
on:
release:
types: [published]
env:
OPENDR_VERSION: ${{ github.event.release.tag_name }}
defaults:
run:
shell: bash
jobs:
publish-wheel:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
with:
submodules: true
- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Install prerequisites
run: |
python -m pip install --upgrade pip
pip install setuptools wheel twine
- name: Build Wheel
run: |
./bin/build_wheel.sh
- name: Upload Wheel
env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
twine upload dist/*
publish-docker-cpu:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
with:
submodules: true
- name: Build Docker Image
run: docker build --tag opendr-toolkit:cpu_$OPENDR_VERSION --file Dockerfile .
- name: Login to Docker Hub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
- name: Publish Image
run: |
docker tag opendr-toolkit:cpu_$OPENDR_VERSION opendr/opendr-toolkit:cpu_$OPENDR_VERSION
docker push opendr/opendr-toolkit:cpu_$OPENDR_VERSION
publish-docker-cuda:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
with:
submodules: true
- name: Build Docker Image
run: docker build --tag opendr-toolkit:cuda_$OPENDR_VERSION --file Dockerfile-cuda .
- name: Login to Docker Hub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
- name: Publish Image
run: |
docker tag opendr-toolkit:cuda_$OPENDR_VERSION opendr/opendr-toolkit:cuda_$OPENDR_VERSION
docker push opendr/opendr-toolkit:cuda_$OPENDR_VERSION
@@ -61,7 +61,6 @@ jobs:
source venv/bin/activate
wget https://raw.githubusercontent.com/opendr-eu/opendr/master/dependencies/pip_requirements.txt
cat pip_requirements.txt | xargs -n 1 -L 1 pip install
# Test new package
pip install opendr-toolkit
python -m unittest discover -s tests/sources/tools/${{ matrix.package }}
test-docker:
@@ -89,7 +88,7 @@ jobs:
- control/mobile_manipulation
- simulation/human_model_generation
- control/single_demo_grasp
#- perception/object_tracking_3d
# - perception/object_tracking_3d
runs-on: ${{ matrix.os }}
steps:
- name: Set up Python 3.8
@@ -12,7 +12,7 @@ defaults:
jobs:
cleanup-runs:
if: ${{ contains(github.event.pull_request.labels.*.name, 'test sources') || contains(github.event.pull_request.labels.*.name, 'test tools') || github.event_name == 'schedule' }}
if: ${{ contains(github.event.pull_request.labels.*.name, 'test sources') || contains(github.event.pull_request.labels.*.name, 'test tools') || contains(github.event.pull_request.labels.*.name, 'test release') || github.event_name == 'schedule' }}
runs-on: ubuntu-latest
steps:
- uses: rokroskar/workflow-run-cleanup-action@master
@@ -106,4 +106,134 @@ jobs:
source tests/sources/tools/control/mobile_manipulation/run_ros.sh
python -m unittest discover -s tests/sources/tools/${{ matrix.package }}
fi
build-wheel:
needs: cleanup-runs
if: ${{ contains(github.event.pull_request.labels.*.name, 'test release') || github.event_name == 'schedule' }}
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
with:
submodules: true
- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Install prerequisites
run: |
python -m pip install --upgrade pip
pip install setuptools wheel twine
- name: Build Wheel
run:
./bin/build_wheel.sh
- name: Upload wheel as artifact
uses: actions/upload-artifact@v2
with:
path:
dist/*.tar.gz
build-docker:
needs: cleanup-runs
if: ${{ contains(github.event.pull_request.labels.*.name, 'test release') || github.event_name == 'schedule' }}
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
with:
submodules: true
- name: Build image
run: |
docker build --tag opendr/opendr-toolkit:cpu_test --file Dockerfile .
docker save opendr/opendr-toolkit:cpu_test > cpu_test.zip
- name: Upload image artifact
uses: actions/upload-artifact@v2
with:
path:
cpu_test.zip
test-wheel:
needs: build-wheel
if: ${{ contains(github.event.pull_request.labels.*.name, 'test release') || github.event_name == 'schedule' }}
strategy:
matrix:
os: [ubuntu-20.04]
package:
- engine
- utils
- perception/activity_recognition
- perception/compressive_learning
- perception/face_recognition
- perception/heart_anomaly_detection
- perception/multimodal_human_centric
- perception/object_tracking_2d
- perception/pose_estimation
- perception/speech_recognition
- perception/skeleton_based_action_recognition
- perception/semantic_segmentation
- perception/object_detection_2d
- perception/facial_expression_recognition
# - perception/object_detection_3d
# - control/mobile_manipulation
# - simulation/human_model_generation
# - control/single_demo_grasp
# - perception/object_tracking_3d
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
with:
submodules: true
- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Download artifact
uses: actions/download-artifact@v2
with:
path: artifact
- name: Get branch name
id: branch-name
uses: tj-actions/branch-names@v5.1
- name: Test Wheel
run: |
export DISABLE_BCOLZ_AVX2=true
sudo apt -y install python3.8-venv libfreetype6-dev git build-essential cmake python3-dev wget libopenblas-dev libsndfile1 libboost-dev python3-dev
python3 -m venv venv
source venv/bin/activate
wget https://raw.githubusercontent.com/opendr-eu/opendr/${{ steps.branch-name.outputs.current_branch }}/dependencies/pip_requirements.txt
cat pip_requirements.txt | xargs -n 1 -L 1 pip install
pip install ./artifact/artifact/*.tar.gz
python -m unittest discover -s tests/sources/tools/${{ matrix.package }}
test-docker:
needs: build-docker
if: ${{ contains(github.event.pull_request.labels.*.name, 'test release') || github.event_name == 'schedule' }}
strategy:
matrix:
os: [ubuntu-20.04]
package:
- engine
- utils
- perception/activity_recognition
- perception/compressive_learning
- perception/face_recognition
- perception/heart_anomaly_detection
- perception/multimodal_human_centric
- perception/object_tracking_2d
- perception/pose_estimation
- perception/speech_recognition
- perception/skeleton_based_action_recognition
- perception/semantic_segmentation
- perception/object_detection_2d
- perception/facial_expression_recognition
- perception/object_detection_3d
- control/mobile_manipulation
- simulation/human_model_generation
- control/single_demo_grasp
# - perception/object_tracking_3d
runs-on: ubuntu-20.04
steps:
- name: Download artifact
uses: actions/download-artifact@v2
with:
path: artifact
- name: Test docker
run: |
docker load < ./artifact/artifact/cpu_test.zip
docker run --name toolkit -i opendr/opendr-toolkit:cpu_test bash
docker start toolkit
docker exec -i toolkit bash -c "source bin/activate.sh && source tests/sources/tools/control/mobile_manipulation/run_ros.sh && python -m unittest discover -s tests/sources/tools/${{ matrix.package }}"
@@ -67,11 +67,14 @@ Neither the copyright holder nor any applicable licensor will be liable for any
- [single_demo_grasp Module](single-demonstration-grasping.md)
- `simulation` Module
- [human_model_generation Module](human_model_generation.md)
- `data_generation` Module
- [synthetic_facial_image_generation Module](synthetic_facial_image_generator.md)
- [human_model_generation Module](human-model-generation.md)
- `utils` Module
- [Hyperparameter Tuning Module](hyperparameter_tuner.md)
- `Stand-alone Utility Frameworks`
- [Engine Agnostic Gym Environment with Reactive extension (EAGERx)](eagerx.md)
- `Stand-alone Utility Frameworks`
- [Engine Agnostic Gym Environment with Reactive extension (EAGERx)](eagerx.md)
- [ROSBridge Package](rosbridge.md)
- [C Inference API](c-api.md)
- [data.h](c-data-h.md)
## synthetic_facial_image_generator module
The *synthetic_facial_image_generator* module contains the *MultiviewDataGeneration* class, which implements the multi-view facial image rendering operation.
### Class MultiviewDataGeneration
The *MultiviewDataGeneration* class is a wrapper of the Rotate-and-Render [[1]](#R-R-paper) photorealistic multi-view facial image generator based on the original
[Rotate-and-Render implementation](https://github.com/Hangz-nju-cuhk/Rotate-and-Render).
It can be used to perform multi-view facial image generation from a single-view image in the wild.
The [MultiviewDataGeneration](#projects.data_generation.synthetic-multi-view-facial-image-generation.3ddfa.SyntheticDataGeneration.py) class has the
following public methods:
#### `MultiviewDataGeneration` constructor
```python
MultiviewDataGeneration(self, args)
```
Constructor main parameters (*args*) explanation:
- **path_in**: *str, default='./example/Images'* \
An absolute path that indicates the folder containing the set of single-view facial image snapshots to be processed by the algorithm.
- **path_3ddfa**: *str, default='./'* \
An absolute path that indicates the 3ddfa module folder of the software structure, as presented in the repository. This path is necessary so that the software can create, inside the `results` folder of this path, the folders for the intermediate/temporary storage of files generated during pre-processing, such as 3D face models and facial landmarks.
- **save_path**: *str, default='./results'* \
The folder in which the output images are stored.
- **val_yaw**: *str, default='10,20'* \
The yaw angles (in the interval [−90°,90°]) for which the rendered images will be produced.
- **val_pitch**: *str, default=' 30,40'* \
The pitch angles (in the interval [−90°,90°]) for which the rendered images will be produced.
- **device**: *{'cuda', 'cpu'}, default='cpu'* \
Specifies the device to be used.
#### `MultiviewDataGeneration.eval`
```python
MultiviewDataGeneration.eval()
```
This function implements the main procedure for the creation of the multi-view facial images, which consists of three stages.
The main parameters of the 3DDFA network are initialized here rather than in the constructor; the first stage then detects the candidate faces in the input images and fits a 3D head mesh to them using 3DDFA.
The second stage extracts the facial landmarks in order to derive the head pose and align the images with the 3D head-model mesh.
Finally, the main functionality of the multi-view facial image rendering is executed by loading the respective network parameters.
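A minimal programmatic sketch is given below. It assumes the constructor arguments are passed as a simple namespace whose fields mirror the documented parameters and that the script is run from the module folder; the actual tool (`tool_synthetic_facial_generation.py`) builds the arguments with an `argparse` parser that defines additional options required by the underlying 3DDFA and Rotate-and-Render stages.
```python
from argparse import Namespace

from SyntheticDataGeneration import MultiviewDataGeneration

# Hypothetical argument namespace mirroring only the documented parameters;
# the actual parser exposes additional options needed by the rendering pipeline.
args = Namespace(
    path_in='./demos/imgs_input/',
    path_3ddfa='./algorithm/DDFA/',
    save_path='./results',
    val_yaw='10,40',
    val_pitch='10,30',
    device='cpu',
)

generator = MultiviewDataGeneration(args)
# generator.eval()  # runs the three stages; requires the full argument set of the original parser
```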
### Usage Example
```sh
python3 tool_synthetic_facial_generation.py -path_in ./demos/imgs_input/ -path_3ddfa ./algorithm/DDFA/ -save_path ./results -val_yaw 10, 40 -val_pitch 10, 30 -device cuda
```
The corresponding paths for the input and output folders, as well as the pitch and yaw angles for which the user wants to produce the facial images, can easily be set when the class is created and the method is initialized.
The process is executed for the CNN parameters and GPUs specified in the arguments of the aforementioned command.
Users who wish to modify these parameters should change the respective input arguments, which are derived from a parser and include path_in, path_3ddfa, save_path, val_yaw, val_pitch, etc.
#### References
<a name="R-R-paper" href="https://github.com/Hangz-nju-cuhk/Rotate-and-Render">[1]</a>
Hang Zhou, Jihao Liu, Ziwei Liu, Yu Liu, Xiaogang Wang, Rotate-and-Render: Unsupervised Photorealistic Face Rotation from Single-View Images,
[arXiv](https://arxiv.org/abs/2003.08124#).
# Synthetic Multi-view Facial Image Generation based on Rotate-and-Render: Unsupervised Photorealistic Face Rotation from Single-View Images (CVPR 2020)
Based on: [[Rotate-and-Render: Unsupervised Photorealistic Face Rotation from Single-View Images]](https://arxiv.org/abs/2003.08124)
We utilize publicly available code, with small modifications so that it can be easily executed: an unsupervised framework that synthesizes photorealistic rotated facial images from a single facial image, or from multiple such images (one per person).
The implemented method allows for rotating faces in the 3D space back and forth, and then re-rendering them to the 2D plane.
The generated multi-view facial images can be used for different learning tasks, such as in self-supervised learning tasks.
## Sources:
* Face Alignment in Full Pose Range: A 3D Total Solution (IEEE TPAMI 2017)
* Neural 3D Mesh Renderer (CVPR 2018)
* Rotate-and-Render: Unsupervised Photorealistic Face Rotation from Single-View Images (CVPR 2020)
## Requirements
* Python 3.6 is used. Basic requirements are listed in the 'requirements.txt'.
```
pip3 install -r requirements.txt
```
* Install the [Neural_Renderer](https://github.com/daniilidis-group/neural_renderer) following the instructions.
```
pip install git+https://github.com/cidl-auth/neural_renderer
```
* Download the checkpoint and BFM model from [checkpoint.zip](ftp://opendrdata.csd.auth.gr/data_generation/synthetic_multi-view-facial-generator/ckpt_and_bfm.zip), put it in ```3ddfa``` and unzip it:
```bash
wget ftp://opendrdata.csd.auth.gr/data_generation/synthetic_multi-view-facial-generator/checkpoints.zip
unzip checkpoints.zip
unzip checkpoints/ckpt_and_bfm.zip -d 3ddfa
```
The 3D models are borrowed from [3DDFA](https://github.com/cleardusk/3DDFA).
* Compile cython code and download remaining models:
```bash
cd algorithm/DDFA/utils/cython/
python3 setup.py build_ext -i
cd ../../../..
mkdir algorithm/DDFA/models
mkdir algorithm/DDFA/example
wget https://github.com/cleardusk/3DDFA/blob/master/models/phase1_wpdc_vdc.pth.tar?raw=true -O algorithm/DDFA/models/phase1_wpdc_vdc.pth.tar
```
## Usage Example
1. Execute the one-step OPENDR function ```tool_synthetic_facial_generation.py```, specifying the input images folder, the output folder and the desired pitch and yaw angles (in the range -90 to 90 degrees) for which the multi-view facial images will be generated, as indicated in the command line:
```sh
python3 tool_synthetic_facial_generation.py -path_in ./demos/imgs_input/ -path_3ddfa ./algorithm/DDFA/ -save_path ./results -val_yaw 10, 40 -val_pitch 10, 30 -device cuda
```
2. The results can be found in ```results/rs_model/example/```, where multi-view facial images are generated for every person in a respective folder.
## License
Rotate-and-Render is provided under [CC-BY-4.0](https://github.com/Hangz-nju-cuhk/Rotate-and-Render/blob/master/LICENSE) license.
SPADE, SyncBN and 3DDFA are under the [MIT License](https://github.com/tasostefas/opendr_internal/blob/synthetic-multi-view-facial-generator/projects/data_generation/synthetic-multi-view-facial-image-generation/3ddfa/LICENSE).
## Acknowledgement
Large parts of the code are taken from:
* The structure of this codebase is borrowed from [SPADE](https://github.com/NVlabs/SPADE).
* The [SyncBN](https://github.com/vacancy/Synchronized-BatchNorm-PyTorch) module is used in the current code.
* The [3DDFA](https://github.com/cleardusk/3DDFA) implementation for 3D reconstruction.
* The [Rotate-and-Render](https://github.com/Hangz-nju-cuhk/Rotate-and-Render/) code,
with the following modifications to make it compatible with the OpenDR specifications:
## Minor Modifications
1. All scripts: PEP8 changes
2. ```3ddfa/preprocessing_1.py, 3ddfa/preprocessing_2.py, test_multipose.py``` Modified to work as callable functions
3. ```options/base_options.py, options/test_options.py ``` Commented out/changed several parameters so they can be easily executed
4. ```models/networks/render.py``` Minor functional changes
5. The OpenDR-created functions are ```SyntheticDataGeneration.py, tool_synthetic_facial_generation.py```
6. The rest is taken from the aforementioned repositories
# Copyright 2020-2022 OpenDR European Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# MIT License
#
# Copyright (c) 2019 Jian Zhao
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#!/usr/bin/env python3.7
# coding: utf-8
from tqdm import tqdm
from shutil import copyfile
import cv2
import os
from algorithm.DDFA import preprocessing_1
from algorithm.DDFA import preprocessing_2
from algorithm.Rotate_and_Render import test_multipose
class MultiviewDataGeneration():
def __init__(self, args):
self.path_in = args.path_in
self.key = str(args.path_3ddfa + "/example/Images/")
self.key1 = str(args.path_3ddfa + "/example/")
self.key2 = str(args.path_3ddfa + "/results/")
self.save_path = args.save_path
self.val_yaw = args.val_yaw
self.val_pitch = args.val_pitch
self.args = args
def eval(self):
# STAGE No1 : detect faces and fitting to 3d mesh by main.py execution
list_im = []
print("START")
a = open("file_list.txt", "w")
for subdir, dirs, files in os.walk(self.path_in):
current_directory_path = os.path.abspath(subdir)
for file in files:
name, ext = os.path.splitext(file)
if ext == ".jpg":
current_image_path = os.path.join(current_directory_path, file)
current_image = cv2.imread(current_image_path)
list_im.append(current_image_path)
a.write(str(file) + os.linesep)
cv2.imwrite(os.path.join(self.key, file), current_image)
self.args.files = list_im.copy()
list_im.clear()
preprocessing_1.main(self.args)
a.close()
# STAGE No2: Landmarks Output with inference.py execution
im_list2 = []
d = open(os.path.join(self.key1, 'realign_lmk'), "w")
for subdir, dirs, files in os.walk(self.path_in):
current_directory_path = os.path.abspath(subdir)
self.args.img_prefix = current_directory_path
self.args.save_dir = os.path.abspath(self.key2)
self.args.save_lmk_dir = os.path.abspath(self.key1)
if not os.path.exists(self.args.save_dir):
os.mkdir(self.args.save_dir)
if not os.path.exists(self.args.save_lmk_dir):
os.mkdir(self.args.save_lmk_dir)
list_lfw_batch = './file_list.txt'
dst = os.path.join(self.args.save_lmk_dir, "file_list.txt")
copyfile(list_lfw_batch, dst)
b = open("txt_name_batch.txt", "w")
for file in files:
with open(list_lfw_batch) as f:
img_list = [x.strip() for x in f.readlines()]
for img_idx, img_fp in enumerate(tqdm(img_list)):
if img_fp == str(file):
im_list2.append(str(file))
b.write(str(file) + os.linesep)
self.args.img_list = './txt_name_batch.txt'
b.close()
self.args.dump_lmk = 'true'
im_list2.clear()
preprocessing_2.main(self.args)
with open(os.path.join(self.args.save_lmk_dir, 'realign_lmk_')) as f:
img_list = [x.strip() for x in f.readlines()]
for img_idx, img_fp in enumerate(tqdm(img_list)):
d.write(img_fp + os.linesep)
d.close()
# STAGE No3: Generate Facial Images in specific pitch and yaw angles
test_multipose.main(self.save_path, self.val_yaw, self.val_pitch)
def fit(self):
raise NotImplementedError()
def infer(self):
raise NotImplementedError()
def load(self):
raise NotImplementedError()
def optimize(self):
raise NotImplementedError()
def reset(self):
raise NotImplementedError()
def save(self):
raise NotImplementedError()
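% bfm_show.m: load the refined BFM model, render its mean face shape with the 68 3D keypoints and save the frame to imgs/bfm_noneck.jpg.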
model = load('model_refine.mat');
model = model.model_refine;
mu = model.mu_shape + model.mu_exp;
mu = reshape(mu, 3, length(mu) / 3);
tri = model.tri;
keypoints = model.keypoints;
pts68_3d = mu(:, keypoints);
render_face_mesh(mu, tri, pts68_3d);
A = getframe(gcf);
mimg = A.cdata;
imwrite(mimg, 'imgs/bfm_noneck.jpg', 'quality', 95);
The original version with neck:
<p align="center">
<img src="imgs/bfm_noneck.jpg" alt="neck" width="400px">
</p>
[bfm.ply](https://github.com/Hangz-nju-cuhk/Rotate-and-Render/blob/master/3ddfa/BFM_Remove_Neck/bfm.ply)
The image is rendered by MeshLab.
`bfm_show.m` shows how to render it with 68 keypoints in Matlab.
<p align="center">
<img src="imgs/bfm_refine.jpg" alt="no neck">
</p>
Attention: Do not use the `ply` file in training.
function render_face_mesh(vertex, tri, pts68_3d)
trisurf(tri', vertex(1, :), vertex(2, :), vertex(3, :), ones(size(vertex, 2),1), 'edgecolor', 'none');
re=[1 1 1];
colormap(re);
light('Position', [0 0 1], 'Style', 'infinite');
lighting gouraud
axis equal
view([0 90]);
if nargin == 3
hold on; plot3(pts68_3d(1,:), pts68_3d(2,:), pts68_3d(3,:)+1, '*');
end
xlabel('x');
ylabel('y');
zlabel('z');
axis on;
grid on;
end
MIT License
Copyright (c) 2018 Jianzhu Guo
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
#!/usr/bin/env python3
# coding: utf-8
from __future__ import division
"""
Creates a MobileNet Model as defined in:
Andrew G. Howard, Menglong Zhu, Bo Chen, et al. (2017).
MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications.
Copyright (c) Yang Lu, 2017
Modified By cleardusk
"""
import math
import torch.nn as nn
__all__ = ['mobilenet_2', 'mobilenet_1', 'mobilenet_075', 'mobilenet_05', 'mobilenet_025']
class DepthWiseBlock(nn.Module):
def __init__(self, inplanes, planes, stride=1, prelu=False):
super(DepthWiseBlock, self).__init__()
inplanes, planes = int(inplanes), int(planes)
self.conv_dw = nn.Conv2d(inplanes, inplanes, kernel_size=3, padding=1, stride=stride, groups=inplanes,
bias=False)
self.bn_dw = nn.BatchNorm2d(inplanes)
self.conv_sep = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, padding=0, bias=False)
self.bn_sep = nn.BatchNorm2d(planes)
if prelu:
self.relu = nn.PReLU()
else:
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
out = self.conv_dw(x)
out = self.bn_dw(out)
out = self.relu(out)
out = self.conv_sep(out)
out = self.bn_sep(out)
out = self.relu(out)
return out
class MobileNet(nn.Module):
def __init__(self, widen_factor=1.0, num_classes=1000, prelu=False, input_channel=3):
""" Constructor
Args:
widen_factor: config of widen_factor
num_classes: number of classes
"""
super(MobileNet, self).__init__()
block = DepthWiseBlock
self.conv1 = nn.Conv2d(input_channel, int(32 * widen_factor), kernel_size=3, stride=2, padding=1,
bias=False)
self.bn1 = nn.BatchNorm2d(int(32 * widen_factor))
if prelu:
self.relu = nn.PReLU()
else:
self.relu = nn.ReLU(inplace=True)
self.dw2_1 = block(32 * widen_factor, 64 * widen_factor, prelu=prelu)
self.dw2_2 = block(64 * widen_factor, 128 * widen_factor, stride=2, prelu=prelu)
self.dw3_1 = block(128 * widen_factor, 128 * widen_factor, prelu=prelu)
self.dw3_2 = block(128 * widen_factor, 256 * widen_factor, stride=2, prelu=prelu)
self.dw4_1 = block(256 * widen_factor, 256 * widen_factor, prelu=prelu)
self.dw4_2 = block(256 * widen_factor, 512 * widen_factor, stride=2, prelu=prelu)
self.dw5_1 = block(512 * widen_factor, 512 * widen_factor, prelu=prelu)
self.dw5_2 = block(512 * widen_factor, 512 * widen_factor, prelu=prelu)
self.dw5_3 = block(512 * widen_factor, 512 * widen_factor, prelu=prelu)
self.dw5_4 = block(512 * widen_factor, 512 * widen_factor, prelu=prelu)
self.dw5_5 = block(512 * widen_factor, 512 * widen_factor, prelu=prelu)
self.dw5_6 = block(512 * widen_factor, 1024 * widen_factor, stride=2, prelu=prelu)
self.dw6 = block(1024 * widen_factor, 1024 * widen_factor, prelu=prelu)
self.avgpool = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Linear(int(1024 * widen_factor), num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.dw2_1(x)
x = self.dw2_2(x)
x = self.dw3_1(x)
x = self.dw3_2(x)
x = self.dw4_1(x)
x = self.dw4_2(x)
x = self.dw5_1(x)
x = self.dw5_2(x)
x = self.dw5_3(x)
x = self.dw5_4(x)
x = self.dw5_5(x)
x = self.dw5_6(x)
x = self.dw6(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
def mobilenet(widen_factor=1.0, num_classes=1000):
"""
Construct MobileNet.
widen_factor=1.0 for mobilenet_1
widen_factor=0.75 for mobilenet_075
widen_factor=0.5 for mobilenet_05
widen_factor=0.25 for mobilenet_025
"""
model = MobileNet(widen_factor=widen_factor, num_classes=num_classes)
return model
def mobilenet_2(num_classes=62, input_channel=3):
model = MobileNet(widen_factor=2.0, num_classes=num_classes, input_channel=input_channel)
return model
def mobilenet_1(num_classes=62, input_channel=3):
model = MobileNet(widen_factor=1.0, num_classes=num_classes, input_channel=input_channel)
return model
def mobilenet_075(num_classes=62, input_channel=3):
model = MobileNet(widen_factor=0.75, num_classes=num_classes, input_channel=input_channel)
return model
def mobilenet_05(num_classes=62, input_channel=3):
model = MobileNet(widen_factor=0.5, num_classes=num_classes, input_channel=input_channel)
return model
def mobilenet_025(num_classes=62, input_channel=3):
model = MobileNet(widen_factor=0.25, num_classes=num_classes, input_channel=input_channel)
return model
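# Minimal usage sketch (illustrative): 3DDFA uses this backbone to regress 62 parameters
# (12 pose + 40 shape + 10 expression) from a 120x120 face crop.
#
#   import torch
#   net = mobilenet_1(num_classes=62)
#   params = net(torch.randn(1, 3, 120, 120))  # -> tensor of shape (1, 62)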
#!/usr/bin/env python3
# coding: utf-8
"""
The pipeline of 3DDFA prediction: given one image, predict the 3d face vertices, 68 landmarks and visualization.
[todo]
1. CPU optimization: https://pmchojnacki.wordpress.com/2018/10/07/slow-pytorch-cpu-performance
"""
import torch
import torchvision.transforms as transforms
from . import mobilenet_v1
import numpy as np
import cv2
from os import path
import face_alignment
from .utils.ddfa import ToTensorGjz, NormalizeGjz
import scipy.io as sio
from .utils.inference import get_suffix, parse_roi_box_from_landmark, crop_img, predict_68pts, dump_to_ply, \
dump_vertex, draw_landmarks, predict_dense, parse_roi_box_from_bbox, get_colors, write_obj_with_colors
from .utils.cv_plot import plot_pose_box
from .utils.estimate_pose import parse_pose
from .utils.render import cget_depths_image, cpncc
from .utils.paf import gen_img_paf
import torch.backends.cudnn as cudnn
import sys
__author__ = 'cleardusk'
STD_SIZE = 120
def main(args):
# 1. load pre-trained model
checkpoint_fp = 'algorithm/DDFA/models/phase1_wpdc_vdc.pth.tar'
arch = 'mobilenet_1'
checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
model = getattr(mobilenet_v1, arch)(num_classes=62) # 62 = 12(pose) + 40(shape) +10(expression)
model_dict = model.state_dict()
# because the model is trained by multiple gpus, prefix module should be removed
for k in checkpoint.keys():
model_dict[k.replace('module.', '')] = checkpoint[k]
model.load_state_dict(model_dict)
if args.mode == 'gpu':
cudnn.benchmark = True
model = model.cuda()
model.eval()
'''
# 2. load dlib model for face detection and landmark used for face cropping
if args.dlib_landmark:
dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
face_regressor = dlib.shape_predictor(dlib_landmark_model)
if args.dlib_bbox:
face_detector = dlib.get_frontal_face_detector()
'''
face_regressor = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False)
# face_detector = face_regressor.face_detector
# 3. forward
tri = sio.loadmat('algorithm/DDFA/visualize/tri.mat')['tri']
transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
# print(args.files)
for img_fp in args.files:
print(img_fp)
suffix = get_suffix(img_fp)
wfp = '{}_{}.obj'.format(img_fp.replace(suffix, ''), 0)
if not path.exists(wfp):
img_ori = cv2.imread(img_fp)
if img_ori is None:
print("Can't load image, please check the path", file=sys.stderr)
sys.exit(1)
try:
rect
except NameError:
rect = None
'''
if args.dlib_bbox:
rects = face_detector(img_ori, 1)
else:
rects = []
if len(rects) == 0:
rects = dlib.rectangles()
rect_fp = img_fp + '.bbox'
lines = open(rect_fp).read().strip().split('\n')[1:]
for l in lines:
l, r, t, b = [int(_) for _ in l.split(' ')[1:]]
rect = dlib.rectangle(l, r, t, b)
rects.append(rect)
'''
img_rgb = img_ori[:, :, ::-1]
ptss = face_regressor.get_landmarks(img_rgb)
pts_res = []
Ps = [] # Camera matrix collection
poses = [] # pose collection, [todo: validate it]
vertices_lst = [] # store multiple face vertices
ind = 0
for pts in ptss:
# whether use dlib landmark to crop image, if not, use only face bbox to calc roi bbox for cropping
if args.dlib_landmark:
# - use landmark for cropping
# pts = face_regressor(img_ori, rect).parts()
# pts = np.array([[pt.x, pt.y] for pt in pts]).T
roi_box = parse_roi_box_from_landmark(pts.T)
else:
# - use detected face bbox
bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
roi_box = parse_roi_box_from_bbox(bbox)
img = crop_img(img_ori, roi_box)
# forward: one step
img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
input = transform(img).unsqueeze(0)
with torch.no_grad():
if args.mode == 'gpu':
input = input.cuda()
param = model(input)
param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
# 68 pts
pts68 = predict_68pts(param, roi_box)
# two-step for more accurate bbox to crop face
if args.bbox_init == 'two':
roi_box = parse_roi_box_from_landmark(pts68)
img_step2 = crop_img(img_ori, roi_box)
img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
input = transform(img_step2).unsqueeze(0)
with torch.no_grad():
if args.mode == 'gpu':
input = input.cuda()
param = model(input)
param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
pts68 = predict_68pts(param, roi_box)
pts_res.append(pts68)
P, pose = parse_pose(param)
Ps.append(P)
poses.append(pose)
# dense face 3d vertices
if args.dump_ply or args.dump_vertex or args.dump_depth or args.dump_pncc or args.dump_obj:
vertices = predict_dense(param, roi_box)
vertices_lst.append(vertices)
if args.dump_ply:
dump_to_ply(vertices, tri, '{}_{}.ply'.format(img_fp.replace(suffix, ''), ind))
if args.dump_vertex:
dump_vertex(vertices, '{}_{}.mat'.format(img_fp.replace(suffix, ''), ind))
if args.dump_pts:
wfp = '{}_{}.txt'.format(img_fp.replace(suffix, ''), ind)
np.savetxt(wfp, pts68, fmt='%.3f')
print('Save 68 3d landmarks to {}'.format(wfp))
if args.dump_roi_box:
wfp = '{}_{}.roibox'.format(img_fp.replace(suffix, ''), ind)
np.savetxt(wfp, roi_box, fmt='%.3f')
print('Save roi box to {}'.format(wfp))
if args.dump_paf:
wfp_paf = '{}_{}_paf.jpg'.format(img_fp.replace(suffix, ''), ind)
wfp_crop = '{}_{}_crop.jpg'.format(img_fp.replace(suffix, ''), ind)
paf_feature = gen_img_paf(img_crop=img, param=param, kernel_size=args.paf_size)
cv2.imwrite(wfp_paf, paf_feature)
cv2.imwrite(wfp_crop, img)
print('Dump to {} and {}'.format(wfp_crop, wfp_paf))
if args.dump_obj:
wfp = '{}_{}.obj'.format(img_fp.replace(suffix, ''), ind)
colors = get_colors(img_ori, vertices)
write_obj_with_colors(wfp, vertices, tri, colors)
print('Dump obj with sampled texture to {}'.format(wfp))
ind += 1
if args.dump_pose:
# P, pose = parse_pose(param) # Camera matrix (without scale), and pose (yaw, pitch, roll, to verify)
img_pose = plot_pose_box(img_ori, Ps, pts_res)
wfp = img_fp.replace(suffix, '_pose.jpg')
cv2.imwrite(wfp, img_pose)
print('Dump to {}'.format(wfp))
if args.dump_depth:
wfp = img_fp.replace(suffix, '_depth.png')
# depths_img = get_depths_image(img_ori, vertices_lst, tri-1) # python version
depths_img = cget_depths_image(img_ori, vertices_lst, tri - 1) # cython version
cv2.imwrite(wfp, depths_img)
print('Dump to {}'.format(wfp))
if args.dump_pncc:
wfp = img_fp.replace(suffix, '_pncc.png')
pncc_feature = cpncc(img_ori, vertices_lst, tri - 1) # cython version
cv2.imwrite(wfp, pncc_feature[:, :, ::-1]) # cv2.imwrite will swap RGB -> BGR
print('Dump to {}'.format(wfp))
if args.dump_res:
draw_landmarks(img_ori, pts_res, wfp=img_fp.replace(suffix, '_3DDFA.jpg'), show_flg=args.show_flg)
else:
print("Main_Done")
'''
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='3DDFA inference pipeline')
parser.add_argument('-f', '--files', nargs='+',
help='image files paths fed into network, single or multiple images')
parser.add_argument('-m', '--mode', default='cpu', type=str, help='gpu or cpu mode')
parser.add_argument('--show_flg', default='true', type=str2bool, help='whether show the visualization result')
parser.add_argument('--bbox_init', default='one', type=str,
help='one|two: one-step bbox initialization or two-step')
parser.add_argument('--dump_res', default='true', type=str2bool, help='whether write out the visualization image')
parser.add_argument('--dump_vertex', default='false', type=str2bool,
help='whether write out the dense face vertices to mat')
parser.add_argument('--dump_ply', default='true', type=str2bool)
parser.add_argument('--dump_pts', default='true', type=str2bool)
parser.add_argument('--dump_roi_box', default='false', type=str2bool)
parser.add_argument('--dump_pose', default='true', type=str2bool)
parser.add_argument('--dump_depth', default='true', type=str2bool)
parser.add_argument('--dump_pncc', default='true', type=str2bool)
parser.add_argument('--dump_paf', default='true', type=str2bool)
parser.add_argument('--paf_size', default=3, type=int, help='PAF feature kernel size')
parser.add_argument('--dump_obj', default='true', type=str2bool)
parser.add_argument('--dlib_bbox', default='true', type=str2bool, help='whether use dlib to predict bbox')
parser.add_argument('--dlib_landmark', default='true', type=str2bool,
help='whether use dlib landmark to crop image')
args = parser.parse_args()
main(args)
'''
#!/usr/bin/env python3
# coding: utf-8
"""
The pipeline of 3DDFA prediction: given one image, predict the 3d face vertices, 68 landmarks and visualization.
[todo]
1. CPU optimization: https://pmchojnacki.wordpress.com/2018/10/07/slow-pytorch-cpu-performance
"""
import torch
import torchvision.transforms as transforms
from . import mobilenet_v1
import numpy as np
import cv2
import os
from tqdm import tqdm
import face_alignment
from .utils.ddfa import ToTensorGjz, NormalizeGjz
import scipy.io as sio
from .utils.inference import parse_roi_box_from_landmark, crop_img, predict_68pts, predict_dense, get_colors, \
get_5lmk_from_68lmk
from .utils.estimate_pose import parse_pose
from .utils.params import param_mean, param_std
from .utils.render import crender_colors
import torch.backends.cudnn as cudnn
__author__ = 'cleardusk'
STD_SIZE = 120
def main(args):
# 1. load pre-trained model
checkpoint_fp = 'algorithm/DDFA/models/phase1_wpdc_vdc.pth.tar'
arch = 'mobilenet_1'
checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
model = getattr(mobilenet_v1, arch)(num_classes=62) # 62 = 12(pose) + 40(shape) +10(expression)
model_dict = model.state_dict()
# because the model is trained by multiple gpus, prefix module should be removed
for k in checkpoint.keys():
model_dict[k.replace('module.', '')] = checkpoint[k]
model.load_state_dict(model_dict)
if args.mode == 'gpu':
cudnn.benchmark = True
model = model.cuda()
model.eval()
tri = sio.loadmat('algorithm/DDFA/visualize/tri.mat')['tri']
transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
# 2. parse images list
with open(args.img_list) as f:
img_list = [x.strip() for x in f.readlines()]
landmark_list = []
alignment_model = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False)
if not os.path.exists(args.save_dir):
os.mkdir(args.save_dir)
if not os.path.exists(args.save_lmk_dir):
os.mkdir(args.save_lmk_dir)
for img_idx, img_fp in enumerate(tqdm(img_list)):
img_ori = cv2.imread(os.path.join(args.img_prefix, img_fp))
print("Image", img_fp)
pts_res = []
Ps = [] # Camera matrix collection
poses = [] # pose collection, [todo: validate it]
# vertices_lst = [] # store multiple face vertices
# ind = 0
# suffix = get_suffix(img_fp)
# face alignment model use RGB as input, result is a tuple with landmarks and boxes
preds = alignment_model.get_landmarks(img_ori[:, :, ::-1])
pts_2d_68 = preds[0]
pts_2d_5 = get_5lmk_from_68lmk(pts_2d_68)
landmark_list.append(pts_2d_5)
roi_box = parse_roi_box_from_landmark(pts_2d_68.T)
img = crop_img(img_ori, roi_box)
# import pdb; pdb.set_trace()
# forward: one step
img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
input = transform(img).unsqueeze(0)
with torch.no_grad():
if args.mode == 'gpu':
input = input.cuda()
param = model(input)
param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
# 68 pts
pts68 = predict_68pts(param, roi_box)
# two-step for more accurate bbox to crop face
if args.bbox_init == 'two':
roi_box = parse_roi_box_from_landmark(pts68)
img_step2 = crop_img(img_ori, roi_box)
img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
input = transform(img_step2).unsqueeze(0)
with torch.no_grad():
if args.mode == 'gpu':
input = input.cuda()
param = model(input)
param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
pts68 = predict_68pts(param, roi_box)
pts_res.append(pts68)
P, pose = parse_pose(param)
Ps.append(P)
poses.append(pose)
# dense face 3d vertices
vertices = predict_dense(param, roi_box)
if args.dump_2d_img:
wfp_2d_img = os.path.join(args.save_dir, os.path.basename(img_fp))
colors = get_colors(img_ori, vertices)
# aligned_param = get_aligned_param(param)
# vertices_aligned = predict_dense(aligned_param, roi_box)
# h, w, c = 120, 120, 3
h, w, c = img_ori.shape
img_2d = crender_colors(vertices.T, (tri - 1).T, colors[:, ::-1], h, w)
cv2.imwrite(wfp_2d_img, img_2d[:, :, ::-1])
if args.dump_param:
split = img_fp.split('/')
save_name = os.path.join(args.save_dir, '{}.txt'.format(os.path.splitext(split[-1])[0]))
this_param = param * param_std + param_mean
this_param = np.concatenate((this_param, roi_box))
this_param.tofile(save_name, sep=' ')
if args.dump_lmk:
save_path = os.path.join(args.save_lmk_dir, 'realign_lmk_')
with open(save_path, 'w') as f:
for idx, (fname, land) in enumerate(zip(img_list, landmark_list)):
# f.write('{} {} {} {}')
land = land.astype(np.int)
land_str = ' '.join([str(x) for x in land])
msg = f'{fname} {idx} {land_str}\n'
f.write(msg)
if __name__ == '__main__':
'''
parser = argparse.ArgumentParser(description='3DDFA inference pipeline')
parser.add_argument('-m', '--mode', default='gpu', type=str, help='gpu or cpu mode')
parser.add_argument('--bbox_init', default='two', type=str,
help='one|two: one-step bbox initialization or two-step')
parser.add_argument('--dump_2d_img', default='true', type=str2bool, help='whether to save 3d rendered image')
parser.add_argument('--dump_param', default='true', type=str2bool, help='whether to save param')
parser.add_argument('--dump_lmk', default='true', type=str2bool, help='whether to save landmarks')
parser.add_argument('--save_dir', default='results', type=str, help='dir to save result')
parser.add_argument('--save_lmk_dir', default='example', type=str, help='dir to save landmark result')
parser.add_argument('--img_list', default='example/file_list.txt', type=str, help='test image list file')
parser.add_argument('--img_prefix', default='example/Images/', type=str, help='test image prefix')
parser.add_argument('--rank', default=0, type=int, help='used when parallel run')
parser.add_argument('--world_size', default=1, type=int, help='used when parallel run')
parser.add_argument('--resume_idx', default=0, type=int)
args = parser.parse_args()
'''
# main(args)
from torch.utils.data import Dataset
import numpy as np
import cv2
from utils.inference import crop_img, parse_roi_box_from_landmark
def cv2_loader(img_str):
img_array = np.frombuffer(img_str, dtype=np.uint8)
return cv2.imdecode(img_array, cv2.IMREAD_COLOR)
class McDataset(Dataset):
def __init__(self, img_list, landmarks, std_size=120, transform=None):
self.img_list = img_list
self.landmarks = landmarks
self.transform = transform
self.std_size = std_size
assert len(self.img_list) == len(self.landmarks)
self.num = len(self.img_list)
self.initialized = False
def __len__(self):
return self.num
def __getitem__(self, idx):
filename = self.img_list[idx]
ori_img = cv2.imread(filename)
landmark = self.landmarks[idx]
# preprocess img
roi_box = parse_roi_box_from_landmark(landmark.T)
img = crop_img(ori_img, roi_box)
img = cv2.resize(img, dsize=(self.std_size, self.std_size), interpolation=cv2.INTER_LINEAR)
if self.transform is not None:
img = self.transform(img)
return img, ori_img, filename, np.array(roi_box)
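# Illustrative usage sketch (hypothetical file names and landmarks): each item is cropped
# around the ROI box derived from its 68-point landmarks and resized to std_size before
# the optional transform is applied.
#
#   dataset = McDataset(['face.jpg'], [np.random.rand(68, 2) * 120], std_size=120)
#   img, ori_img, filename, roi_box = dataset[0]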
#!/usr/bin/env python3
# coding: utf-8
"""
The pipeline of 3DDFA prediction: given one image, predict the 3d face vertices, 68 landmarks and visualization.
[todo]
1. CPU optimization: https://pmchojnacki.wordpress.com/2018/10/07/slow-pytorch-cpu-performance
"""
import torch
import torchvision.transforms as transforms
import mobilenet_v1
import numpy as np
import cv2
import os
from tqdm import tqdm
import time
from utils.ddfa import ToTensorGjz, NormalizeGjz, str2bool
from utils.inference import parse_roi_box_from_landmark, crop_img, predict_68pts, parse_quality_list_part
from utils.params import param_mean, param_std
import argparse
import torch.backends.cudnn as cudnn
from simple_dataset import McDataset
from torch.utils.data import DataLoader
__author__ = 'cleardusk'
STD_SIZE = 120
def main(args):
# 1. load pre-trained model
checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
arch = 'mobilenet_1'
checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
model = getattr(mobilenet_v1, arch)(num_classes=62) # 62 = 12(pose) + 40(shape) +10(expression)
model_dict = model.state_dict()
# because the model is trained by multiple gpus, prefix module should be removed
for k in checkpoint.keys():
model_dict[k.replace('module.', '')] = checkpoint[k]
model.load_state_dict(model_dict)
if args.mode == 'gpu':
cudnn.benchmark = True
model = model.cuda()
model.eval()
# tri = sio.loadmat('visualize/tri.mat')['tri']
transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
if not os.path.exists(args.save_dir):
os.mkdir(args.save_dir)
# 2. parse images list and landmark
lmk_file = args.lmk_file
ts = time.time()
rank_land, rank_img_list, start, end = parse_quality_list_part(lmk_file, args.world_size, args.rank,
args.resume_idx)
print('parse land file in {:.3f} seconds'.format(time.time() - ts))
# for batch processing
print('World size {}, rank {}, start from {}, end with {}'.format(args.world_size, args.rank, start, end))
dataset = McDataset(rank_img_list, rank_land, transform=transform, std_size=STD_SIZE)
dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=False, num_workers=2, pin_memory=True)
for img_idx, (inputs, ori_imgs, img_fps, roi_boxes) in enumerate(tqdm(dataloader)):
# forward: one step
with torch.no_grad():
if args.mode == 'gpu':
inputs = inputs.cuda()
params = model(inputs)
params = params.cpu().numpy()
roi_boxes = roi_boxes.numpy()
outputs_roi_boxes = roi_boxes
if args.bbox_init == 'two':
step_two_ori_imgs = []
step_two_roi_boxes = []
ori_imgs = ori_imgs.numpy()
for ii in range(params.shape[0]):
# 68 pts
pts68 = predict_68pts(params[ii], roi_boxes[ii])
# two-step for more accurate bbox to crop face
roi_box = parse_roi_box_from_landmark(pts68)
img_step2 = crop_img(ori_imgs[ii], roi_box)
img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
# input = transform(img_step2).unsqueeze(0)
step_two_ori_imgs.append(transform(img_step2))
step_two_roi_boxes.append(roi_box)
with torch.no_grad():
step_two_ori_imgs = torch.stack(step_two_ori_imgs, dim=0)
inputs = step_two_ori_imgs
if args.mode == 'gpu':
inputs = inputs.cuda()
params = model(inputs)
params = params.cpu().numpy()
outputs_roi_boxes = step_two_roi_boxes
# dump results
if args.dump_param:
for img_fp, param, roi_box in zip(img_fps, params, outputs_roi_boxes):
split = img_fp.split('/')
save_name = os.path.join(args.save_dir, '{}.txt'.format(os.path.splitext(split[-1])[0]))
this_param = param * param_std + param_mean
this_param = np.concatenate((this_param, roi_box))
this_param.tofile(save_name, sep=' ')
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='3DDFA inference pipeline')
parser.add_argument('-m', '--mode', default='gpu', type=str, help='gpu or cpu mode')
parser.add_argument('--bbox_init', default='two', type=str,
help='one|two: one-step bbox initialization or two-step')
parser.add_argument('--dump_2d_img', default='false', type=str2bool, help='whether to save 3d rendered image')
parser.add_argument('--dump_param', default='true', type=str2bool, help='whether to save param')
parser.add_argument('--save_dir', default='results', type=str, help='dir to save result')
parser.add_argument('--lmk_file', default='quality_list', type=str, help='landmarks file')
parser.add_argument('--rank', default=0, type=int, help='used when parallel run')
parser.add_argument('--world_size', default=1, type=int, help='used when parallel run')
parser.add_argument('--resume_idx', default=0, type=int)
parser.add_argument('--batch_size', default=80, type=int, help='batch size')
args = parser.parse_args()
main(args)