Unverified Commit a3c31c0c authored by Nikolaos Passalis, committed by GitHub

Merge pull request #213 from opendr-eu/merge-master-into-develop

Merge `master` branch into `develop` branch
parents ddfa8012 a5c65a9e
Showing with 1263 additions and 6 deletions
name: Publisher
# Trigger on new github release, a tag with format vX.Y.Z is expected (used to tag the docker)
on:
release:
types: [published]
env:
OPENDR_VERSION: ${{ github.event.release.tag_name }}
defaults:
run:
shell: bash
jobs:
publish-wheel:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
with:
submodules: true
- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Install prerequisites
run: |
python -m pip install --upgrade pip
pip install setuptools wheel twine
- name: Build Wheel
run: |
./bin/build_wheel.sh
- name: Upload Wheel
env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
twine upload dist/*
publish-docker-cpu:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
with:
submodules: true
- name: Build Docker Image
run: docker build --tag opendr-toolkit:cpu_$OPENDR_VERSION --file Dockerfile .
- name: Login to Docker Hub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
- name: Publish Image
run: |
docker tag opendr-toolkit:cpu_$OPENDR_VERSION opendr/opendr-toolkit:cpu_$OPENDR_VERSION
docker push opendr/opendr-toolkit:cpu_$OPENDR_VERSION
publish-docker-cuda:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
with:
submodules: true
- name: Build Docker Image
run: docker build --tag opendr-toolkit:cuda_$OPENDR_VERSION --file Dockerfile-cuda .
- name: Login to Docker Hub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
- name: Publish Image
run: |
docker tag opendr-toolkit:cuda_$OPENDR_VERSION opendr/opendr-toolkit:cuda_$OPENDR_VERSION
docker push opendr/opendr-toolkit:cuda_$OPENDR_VERSION
@@ -61,7 +61,6 @@ jobs:
source venv/bin/activate
wget https://raw.githubusercontent.com/opendr-eu/opendr/master/dependencies/pip_requirements.txt
cat pip_requirements.txt | xargs -n 1 -L 1 pip install
# Test new package
pip install opendr-toolkit
python -m unittest discover -s tests/sources/tools/${{ matrix.package }}
test-docker:
@@ -89,7 +88,7 @@ jobs:
- control/mobile_manipulation
- simulation/human_model_generation
- control/single_demo_grasp
#- perception/object_tracking_3d
# - perception/object_tracking_3d
runs-on: ${{ matrix.os }}
steps:
- name: Set up Python 3.8
@@ -12,7 +12,7 @@ defaults:
jobs:
cleanup-runs:
if: ${{ contains(github.event.pull_request.labels.*.name, 'test sources') || contains(github.event.pull_request.labels.*.name, 'test tools') || github.event_name == 'schedule' }}
if: ${{ contains(github.event.pull_request.labels.*.name, 'test sources') || contains(github.event.pull_request.labels.*.name, 'test tools') || contains(github.event.pull_request.labels.*.name, 'test release') || github.event_name == 'schedule' }}
runs-on: ubuntu-latest
steps:
- uses: rokroskar/workflow-run-cleanup-action@master
@@ -106,4 +106,134 @@ jobs:
source tests/sources/tools/control/mobile_manipulation/run_ros.sh
python -m unittest discover -s tests/sources/tools/${{ matrix.package }}
fi
build-wheel:
needs: cleanup-runs
if: ${{ contains(github.event.pull_request.labels.*.name, 'test release') || github.event_name == 'schedule' }}
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
with:
submodules: true
- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Install prerequisites
run: |
python -m pip install --upgrade pip
pip install setuptools wheel twine
- name: Build Wheel
run:
./bin/build_wheel.sh
- name: Upload wheel as artifact
uses: actions/upload-artifact@v2
with:
path:
dist/*.tar.gz
build-docker:
needs: cleanup-runs
if: ${{ contains(github.event.pull_request.labels.*.name, 'test release') || github.event_name == 'schedule' }}
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
with:
submodules: true
- name: Build image
run: |
docker build --tag opendr/opendr-toolkit:cpu_test --file Dockerfile .
docker save opendr/opendr-toolkit:cpu_test > cpu_test.zip
- name: Upload image artifact
uses: actions/upload-artifact@v2
with:
path:
cpu_test.zip
test-wheel:
needs: build-wheel
if: ${{ contains(github.event.pull_request.labels.*.name, 'test release') || github.event_name == 'schedule' }}
strategy:
matrix:
os: [ubuntu-20.04]
package:
- engine
- utils
- perception/activity_recognition
- perception/compressive_learning
- perception/face_recognition
- perception/heart_anomaly_detection
- perception/multimodal_human_centric
- perception/object_tracking_2d
- perception/pose_estimation
- perception/speech_recognition
- perception/skeleton_based_action_recognition
- perception/semantic_segmentation
- perception/object_detection_2d
- perception/facial_expression_recognition
# - perception/object_detection_3d
# - control/mobile_manipulation
# - simulation/human_model_generation
# - control/single_demo_grasp
# - perception/object_tracking_3d
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
with:
submodules: true
- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Download artifact
uses: actions/download-artifact@v2
with:
path: artifact
- name: Get branch name
id: branch-name
uses: tj-actions/branch-names@v5.1
- name: Test Wheel
run: |
export DISABLE_BCOLZ_AVX2=true
sudo apt -y install python3.8-venv libfreetype6-dev git build-essential cmake python3-dev wget libopenblas-dev libsndfile1 libboost-dev python3-dev
python3 -m venv venv
source venv/bin/activate
wget https://raw.githubusercontent.com/opendr-eu/opendr/${{ steps.branch-name.outputs.current_branch }}/dependencies/pip_requirements.txt
cat pip_requirements.txt | xargs -n 1 -L 1 pip install
pip install ./artifact/artifact/*.tar.gz
python -m unittest discover -s tests/sources/tools/${{ matrix.package }}
test-docker:
needs: build-docker
if: ${{ contains(github.event.pull_request.labels.*.name, 'test release') || github.event_name == 'schedule' }}
strategy:
matrix:
os: [ubuntu-20.04]
package:
- engine
- utils
- perception/activity_recognition
- perception/compressive_learning
- perception/face_recognition
- perception/heart_anomaly_detection
- perception/multimodal_human_centric
- perception/object_tracking_2d
- perception/pose_estimation
- perception/speech_recognition
- perception/skeleton_based_action_recognition
- perception/semantic_segmentation
- perception/object_detection_2d
- perception/facial_expression_recognition
- perception/object_detection_3d
- control/mobile_manipulation
- simulation/human_model_generation
- control/single_demo_grasp
# - perception/object_tracking_3d
runs-on: ubuntu-20.04
steps:
- name: Download artifact
uses: actions/download-artifact@v2
with:
path: artifact
- name: Test docker
run: |
docker load < ./artifact/artifact/cpu_test.zip
docker run --name toolkit -i opendr/opendr-toolkit:cpu_test bash
docker start toolkit
docker exec -i toolkit bash -c "source bin/activate.sh && source tests/sources/tools/control/mobile_manipulation/run_ros.sh && python -m unittest discover -s tests/sources/tools/${{ matrix.package }}"
@@ -67,11 +67,14 @@ Neither the copyright holder nor any applicable licensor will be liable for any
- [single_demo_grasp Module](single-demonstration-grasping.md)
- `simulation` Module
- [human_model_generation Module](human_model_generation.md)
- `data_generation` Module
- [synthetic_facial_image_generation Module](synthetic_facial_image_generator.md)
- [human_model_generation Module](human-model-generation.md)
- `utils` Module
- [Hyperparameter Tuning Module](hyperparameter_tuner.md)
- `Stand-alone Utility Frameworks`
- [Engine Agnostic Gym Environment with Reactive extension (EAGERx)](eagerx.md)
- `Stand-alone Utility Frameworks`
- [Engine Agnostic Gym Environment with Reactive extension (EAGERx)](eagerx.md)
- [ROSBridge Package](rosbridge.md)
- [C Inference API](c-api.md)
- [data.h](c-data-h.md)
## synthetic_facial_image_generator module
The *synthetic_facial_image_generator* module contains the *MultiviewDataGeneration* class, which implements the multi-view facial image rendering operation.
### Class MultiviewDataGeneration
The *MultiviewDataGeneration* class is a wrapper of the Rotate-and-Render [[1]](#R-R-paper) photorealistic multi-view facial image generator based on the original
[Rotate-and-Render implementation](https://github.com/Hangz-nju-cuhk/Rotate-and-Render).
It can be used to perform multi-view facial image generation from a single-view image in the wild.
The [MultiviewDataGeneration](#projects.data_generation.synthetic-multi-view-facial-image-generation.3ddfa.SyntheticDataGeneration.py) class has the
following public methods:
#### `MultiviewDataGeneration` constructor
```python
MultiviewDataGeneration(self, args)
```
Constructor main parameters (*args*) explanation:
- **path_in**: *str, default='./example/Images'* \
An absolute path that indicates the folder containing the set of single-view facial image snapshots to be processed by the algorithm.
- **path_3ddfa**: *str, default='./'* \
An absolute path that indicates the 3ddfa module folder of the software structure, as presented in the repository. This path is necessary so that the software can create, inside the `results` folder of this path, the folders for the intermediate/temporary storage of files generated during pre-processing, such as 3D face models and facial landmarks.
- **save_path**: *str, default='./results'* \
The folder in which the output images are stored.
- **val_yaw**: *str, default='10,20'* \
The yaw angles (in the interval [−90°,90°]) for which the rendered images will be produced.
- **val_pitch**: *str, default=' 30,40'* \
The pitch angles (in the interval [−90°,90°]) for which the rendered images will be produced.
- **device**: *{'cuda', 'cpu'}, default='cpu'* \
Specifies the device to be used.
#### `MultiviewDataGeneration.eval`
```python
MultiviewDataGeneration.eval()
```
This function implements the main procedure for the creation of the multi-view facial images, which consists of three stages.
The main parameters of the 3DDFA network are initialized here rather than in the constructor; the first stage then detects the candidate faces in the input images and fits a 3D head mesh to them using 3DDFA.
The second stage extracts the facial landmarks in order to derive the head pose and align the images with the 3D head-model mesh.
Finally, the main functionality of the multi-view facial image rendering is executed by loading the respective network parameters.
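A minimal programmatic sketch is given below. It assumes the constructor arguments are passed as a simple namespace whose fields mirror the documented parameters and that the script is run from the module folder; the actual tool (`tool_synthetic_facial_generation.py`) builds the arguments with an `argparse` parser that defines additional options required by the underlying 3DDFA and Rotate-and-Render stages.
```python
from argparse import Namespace

from SyntheticDataGeneration import MultiviewDataGeneration

# Hypothetical argument namespace mirroring only the documented parameters;
# the actual parser exposes additional options needed by the rendering pipeline.
args = Namespace(
    path_in='./demos/imgs_input/',
    path_3ddfa='./algorithm/DDFA/',
    save_path='./results',
    val_yaw='10,40',
    val_pitch='10,30',
    device='cpu',
)

generator = MultiviewDataGeneration(args)
# generator.eval()  # runs the three stages; requires the full argument set of the original parser
```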
### Usage Example
```sh
python3 tool_synthetic_facial_generation.py -path_in ./demos/imgs_input/ -path_3ddfa ./algorithm/DDFA/ -save_path ./results -val_yaw 10, 40 -val_pitch 10, 30 -device cuda
```
The corresponding paths for the input and output folders, as well as the pitch and yaw angles for which the user wants to produce the facial images, can easily be set when the class is created and the method is initialized.
The process is executed for the CNN parameters and GPUs specified in the arguments of the aforementioned command.
Users who wish to modify these parameters should change the respective input arguments, which are derived from a parser and include path_in, path_3ddfa, save_path, val_yaw, val_pitch, etc.
#### References
<a name="R-R-paper" href="https://github.com/Hangz-nju-cuhk/Rotate-and-Render">[1]</a>
Hang Zhou, Jihao Liu, Ziwei Liu, Yu Liu, Xiaogang Wang, Rotate-and-Render: Unsupervised Photorealistic Face Rotation from Single-View Images,
[arXiv](https://arxiv.org/abs/2003.08124#).
# Synthetic Multi-view Facial Image Generation based on Rotate-and-Render: Unsupervised Photorealistic Face Rotation from Single-View Images (CVPR 2020)
Based on: [[Rotate-and-Render: Unsupervised Photorealistic Face Rotation from Single-View Images]](https://arxiv.org/abs/2003.08124)
We utilize publicly available code, with small modifications so that it can be easily executed: an unsupervised framework that synthesizes photorealistic rotated facial images from a single facial image, or from multiple such images (one per person).
The implemented method allows for rotating faces in the 3D space back and forth, and then re-rendering them to the 2D plane.
The generated multi-view facial images can be used for different learning tasks, such as in self-supervised learning tasks.
## Sources:
* Face Alignment in Full Pose Range: A 3D Total Solution (IEEE TPAMI 2017)
* Neural 3D Mesh Renderer (CVPR 2018)
* Rotate-and-Render: Unsupervised Photorealistic Face Rotation from Single-View Images (CVPR 2020)
## Requirements
* Python 3.6 is used. Basic requirements are listed in the 'requirements.txt'.
```
pip3 install -r requirements.txt
```
* Install the [Neural_Renderer](https://github.com/daniilidis-group/neural_renderer) following the instructions.
```
pip install git+https://github.com/cidl-auth/neural_renderer
```
* Download the checkpoint and BFM model from [checkpoint.zip](ftp://opendrdata.csd.auth.gr/data_generation/synthetic_multi-view-facial-generator/ckpt_and_bfm.zip), put it in ```3ddfa``` and unzip it:
```bash
wget ftp://opendrdata.csd.auth.gr/data_generation/synthetic_multi-view-facial-generator/checkpoints.zip
unzip checkpoints.zip
unzip checkpoints/ckpt_and_bfm.zip -d 3ddfa
```
The 3D models are borrowed from [3DDFA](https://github.com/cleardusk/3DDFA).
* Compile cython code and download remaining models:
```bash
cd algorithm/DDFA/utils/cython/
python3 setup.py build_ext -i
cd ../../../..
mkdir algorithm/DDFA/models
mkdir algorithm/DDFA/example
wget https://github.com/cleardusk/3DDFA/blob/master/models/phase1_wpdc_vdc.pth.tar?raw=true -O algorithm/DDFA/models/phase1_wpdc_vdc.pth.tar
```
## Usage Example
1. Execute the one-step OPENDR function ```tool_synthetic_facial_generation.py```, specifying the input images folder, the output folder and the desired pitch and yaw angles (in the range -90 to 90 degrees) for which the multi-view facial images will be generated, as indicated in the command line:
```sh
python3 tool_synthetic_facial_generation.py -path_in ./demos/imgs_input/ -path_3ddfa ./algorithm/DDFA/ -save_path ./results -val_yaw 10, 40 -val_pitch 10, 30 -device cuda
```
2. The results can be found in ```results/rs_model/example/```, where multi-view facial images are generated for every person in a respective folder.
## License
Rotate-and-Render is provided under [CC-BY-4.0](https://github.com/Hangz-nju-cuhk/Rotate-and-Render/blob/master/LICENSE) license.
SPADE, SyncBN and 3DDFA are under the [MIT License](https://github.com/tasostefas/opendr_internal/blob/synthetic-multi-view-facial-generator/projects/data_generation/synthetic-multi-view-facial-image-generation/3ddfa/LICENSE).
## Acknowledgement
Large parts of the code are taken from:
* The structure of this codebase is borrowed from [SPADE](https://github.com/NVlabs/SPADE).
* The [SyncBN](https://github.com/vacancy/Synchronized-BatchNorm-PyTorch) module is used in the current code.
* The [3DDFA](https://github.com/cleardusk/3DDFA) implementation for 3D reconstruction.
* The [Rotate-and-Render](https://github.com/Hangz-nju-cuhk/Rotate-and-Render/) code,
with the following modifications to make it compatible with the OpenDR specifications:
## Minor Modifications
1. All scripts: PEP8 changes
2. ```3ddfa/preprocessing_1.py, 3ddfa/preprocessing_2.py, test_multipose.py``` Modified to work as callable functions
3. ```options/base_options.py, options/test_options.py ``` Commented out/changed several parameters so they can be easily executed
4. ```models/networks/render.py``` Minor functional changes
5. The OpenDR-created functions are ```SyntheticDataGeneration.py, tool_synthetic_facial_generation.py```
6. The rest is taken from the aforementioned repositories
# Copyright 2020-2022 OpenDR European Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# MIT License
#
# Copyright (c) 2019 Jian Zhao
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#!/usr/bin/env python3.7
# coding: utf-8
from tqdm import tqdm
from shutil import copyfile
import cv2
import os
from algorithm.DDFA import preprocessing_1
from algorithm.DDFA import preprocessing_2
from algorithm.Rotate_and_Render import test_multipose
class MultiviewDataGeneration():
def __init__(self, args):
self.path_in = args.path_in
self.key = str(args.path_3ddfa + "/example/Images/")
self.key1 = str(args.path_3ddfa + "/example/")
self.key2 = str(args.path_3ddfa + "/results/")
self.save_path = args.save_path
self.val_yaw = args.val_yaw
self.val_pitch = args.val_pitch
self.args = args
def eval(self):
# STAGE No1 : detect faces and fitting to 3d mesh by main.py execution
list_im = []
print("START")
a = open("file_list.txt", "w")
for subdir, dirs, files in os.walk(self.path_in):
current_directory_path = os.path.abspath(subdir)
for file in files:
name, ext = os.path.splitext(file)
if ext == ".jpg":
current_image_path = os.path.join(current_directory_path, file)
current_image = cv2.imread(current_image_path)
list_im.append(current_image_path)
a.write(str(file) + os.linesep)
cv2.imwrite(os.path.join(self.key, file), current_image)
self.args.files = list_im.copy()
list_im.clear()
preprocessing_1.main(self.args)
a.close()
# STAGE No2: Landmarks Output with inference.py execution
im_list2 = []
d = open(os.path.join(self.key1, 'realign_lmk'), "w")
for subdir, dirs, files in os.walk(self.path_in):
current_directory_path = os.path.abspath(subdir)
self.args.img_prefix = current_directory_path
self.args.save_dir = os.path.abspath(self.key2)
self.args.save_lmk_dir = os.path.abspath(self.key1)
if not os.path.exists(self.args.save_dir):
os.mkdir(self.args.save_dir)
if not os.path.exists(self.args.save_lmk_dir):
os.mkdir(self.args.save_lmk_dir)
list_lfw_batch = './file_list.txt'
dst = os.path.join(self.args.save_lmk_dir, "file_list.txt")
copyfile(list_lfw_batch, dst)
b = open("txt_name_batch.txt", "w")
for file in files:
with open(list_lfw_batch) as f:
img_list = [x.strip() for x in f.readlines()]
for img_idx, img_fp in enumerate(tqdm(img_list)):
if img_fp == str(file):
im_list2.append(str(file))
b.write(str(file) + os.linesep)
self.args.img_list = './txt_name_batch.txt'
b.close()
self.args.dump_lmk = 'true'
im_list2.clear()
preprocessing_2.main(self.args)
with open(os.path.join(self.args.save_lmk_dir, 'realign_lmk_')) as f:
img_list = [x.strip() for x in f.readlines()]
for img_idx, img_fp in enumerate(tqdm(img_list)):
d.write(img_fp + os.linesep)
d.close()
# STAGE No3: Generate Facial Images in specific pitch and yaw angles
test_multipose.main(self.save_path, self.val_yaw, self.val_pitch)
def fit(self):
raise NotImplementedError()
def infer(self):
raise NotImplementedError()
def load(self):
raise NotImplementedError()
def optimize(self):
raise NotImplementedError()
def reset(self):
raise NotImplementedError()
def save(self):
raise NotImplementedError()
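% bfm_show.m: load the refined BFM model, render its mean face shape with the 68 3D keypoints and save the frame to imgs/bfm_noneck.jpg.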
model = load('model_refine.mat');
model = model.model_refine;
mu = model.mu_shape + model.mu_exp;
mu = reshape(mu, 3, length(mu) / 3);
tri = model.tri;
keypoints = model.keypoints;
pts68_3d = mu(:, keypoints);
render_face_mesh(mu, tri, pts68_3d);
A = getframe(gcf);
mimg = A.cdata;
imwrite(mimg, 'imgs/bfm_noneck.jpg', 'quality', 95);
The original version with neck:
<p align="center">
<img src="imgs/bfm_noneck.jpg" alt="neck" width="400px">
</p>
[bfm.ply](https://github.com/Hangz-nju-cuhk/Rotate-and-Render/blob/master/3ddfa/BFM_Remove_Neck/bfm.ply)
The image is rendered by MeshLab.
`bfm_show.m` shows how to render it with 68 keypoints in Matlab.
<p align="center">
<img src="imgs/bfm_refine.jpg" alt="no neck">
</p>
Attention: Do not use the `ply` file in training.
function render_face_mesh(vertex, tri, pts68_3d)
trisurf(tri', vertex(1, :), vertex(2, :), vertex(3, :), ones(size(vertex, 2),1), 'edgecolor', 'none');
re=[1 1 1];
colormap(re);
light('Position', [0 0 1], 'Style', 'infinite');
lighting gouraud
axis equal
view([0 90]);
if nargin == 3
hold on; plot3(pts68_3d(1,:), pts68_3d(2,:), pts68_3d(3,:)+1, '*');
end
xlabel('x');
ylabel('y');
zlabel('z');
axis on;
grid on;
end
MIT License
Copyright (c) 2018 Jianzhu Guo
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
#!/usr/bin/env python3
# coding: utf-8
from __future__ import division
"""
Creates a MobileNet Model as defined in:
Andrew G. Howard, Menglong Zhu, Bo Chen, et al. (2017).
MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications.
Copyright (c) Yang Lu, 2017
Modified By cleardusk
"""
import math
import torch.nn as nn
__all__ = ['mobilenet_2', 'mobilenet_1', 'mobilenet_075', 'mobilenet_05', 'mobilenet_025']
class DepthWiseBlock(nn.Module):
def __init__(self, inplanes, planes, stride=1, prelu=False):
super(DepthWiseBlock, self).__init__()
inplanes, planes = int(inplanes), int(planes)
self.conv_dw = nn.Conv2d(inplanes, inplanes, kernel_size=3, padding=1, stride=stride, groups=inplanes,
bias=False)
self.bn_dw = nn.BatchNorm2d(inplanes)
self.conv_sep = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, padding=0, bias=False)
self.bn_sep = nn.BatchNorm2d(planes)
if prelu:
self.relu = nn.PReLU()
else:
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
out = self.conv_dw(x)
out = self.bn_dw(out)
out = self.relu(out)
out = self.conv_sep(out)
out = self.bn_sep(out)
out = self.relu(out)
return out
class MobileNet(nn.Module):
def __init__(self, widen_factor=1.0, num_classes=1000, prelu=False, input_channel=3):
""" Constructor
Args:
widen_factor: config of widen_factor
num_classes: number of classes
"""
super(MobileNet, self).__init__()
block = DepthWiseBlock
self.conv1 = nn.Conv2d(input_channel, int(32 * widen_factor), kernel_size=3, stride=2, padding=1,
bias=False)
self.bn1 = nn.BatchNorm2d(int(32 * widen_factor))
if prelu:
self.relu = nn.PReLU()
else:
self.relu = nn.ReLU(inplace=True)
self.dw2_1 = block(32 * widen_factor, 64 * widen_factor, prelu=prelu)
self.dw2_2 = block(64 * widen_factor, 128 * widen_factor, stride=2, prelu=prelu)
self.dw3_1 = block(128 * widen_factor, 128 * widen_factor, prelu=prelu)
self.dw3_2 = block(128 * widen_factor, 256 * widen_factor, stride=2, prelu=prelu)
self.dw4_1 = block(256 * widen_factor, 256 * widen_factor, prelu=prelu)
self.dw4_2 = block(256 * widen_factor, 512 * widen_factor, stride=2, prelu=prelu)
self.dw5_1 = block(512 * widen_factor, 512 * widen_factor, prelu=prelu)
self.dw5_2 = block(512 * widen_factor, 512 * widen_factor, prelu=prelu)
self.dw5_3 = block(512 * widen_factor, 512 * widen_factor, prelu=prelu)
self.dw5_4 = block(512 * widen_factor, 512 * widen_factor, prelu=prelu)
self.dw5_5 = block(512 * widen_factor, 512 * widen_factor, prelu=prelu)
self.dw5_6 = block(512 * widen_factor, 1024 * widen_factor, stride=2, prelu=prelu)
self.dw6 = block(1024 * widen_factor, 1024 * widen_factor, prelu=prelu)
self.avgpool = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Linear(int(1024 * widen_factor), num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.dw2_1(x)
x = self.dw2_2(x)
x = self.dw3_1(x)
x = self.dw3_2(x)
x = self.dw4_1(x)
x = self.dw4_2(x)
x = self.dw5_1(x)
x = self.dw5_2(x)
x = self.dw5_3(x)
x = self.dw5_4(x)
x = self.dw5_5(x)
x = self.dw5_6(x)
x = self.dw6(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
def mobilenet(widen_factor=1.0, num_classes=1000):
"""
Construct MobileNet.
widen_factor=1.0 for mobilenet_1
widen_factor=0.75 for mobilenet_075
widen_factor=0.5 for mobilenet_05
widen_factor=0.25 for mobilenet_025
"""
model = MobileNet(widen_factor=widen_factor, num_classes=num_classes)
return model
def mobilenet_2(num_classes=62, input_channel=3):
model = MobileNet(widen_factor=2.0, num_classes=num_classes, input_channel=input_channel)
return model
def mobilenet_1(num_classes=62, input_channel=3):
model = MobileNet(widen_factor=1.0, num_classes=num_classes, input_channel=input_channel)
return model
def mobilenet_075(num_classes=62, input_channel=3):
model = MobileNet(widen_factor=0.75, num_classes=num_classes, input_channel=input_channel)
return model
def mobilenet_05(num_classes=62, input_channel=3):
model = MobileNet(widen_factor=0.5, num_classes=num_classes, input_channel=input_channel)
return model
def mobilenet_025(num_classes=62, input_channel=3):
model = MobileNet(widen_factor=0.25, num_classes=num_classes, input_channel=input_channel)
return model
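# Minimal usage sketch (illustrative): 3DDFA uses this backbone to regress 62 parameters
# (12 pose + 40 shape + 10 expression) from a 120x120 face crop.
#
#   import torch
#   net = mobilenet_1(num_classes=62)
#   params = net(torch.randn(1, 3, 120, 120))  # -> tensor of shape (1, 62)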
#!/usr/bin/env python3
# coding: utf-8
"""
The pipeline of 3DDFA prediction: given one image, predict the 3d face vertices, 68 landmarks and visualization.
[todo]
1. CPU optimization: https://pmchojnacki.wordpress.com/2018/10/07/slow-pytorch-cpu-performance
"""
import torch
import torchvision.transforms as transforms
from . import mobilenet_v1
import numpy as np
import cv2
from os import path
import face_alignment
from .utils.ddfa import ToTensorGjz, NormalizeGjz
import scipy.io as sio
from .utils.inference import get_suffix, parse_roi_box_from_landmark, crop_img, predict_68pts, dump_to_ply, \
dump_vertex, draw_landmarks, predict_dense, parse_roi_box_from_bbox, get_colors, write_obj_with_colors
from .utils.cv_plot import plot_pose_box
from .utils.estimate_pose import parse_pose
from .utils.render import cget_depths_image, cpncc
from .utils.paf import gen_img_paf
import torch.backends.cudnn as cudnn
import sys
__author__ = 'cleardusk'
STD_SIZE = 120
def main(args):
# 1. load pre-trained model
checkpoint_fp = 'algorithm/DDFA/models/phase1_wpdc_vdc.pth.tar'
arch = 'mobilenet_1'
checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
model = getattr(mobilenet_v1, arch)(num_classes=62) # 62 = 12(pose) + 40(shape) +10(expression)
model_dict = model.state_dict()
# because the model is trained by multiple gpus, prefix module should be removed
for k in checkpoint.keys():
model_dict[k.replace('module.', '')] = checkpoint[k]
model.load_state_dict(model_dict)
if args.mode == 'gpu':
cudnn.benchmark = True
model = model.cuda()
model.eval()
'''
# 2. load dlib model for face detection and landmark used for face cropping
if args.dlib_landmark:
dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
face_regressor = dlib.shape_predictor(dlib_landmark_model)
if args.dlib_bbox:
face_detector = dlib.get_frontal_face_detector()
'''
face_regressor = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False)
# face_detector = face_regressor.face_detector
# 3. forward
tri = sio.loadmat('algorithm/DDFA/visualize/tri.mat')['tri']
transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
# print(args.files)
for img_fp in args.files:
print(img_fp)
suffix = get_suffix(img_fp)
wfp = '{}_{}.obj'.format(img_fp.replace(suffix, ''), 0)
if not path.exists(wfp):
img_ori = cv2.imread(img_fp)
if img_ori is None:
print("Can't load image, please check the path", file=sys.stderr)
sys.exit(1)
try:
rect
except NameError:
rect = None
'''
if args.dlib_bbox:
rects = face_detector(img_ori, 1)
else:
rects = []
if len(rects) == 0:
rects = dlib.rectangles()
rect_fp = img_fp + '.bbox'
lines = open(rect_fp).read().strip().split('\n')[1:]
for l in lines:
l, r, t, b = [int(_) for _ in l.split(' ')[1:]]
rect = dlib.rectangle(l, r, t, b)
rects.append(rect)
'''
img_rgb = img_ori[:, :, ::-1]
ptss = face_regressor.get_landmarks(img_rgb)
pts_res = []
Ps = [] # Camera matrix collection
poses = [] # pose collection, [todo: validate it]
vertices_lst = [] # store multiple face vertices
ind = 0
for pts in ptss:
# whether use dlib landmark to crop image, if not, use only face bbox to calc roi bbox for cropping
if args.dlib_landmark:
# - use landmark for cropping
# pts = face_regressor(img_ori, rect).parts()
# pts = np.array([[pt.x, pt.y] for pt in pts]).T
roi_box = parse_roi_box_from_landmark(pts.T)
else:
# - use detected face bbox
bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
roi_box = parse_roi_box_from_bbox(bbox)
img = crop_img(img_ori, roi_box)
# forward: one step
img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
input = transform(img).unsqueeze(0)
with torch.no_grad():
if args.mode == 'gpu':
input = input.cuda()
param = model(input)
param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
# 68 pts
pts68 = predict_68pts(param, roi_box)
# two-step for more accurate bbox to crop face
if args.bbox_init == 'two':
roi_box = parse_roi_box_from_landmark(pts68)
img_step2 = crop_img(img_ori, roi_box)
img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
input = transform(img_step2).unsqueeze(0)
with torch.no_grad():
if args.mode == 'gpu':
input = input.cuda()
param = model(input)
param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
pts68 = predict_68pts(param, roi_box)
pts_res.append(pts68)
P, pose = parse_pose(param)
Ps.append(P)
poses.append(pose)
# dense face 3d vertices
if args.dump_ply or args.dump_vertex or args.dump_depth or args.dump_pncc or args.dump_obj:
vertices = predict_dense(param, roi_box)
vertices_lst.append(vertices)
if args.dump_ply:
dump_to_ply(vertices, tri, '{}_{}.ply'.format(img_fp.replace(suffix, ''), ind))
if args.dump_vertex:
dump_vertex(vertices, '{}_{}.mat'.format(img_fp.replace(suffix, ''), ind))
if args.dump_pts:
wfp = '{}_{}.txt'.format(img_fp.replace(suffix, ''), ind)
np.savetxt(wfp, pts68, fmt='%.3f')
print('Save 68 3d landmarks to {}'.format(wfp))
if args.dump_roi_box:
wfp = '{}_{}.roibox'.format(img_fp.replace(suffix, ''), ind)
np.savetxt(wfp, roi_box, fmt='%.3f')
print('Save roi box to {}'.format(wfp))
if args.dump_paf:
wfp_paf = '{}_{}_paf.jpg'.format(img_fp.replace(suffix, ''), ind)
wfp_crop = '{}_{}_crop.jpg'.format(img_fp.replace(suffix, ''), ind)
paf_feature = gen_img_paf(img_crop=img, param=param, kernel_size=args.paf_size)
cv2.imwrite(wfp_paf, paf_feature)
cv2.imwrite(wfp_crop, img)
print('Dump to {} and {}'.format(wfp_crop, wfp_paf))
if args.dump_obj:
wfp = '{}_{}.obj'.format(img_fp.replace(suffix, ''), ind)
colors = get_colors(img_ori, vertices)
write_obj_with_colors(wfp, vertices, tri, colors)
print('Dump obj with sampled texture to {}'.format(wfp))
ind += 1
if args.dump_pose:
# P, pose = parse_pose(param) # Camera matrix (without scale), and pose (yaw, pitch, roll, to verify)
img_pose = plot_pose_box(img_ori, Ps, pts_res)
wfp = img_fp.replace(suffix, '_pose.jpg')
cv2.imwrite(wfp, img_pose)
print('Dump to {}'.format(wfp))
if args.dump_depth:
wfp = img_fp.replace(suffix, '_depth.png')
# depths_img = get_depths_image(img_ori, vertices_lst, tri-1) # python version
depths_img = cget_depths_image(img_ori, vertices_lst, tri - 1) # cython version
cv2.imwrite(wfp, depths_img)
print('Dump to {}'.format(wfp))
if args.dump_pncc:
wfp = img_fp.replace(suffix, '_pncc.png')
pncc_feature = cpncc(img_ori, vertices_lst, tri - 1) # cython version
cv2.imwrite(wfp, pncc_feature[:, :, ::-1]) # cv2.imwrite will swap RGB -> BGR
print('Dump to {}'.format(wfp))
if args.dump_res:
draw_landmarks(img_ori, pts_res, wfp=img_fp.replace(suffix, '_3DDFA.jpg'), show_flg=args.show_flg)
else:
print("Main_Done")
'''
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='3DDFA inference pipeline')
parser.add_argument('-f', '--files', nargs='+',
help='image files paths fed into network, single or multiple images')
parser.add_argument('-m', '--mode', default='cpu', type=str, help='gpu or cpu mode')
parser.add_argument('--show_flg', default='true', type=str2bool, help='whether show the visualization result')
parser.add_argument('--bbox_init', default='one', type=str,
help='one|two: one-step bbox initialization or two-step')
parser.add_argument('--dump_res', default='true', type=str2bool, help='whether write out the visualization image')
parser.add_argument('--dump_vertex', default='false', type=str2bool,
help='whether write out the dense face vertices to mat')
parser.add_argument('--dump_ply', default='true', type=str2bool)
parser.add_argument('--dump_pts', default='true', type=str2bool)
parser.add_argument('--dump_roi_box', default='false', type=str2bool)
parser.add_argument('--dump_pose', default='true', type=str2bool)
parser.add_argument('--dump_depth', default='true', type=str2bool)
parser.add_argument('--dump_pncc', default='true', type=str2bool)
parser.add_argument('--dump_paf', default='true', type=str2bool)
parser.add_argument('--paf_size', default=3, type=int, help='PAF feature kernel size')
parser.add_argument('--dump_obj', default='true', type=str2bool)
parser.add_argument('--dlib_bbox', default='true', type=str2bool, help='whether use dlib to predict bbox')
parser.add_argument('--dlib_landmark', default='true', type=str2bool,
help='whether use dlib landmark to crop image')
args = parser.parse_args()
main(args)
'''
#!/usr/bin/env python3
# coding: utf-8
"""
The pipeline of 3DDFA prediction: given one image, predict the 3d face vertices, 68 landmarks and visualization.
[todo]
1. CPU optimization: https://pmchojnacki.wordpress.com/2018/10/07/slow-pytorch-cpu-performance
"""
import torch
import torchvision.transforms as transforms
from . import mobilenet_v1
import numpy as np
import cv2
import os
from tqdm import tqdm
import face_alignment
from .utils.ddfa import ToTensorGjz, NormalizeGjz
import scipy.io as sio
from .utils.inference import parse_roi_box_from_landmark, crop_img, predict_68pts, predict_dense, get_colors, \
get_5lmk_from_68lmk
from .utils.estimate_pose import parse_pose
from .utils.params import param_mean, param_std
from .utils.render import crender_colors
import torch.backends.cudnn as cudnn
__author__ = 'cleardusk'
STD_SIZE = 120
def main(args):
# 1. load pre-trained model
checkpoint_fp = 'algorithm/DDFA/models/phase1_wpdc_vdc.pth.tar'
arch = 'mobilenet_1'
checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
model = getattr(mobilenet_v1, arch)(num_classes=62) # 62 = 12(pose) + 40(shape) +10(expression)
model_dict = model.state_dict()
# because the model is trained by multiple gpus, prefix module should be removed
for k in checkpoint.keys():
model_dict[k.replace('module.', '')] = checkpoint[k]
model.load_state_dict(model_dict)
if args.mode == 'gpu':
cudnn.benchmark = True
model = model.cuda()
model.eval()
tri = sio.loadmat('algorithm/DDFA/visualize/tri.mat')['tri']
transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
# 2. parse images list
with open(args.img_list) as f:
img_list = [x.strip() for x in f.readlines()]
landmark_list = []
alignment_model = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False)
if not os.path.exists(args.save_dir):
os.mkdir(args.save_dir)
if not os.path.exists(args.save_lmk_dir):
os.mkdir(args.save_lmk_dir)
for img_idx, img_fp in enumerate(tqdm(img_list)):
img_ori = cv2.imread(os.path.join(args.img_prefix, img_fp))
print("Image", img_fp)
pts_res = []
Ps = [] # Camera matrix collection
poses = [] # pose collection, [todo: validate it]
# vertices_lst = [] # store multiple face vertices
# ind = 0
# suffix = get_suffix(img_fp)
# face alignment model use RGB as input, result is a tuple with landmarks and boxes
preds = alignment_model.get_landmarks(img_ori[:, :, ::-1])
pts_2d_68 = preds[0]
pts_2d_5 = get_5lmk_from_68lmk(pts_2d_68)
landmark_list.append(pts_2d_5)
roi_box = parse_roi_box_from_landmark(pts_2d_68.T)
img = crop_img(img_ori, roi_box)
# import pdb; pdb.set_trace()
# forward: one step
img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
input = transform(img).unsqueeze(0)
with torch.no_grad():
if args.mode == 'gpu':
input = input.cuda()
param = model(input)
param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
# 68 pts
pts68 = predict_68pts(param, roi_box)
# two-step for more accurate bbox to crop face
if args.bbox_init == 'two':
roi_box = parse_roi_box_from_landmark(pts68)
img_step2 = crop_img(img_ori, roi_box)
img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
input = transform(img_step2).unsqueeze(0)
with torch.no_grad():
if args.mode == 'gpu':
input = input.cuda()
param = model(input)
param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
pts68 = predict_68pts(param, roi_box)
pts_res.append(pts68)
P, pose = parse_pose(param)
Ps.append(P)
poses.append(pose)
# dense face 3d vertices
vertices = predict_dense(param, roi_box)
if args.dump_2d_img:
wfp_2d_img = os.path.join(args.save_dir, os.path.basename(img_fp))
colors = get_colors(img_ori, vertices)
# aligned_param = get_aligned_param(param)
# vertices_aligned = predict_dense(aligned_param, roi_box)
# h, w, c = 120, 120, 3
h, w, c = img_ori.shape
img_2d = crender_colors(vertices.T, (tri - 1).T, colors[:, ::-1], h, w)
cv2.imwrite(wfp_2d_img, img_2d[:, :, ::-1])
if args.dump_param:
split = img_fp.split('/')
save_name = os.path.join(args.save_dir, '{}.txt'.format(os.path.splitext(split[-1])[0]))
this_param = param * param_std + param_mean
this_param = np.concatenate((this_param, roi_box))
this_param.tofile(save_name, sep=' ')
if args.dump_lmk:
save_path = os.path.join(args.save_lmk_dir, 'realign_lmk_')
with open(save_path, 'w') as f:
for idx, (fname, land) in enumerate(zip(img_list, landmark_list)):
# f.write('{} {} {} {}')
land = land.astype(np.int)
land_str = ' '.join([str(x) for x in land])
msg = f'{fname} {idx} {land_str}\n'
f.write(msg)
if __name__ == '__main__':
'''
parser = argparse.ArgumentParser(description='3DDFA inference pipeline')
parser.add_argument('-m', '--mode', default='gpu', type=str, help='gpu or cpu mode')
parser.add_argument('--bbox_init', default='two', type=str,
help='one|two: one-step bbox initialization or two-step')
parser.add_argument('--dump_2d_img', default='true', type=str2bool, help='whether to save 3d rendered image')
parser.add_argument('--dump_param', default='true', type=str2bool, help='whether to save param')
parser.add_argument('--dump_lmk', default='true', type=str2bool, help='whether to save landmarks')
parser.add_argument('--save_dir', default='results', type=str, help='dir to save result')
parser.add_argument('--save_lmk_dir', default='example', type=str, help='dir to save landmark result')
parser.add_argument('--img_list', default='example/file_list.txt', type=str, help='test image list file')
parser.add_argument('--img_prefix', default='example/Images/', type=str, help='test image prefix')
parser.add_argument('--rank', default=0, type=int, help='used when parallel run')
parser.add_argument('--world_size', default=1, type=int, help='used when parallel run')
parser.add_argument('--resume_idx', default=0, type=int)
args = parser.parse_args()
'''
# main(args)
from torch.utils.data import Dataset
import numpy as np
import cv2
from utils.inference import crop_img, parse_roi_box_from_landmark
def cv2_loader(img_str):
img_array = np.frombuffer(img_str, dtype=np.uint8)
return cv2.imdecode(img_array, cv2.IMREAD_COLOR)
class McDataset(Dataset):
def __init__(self, img_list, landmarks, std_size=120, transform=None):
self.img_list = img_list
self.landmarks = landmarks
self.transform = transform
self.std_size = std_size
assert len(self.img_list) == len(self.landmarks)
self.num = len(self.img_list)
self.initialized = False
def __len__(self):
return self.num
def __getitem__(self, idx):
filename = self.img_list[idx]
ori_img = cv2.imread(filename)
landmark = self.landmarks[idx]
# preprocess img
roi_box = parse_roi_box_from_landmark(landmark.T)
img = crop_img(ori_img, roi_box)
img = cv2.resize(img, dsize=(self.std_size, self.std_size), interpolation=cv2.INTER_LINEAR)
if self.transform is not None:
img = self.transform(img)
return img, ori_img, filename, np.array(roi_box)
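# Illustrative usage sketch (hypothetical file names and landmarks): each item is cropped
# around the ROI box derived from its 68-point landmarks and resized to std_size before
# the optional transform is applied.
#
#   dataset = McDataset(['face.jpg'], [np.random.rand(68, 2) * 120], std_size=120)
#   img, ori_img, filename, roi_box = dataset[0]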
#!/usr/bin/env python3
# coding: utf-8
"""
The pipeline of 3DDFA prediction: given one image, predict the 3d face vertices, 68 landmarks and visualization.
[todo]
1. CPU optimization: https://pmchojnacki.wordpress.com/2018/10/07/slow-pytorch-cpu-performance
"""
import torch
import torchvision.transforms as transforms
import mobilenet_v1
import numpy as np
import cv2
import os
from tqdm import tqdm
import time
from utils.ddfa import ToTensorGjz, NormalizeGjz, str2bool
from utils.inference import parse_roi_box_from_landmark, crop_img, predict_68pts, parse_quality_list_part
from utils.params import param_mean, param_std
import argparse
import torch.backends.cudnn as cudnn
from simple_dataset import McDataset
from torch.utils.data import DataLoader
__author__ = 'cleardusk'
STD_SIZE = 120
def main(args):
# 1. load pre-trained model
checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
arch = 'mobilenet_1'
checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
model = getattr(mobilenet_v1, arch)(num_classes=62) # 62 = 12(pose) + 40(shape) +10(expression)
model_dict = model.state_dict()
# because the model is trained by multiple gpus, prefix module should be removed
for k in checkpoint.keys():
model_dict[k.replace('module.', '')] = checkpoint[k]
model.load_state_dict(model_dict)
if args.mode == 'gpu':
cudnn.benchmark = True
model = model.cuda()
model.eval()
# tri = sio.loadmat('visualize/tri.mat')['tri']
transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
if not os.path.exists(args.save_dir):
os.mkdir(args.save_dir)
# 2. parse images list and landmark
lmk_file = args.lmk_file
ts = time.time()
rank_land, rank_img_list, start, end = parse_quality_list_part(lmk_file, args.world_size, args.rank,
args.resume_idx)
print('parse land file in {:.3f} seconds'.format(time.time() - ts))
# for batch processing
print('World size {}, rank {}, start from {}, end with {}'.format(args.world_size, args.rank, start, end))
dataset = McDataset(rank_img_list, rank_land, transform=transform, std_size=STD_SIZE)
dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=False, num_workers=2, pin_memory=True)
for img_idx, (inputs, ori_imgs, img_fps, roi_boxes) in enumerate(tqdm(dataloader)):
# forward: one step
with torch.no_grad():
if args.mode == 'gpu':
inputs = inputs.cuda()
params = model(inputs)
params = params.cpu().numpy()
roi_boxes = roi_boxes.numpy()
outputs_roi_boxes = roi_boxes
if args.bbox_init == 'two':
step_two_ori_imgs = []
step_two_roi_boxes = []
ori_imgs = ori_imgs.numpy()
for ii in range(params.shape[0]):
# 68 pts
pts68 = predict_68pts(params[ii], roi_boxes[ii])
# two-step for more accurate bbox to crop face
roi_box = parse_roi_box_from_landmark(pts68)
img_step2 = crop_img(ori_imgs[ii], roi_box)
img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
# input = transform(img_step2).unsqueeze(0)
step_two_ori_imgs.append(transform(img_step2))
step_two_roi_boxes.append(roi_box)
with torch.no_grad():
step_two_ori_imgs = torch.stack(step_two_ori_imgs, dim=0)
inputs = step_two_ori_imgs
if args.mode == 'gpu':
inputs = inputs.cuda()
params = model(inputs)
params = params.cpu().numpy()
outputs_roi_boxes = step_two_roi_boxes
# dump results
if args.dump_param:
for img_fp, param, roi_box in zip(img_fps, params, outputs_roi_boxes):
split = img_fp.split('/')
save_name = os.path.join(args.save_dir, '{}.txt'.format(os.path.splitext(split[-1])[0]))
this_param = param * param_std + param_mean
this_param = np.concatenate((this_param, roi_box))
this_param.tofile(save_name, sep=' ')
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='3DDFA inference pipeline')
parser.add_argument('-m', '--mode', default='gpu', type=str, help='gpu or cpu mode')
parser.add_argument('--bbox_init', default='two', type=str,
help='one|two: one-step bbox initialization or two-step')
parser.add_argument('--dump_2d_img', default='false', type=str2bool, help='whether to save 3d rendered image')
parser.add_argument('--dump_param', default='true', type=str2bool, help='whether to save param')
parser.add_argument('--save_dir', default='results', type=str, help='dir to save result')
parser.add_argument('--lmk_file', default='quality_list', type=str, help='landmarks file')
parser.add_argument('--rank', default=0, type=int, help='used when parallel run')
parser.add_argument('--world_size', default=1, type=int, help='used when parallel run')
parser.add_argument('--resume_idx', default=0, type=int)
parser.add_argument('--batch_size', default=80, type=int, help='batch size')
args = parser.parse_args()
main(args)