Skip to content
Snippets Groups Projects
Unverified Commit 26e0ae76 authored by ad-daniel's avatar ad-daniel Committed by GitHub
Browse files

Merge branch 'develop' into merge_master_into_develop

parents 8cf2adbd 2a0e6418
No related branches found
No related tags found
No related merge requests found
......@@ -13,12 +13,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import rospy
import torch
import torchvision
import cv2
import numpy as np
from pathlib import Path
from std_msgs.msg import String
from vision_msgs.msg import ObjectHypothesis
......@@ -31,20 +30,19 @@ from opendr.perception.activity_recognition import X3DLearner
class HumanActivityRecognitionNode:
# NOTE(review): this span is a rendered merge diff, not valid Python. Adjacent
# near-duplicate lines are the pre-merge and post-merge variants of the same
# statement (e.g. `input_image_topic` vs `input_rgb_image_topic`); the
# post-merge lines are authoritative. Lines starting with `......@@` are diff
# hunk headers — code between hunks (including the learner instantiation /
# download / load around old lines 64-67) is omitted from this view, so this
# block is documented in place rather than rewritten.
def __init__(
self,
input_image_topic="/usb_cam/image_raw",
input_rgb_image_topic="/usb_cam/image_raw",
output_category_topic="/opendr/human_activity_recognition",
output_category_description_topic="/opendr/human_activity_recognition_description",
device="cuda",
model='cox3d-m'
model="cox3d-m",
):
"""
Creates a ROS Node for face recognition
:param input_image_topic: Topic from which we are reading the input image
:type input_image_topic: str
:param output_category_topic: Topic to which we are publishing the recognized face info
Creates a ROS Node for video-based human activity recognition.
:param input_rgb_image_topic: Topic from which we are reading the input image
:type input_rgb_image_topic: str
:param output_category_topic: Topic to which we are publishing the recognized activity
(if None, we are not publishing the info)
:type output_category_topic: str
:param output_category_description_topic: Topic to which we are publishing the ID of the recognized action
......@@ -52,12 +50,20 @@ class HumanActivityRecognitionNode:
:type output_category_description_topic: str
:param device: device on which we are running inference ('cpu' or 'cuda')
:type device: str
:param model: architecture to use for human activity recognition.
:param model: Architecture to use for human activity recognition.
(Options: 'cox3d-s', 'cox3d-m', 'cox3d-l', 'x3d-xs', 'x3d-s', 'x3d-m', 'x3d-l')
:type model: str
"""
# Validate the requested architecture before splitting it into family/size.
assert model in {"cox3d-s", "cox3d-m", "cox3d-l", "x3d-xs", "x3d-s", "x3d-m", "x3d-l"}
assert model in {
"cox3d-s",
"cox3d-m",
"cox3d-l",
"x3d-xs",
"x3d-s",
"x3d-m",
"x3d-l",
}
# `model` is "<family>-<size>", e.g. "cox3d-m" selects CoX3DLearner, backbone "m".
model_name, model_size = model.split("-")
Learner = {"cox3d": CoX3DLearner, "x3d": X3DLearner}[model_name]
......@@ -68,7 +74,9 @@ class HumanActivityRecognitionNode:
# Set up preprocessing
# CoX3D consumes one frame at a time; X3D consumes a clip (window) of frames.
if model_name == "cox3d":
self.preprocess = _image_preprocess(image_size=self.learner.model_hparams["image_size"])
self.preprocess = _image_preprocess(
image_size=self.learner.model_hparams["image_size"]
)
else: # == x3d
self.preprocess = _video_preprocess(
image_size=self.learner.model_hparams["image_size"],
......@@ -76,22 +84,32 @@
)
# Set up ROS topics and bridge
self.input_rgb_image_topic = input_rgb_image_topic
# Publishers are optional: passing None for a topic disables that output.
self.hypothesis_publisher = (
rospy.Publisher(output_category_topic, ObjectHypothesis, queue_size=10) if output_category_topic else None
rospy.Publisher(output_category_topic, ObjectHypothesis, queue_size=1)
if output_category_topic
else None
)
self.string_publisher = (
rospy.Publisher(output_category_description_topic, String, queue_size=10) if output_category_topic else None
rospy.Publisher(output_category_description_topic, String, queue_size=1)
if output_category_description_topic
else None
)
rospy.Subscriber(input_image_topic, ROS_Image, self.callback)
self.bridge = ROSBridge()
def listen(self):
    """
    Start the node: initialize ROS, subscribe to the input image topic,
    and process incoming messages until shutdown.
    """
    rospy.init_node("opendr_human_activity_recognition", anonymous=True)
    # queue_size=1 with a large buff_size keeps only the freshest frame,
    # so latency does not build up when inference is slower than the camera.
    rospy.Subscriber(
        self.input_rgb_image_topic,
        ROS_Image,
        self.callback,
        queue_size=1,
        buff_size=10000000,
    )
    rospy.loginfo("Human activity recognition node started!")
    rospy.spin()
def callback(self, data):
    """
    Process an incoming image and publish the recognized activity.
    :param data: input message
    :type data: sensor_msgs.msg.Image
    """
    image = self.bridge.from_ros_image(data, encoding="rgb8")
    if image is None:
        return
    # Learner expects channel-first RGB input.
    x = self.preprocess(image.convert("channels_first", "rgb"))
    result = self.learner.infer(x)
    assert len(result) == 1
    category = result[0]
    category.confidence = float(category.confidence.max())  # Confidence for predicted class
    category.description = KINETICS400_CLASSES[category.data]  # Class name

    if self.hypothesis_publisher is not None:
        self.hypothesis_publisher.publish(self.bridge.to_ros_category(category))

    if self.string_publisher is not None:
        self.string_publisher.publish(
            self.bridge.to_ros_category_description(category)
        )
def _resize(image, size=None, inter=cv2.INTER_AREA):
    """
    Resize an image so its SHORTER side equals `size`, preserving aspect ratio.

    :param image: input image as an array of shape (H, W[, C])
    :param size: target length of the shorter side; if None, the image is
        returned unchanged
    :param inter: OpenCV interpolation flag used for resizing
    :return: the resized image (or the original image when `size` is None)
    """
    # No target size: nothing to do. Without this guard the default call
    # would raise a TypeError on `size / float(...)` below.
    if size is None:
        return image

    (h, w) = image.shape[:2]

    if h > w:
        # Width is the shorter side: pin width to `size`, scale height.
        r = size / float(w)
        dim = (size, int(h * r))
    else:
        # Height is the shorter side (or the image is square): pin height.
        r = size / float(h)
        dim = (int(w * r), size)

    # cv2.resize takes (width, height).
    return cv2.resize(image, dim, interpolation=inter)
......@@ -160,11 +172,11 @@ def _image_preprocess(image_size: int):
def wrapped(frame):
nonlocal standardize
frame = frame.transpose((1, 2, 0)) # C, H, W -> H, W, C
frame = _resize(frame, height=image_size, width=image_size)
frame = _resize(frame, size=image_size)
frame = torch.tensor(frame).permute((2, 0, 1)) # H, W, C -> C, H, W
frame = frame / 255.0 # [0, 255] -> [0.0, 1.0]
frame = standardize(frame)
return Image(frame, dtype=np.float)
return Image(frame, dtype=float)
return wrapped
......@@ -179,7 +191,7 @@ def _video_preprocess(image_size: int, window_size: int):
# NOTE(review): fragment of the `_video_preprocess` closure as rendered in a
# merge diff. The enclosing definition and the frame-window buffering code
# (between the two hunk headers) are outside this view; `frames` and
# `standardize` are presumably defined there — confirm against the full file.
def wrapped(frame):
nonlocal frames, standardize
frame = frame.transpose((1, 2, 0))  # C, H, W -> H, W, C
# The next two lines are the pre-/post-merge variants of the same resize call;
# the `size=` form matches the updated `_resize` signature.
frame = _resize(frame, height=image_size, width=image_size)
frame = _resize(frame, size=image_size)
frame = torch.tensor(frame).permute((2, 0, 1))  # H, W, C -> C, H, W
frame = frame / 255.0  # [0, 255] -> [0.0, 1.0]
frame = standardize(frame)
......@@ -194,17 +206,67 @@ def _video_preprocess(image_size: int, window_size: int):
return wrapped
def main():
    """Parse CLI arguments, select a device, and run the activity-recognition node."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i",
        "--input_rgb_image_topic",
        help="Topic name for input rgb image",
        type=str,
        default="/usb_cam/image_raw",
    )
    # The string "None" (any case) disables the corresponding publisher.
    parser.add_argument(
        "-o",
        "--output_category_topic",
        help="Topic to which we are publishing the recognized activity",
        type=lambda value: value if value.lower() != "none" else None,
        default="/opendr/human_activity_recognition",
    )
    parser.add_argument(
        "-od",
        "--output_category_description_topic",
        help="Topic to which we are publishing the ID of the recognized action",
        type=lambda value: value if value.lower() != "none" else None,
        default="/opendr/human_activity_recognition_description",
    )
    parser.add_argument(
        "--device",
        help='Device to use, either "cpu" or "cuda", defaults to "cuda"',
        type=str,
        default="cuda",
        choices=["cuda", "cpu"],
    )
    parser.add_argument(
        "--model",
        help="Architecture to use for human activity recognition.",
        type=str,
        default="cox3d-m",
        choices=["cox3d-s", "cox3d-m", "cox3d-l", "x3d-xs", "x3d-s", "x3d-m", "x3d-l"],
    )
    args = parser.parse_args()

    # Fall back to CPU when CUDA is requested but unavailable, or when the
    # CUDA availability query itself fails (e.g. a broken driver install).
    try:
        if args.device == "cuda" and torch.cuda.is_available():
            device = "cuda"
        elif args.device == "cuda":
            print("GPU not found. Using CPU instead.")
            device = "cpu"
        else:
            print("Using CPU.")
            device = "cpu"
    except Exception:  # narrowed from bare `except:` so SystemExit/KeyboardInterrupt propagate
        print("Using CPU.")
        device = "cpu"

    human_activity_recognition_node = HumanActivityRecognitionNode(
        input_rgb_image_topic=args.input_rgb_image_topic,
        output_category_topic=args.output_category_topic,
        output_category_description_topic=args.output_category_description_topic,
        device=device,
        model=args.model,
    )
    human_activity_recognition_node.listen()


if __name__ == "__main__":
    main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment