Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Variational Voxel 3D Detection
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
MaLeCi
OpenDR
Variational Voxel 3D Detection
Commits
26e0ae76
Unverified
Commit
26e0ae76
authored
2 years ago
by
ad-daniel
Committed by
GitHub
2 years ago
Browse files
Options
Downloads
Plain Diff
Merge branch 'develop' into merge_master_into_develop
parents
8cf2adbd
2a0e6418
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
projects/opendr_ws/src/perception/scripts/video_activity_recognition.py
+112
-50
112 additions, 50 deletions
...r_ws/src/perception/scripts/video_activity_recognition.py
with
112 additions
and
50 deletions
projects/opendr_ws/src/perception/scripts/video_activity_recognition.py
+
112
−
50
View file @
26e0ae76
...
...
@@ -13,12 +13,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
argparse
import
rospy
import
torch
import
torchvision
import
cv2
import
numpy
as
np
from
pathlib
import
Path
from
std_msgs.msg
import
String
from
vision_msgs.msg
import
ObjectHypothesis
...
...
@@ -31,20 +30,19 @@ from opendr.perception.activity_recognition import X3DLearner
class
HumanActivityRecognitionNode
:
def
__init__
(
self
,
input_image_topic
=
"
/usb_cam/image_raw
"
,
input_
rgb_
image_topic
=
"
/usb_cam/image_raw
"
,
output_category_topic
=
"
/opendr/human_activity_recognition
"
,
output_category_description_topic
=
"
/opendr/human_activity_recognition_description
"
,
device
=
"
cuda
"
,
model
=
'
cox3d-m
'
model
=
"
cox3d-m
"
,
):
"""
Creates a ROS Node for
face
recognition
:param input_image_topic: Topic from which we are reading the input image
:type input_image_topic: str
:param output_category_topic: Topic to which we are publishing the recognized
f
ac
e info
Creates a ROS Node for
video-based human activity
recognition
.
:param input_
rgb_
image_topic: Topic from which we are reading the input image
:type input_
rgb_
image_topic: str
:param output_category_topic: Topic to which we are publishing the recognized ac
tivity
(if None, we are not publishing the info)
:type output_category_topic: str
:param output_category_description_topic: Topic to which we are publishing the ID of the recognized action
...
...
@@ -52,12 +50,20 @@ class HumanActivityRecognitionNode:
:type output_category_description_topic: str
:param device: device on which we are running inference (
'
cpu
'
or
'
cuda
'
)
:type device: str
:param model:
a
rchitecture to use for human activity recognition.
:param model:
A
rchitecture to use for human activity recognition.
(Options:
'
cox3d-s
'
,
'
cox3d-m
'
,
'
cox3d-l
'
,
'
x3d-xs
'
,
'
x3d-s
'
,
'
x3d-m
'
,
'
x3d-l
'
)
:type model: str
"""
assert
model
in
{
"
cox3d-s
"
,
"
cox3d-m
"
,
"
cox3d-l
"
,
"
x3d-xs
"
,
"
x3d-s
"
,
"
x3d-m
"
,
"
x3d-l
"
}
assert
model
in
{
"
cox3d-s
"
,
"
cox3d-m
"
,
"
cox3d-l
"
,
"
x3d-xs
"
,
"
x3d-s
"
,
"
x3d-m
"
,
"
x3d-l
"
,
}
model_name
,
model_size
=
model
.
split
(
"
-
"
)
Learner
=
{
"
cox3d
"
:
CoX3DLearner
,
"
x3d
"
:
X3DLearner
}[
model_name
]
...
...
@@ -68,7 +74,9 @@ class HumanActivityRecognitionNode:
# Set up preprocessing
if
model_name
==
"
cox3d
"
:
self
.
preprocess
=
_image_preprocess
(
image_size
=
self
.
learner
.
model_hparams
[
"
image_size
"
])
self
.
preprocess
=
_image_preprocess
(
image_size
=
self
.
learner
.
model_hparams
[
"
image_size
"
]
)
else
:
# == x3d
self
.
preprocess
=
_video_preprocess
(
image_size
=
self
.
learner
.
model_hparams
[
"
image_size
"
],
...
...
@@ -76,22 +84,32 @@ class HumanActivityRecognitionNode:
)
# Set up ROS topics and bridge
self
.
input_rgb_image_topic
=
input_rgb_image_topic
self
.
hypothesis_publisher
=
(
rospy
.
Publisher
(
output_category_topic
,
ObjectHypothesis
,
queue_size
=
10
)
if
output_category_topic
else
None
rospy
.
Publisher
(
output_category_topic
,
ObjectHypothesis
,
queue_size
=
1
)
if
output_category_topic
else
None
)
self
.
string_publisher
=
(
rospy
.
Publisher
(
output_category_description_topic
,
String
,
queue_size
=
10
)
if
output_category_topic
else
None
rospy
.
Publisher
(
output_category_description_topic
,
String
,
queue_size
=
1
)
if
output_category_description_topic
else
None
)
rospy
.
Subscriber
(
input_image_topic
,
ROS_Image
,
self
.
callback
)
self
.
bridge
=
ROSBridge
()
def
listen
(
self
):
"""
Start the node and begin processing input data
"""
rospy
.
init_node
(
'
opendr_human_activity_recognition
'
,
anonymous
=
True
)
rospy
.
init_node
(
"
opendr_human_activity_recognition
"
,
anonymous
=
True
)
rospy
.
Subscriber
(
self
.
input_rgb_image_topic
,
ROS_Image
,
self
.
callback
,
queue_size
=
1
,
buff_size
=
10000000
,
)
rospy
.
loginfo
(
"
Human activity recognition node started!
"
)
rospy
.
spin
()
...
...
@@ -101,49 +119,43 @@ class HumanActivityRecognitionNode:
:param data: input message
:type data: sensor_msgs.msg.Image
"""
image
=
self
.
bridge
.
from_ros_image
(
data
)
image
=
self
.
bridge
.
from_ros_image
(
data
,
encoding
=
"
rgb8
"
)
if
image
is
None
:
return
x
=
self
.
preprocess
(
image
.
numpy
(
))
x
=
self
.
preprocess
(
image
.
convert
(
"
channels_first
"
,
"
rgb
"
))
result
=
self
.
learner
.
infer
(
x
)
assert
len
(
result
)
==
1
category
=
result
[
0
]
category
.
confidence
=
float
(
max
(
category
.
confidence
.
max
())
)
# Confidence for predicted class
category
.
confidence
=
float
(
category
.
confidence
.
max
())
# Confidence for predicted class
category
.
description
=
KINETICS400_CLASSES
[
category
.
data
]
# Class name
if
self
.
hypothesis_publisher
is
not
None
:
self
.
hypothesis_publisher
.
publish
(
self
.
bridge
.
to_ros_category
(
category
))
if
self
.
string_publisher
is
not
None
:
self
.
string_publisher
.
publish
(
self
.
bridge
.
to_ros_category_description
(
category
))
self
.
string_publisher
.
publish
(
self
.
bridge
.
to_ros_category_description
(
category
)
)
def
_resize
(
image
,
width
=
None
,
height
=
None
,
inter
=
cv2
.
INTER_AREA
):
def
_resize
(
image
,
size
=
None
,
inter
=
cv2
.
INTER_AREA
):
# initialize the dimensions of the image to be resized and
# grab the image size
dim
=
None
(
h
,
w
)
=
image
.
shape
[:
2
]
# if both the width and height are None, then return the
# original image
if
width
is
None
and
height
is
None
:
return
image
# check to see if the width is None
if
width
is
None
:
# calculate the ratio of the height and construct the
if
h
>
w
:
# calculate the ratio of the width and construct the
# dimensions
r
=
height
/
float
(
h
)
dim
=
(
int
(
w
*
r
),
height
)
# otherwise, the height is None
r
=
size
/
float
(
w
)
dim
=
(
size
,
int
(
h
*
r
))
else
:
# calculate the ratio of the
width
and construct the
# calculate the ratio of the
height
and construct the
# dimensions
r
=
width
/
float
(
w
)
dim
=
(
width
,
int
(
h
*
r
))
r
=
size
/
float
(
h
)
dim
=
(
int
(
w
*
r
)
,
size
)
# resize the image
resized
=
cv2
.
resize
(
image
,
dim
,
interpolation
=
inter
)
...
...
@@ -160,11 +172,11 @@ def _image_preprocess(image_size: int):
def
wrapped
(
frame
):
nonlocal
standardize
frame
=
frame
.
transpose
((
1
,
2
,
0
))
# C, H, W -> H, W, C
frame
=
_resize
(
frame
,
height
=
image_size
,
width
=
image_size
)
frame
=
_resize
(
frame
,
size
=
image_size
)
frame
=
torch
.
tensor
(
frame
).
permute
((
2
,
0
,
1
))
# H, W, C -> C, H, W
frame
=
frame
/
255.0
# [0, 255] -> [0.0, 1.0]
frame
=
standardize
(
frame
)
return
Image
(
frame
,
dtype
=
np
.
float
)
return
Image
(
frame
,
dtype
=
float
)
return
wrapped
...
...
@@ -179,7 +191,7 @@ def _video_preprocess(image_size: int, window_size: int):
def
wrapped
(
frame
):
nonlocal
frames
,
standardize
frame
=
frame
.
transpose
((
1
,
2
,
0
))
# C, H, W -> H, W, C
frame
=
_resize
(
frame
,
height
=
image_size
,
width
=
image_size
)
frame
=
_resize
(
frame
,
size
=
image_size
)
frame
=
torch
.
tensor
(
frame
).
permute
((
2
,
0
,
1
))
# H, W, C -> C, H, W
frame
=
frame
/
255.0
# [0, 255] -> [0.0, 1.0]
frame
=
standardize
(
frame
)
...
...
@@ -194,17 +206,67 @@ def _video_preprocess(image_size: int, window_size: int):
return
wrapped
if
__name__
==
'
__main__
'
:
# Select the device for running the
def
main
():
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"
-i
"
,
"
--input_rgb_image_topic
"
,
help
=
"
Topic name for input rgb image
"
,
type
=
str
,
default
=
"
/usb_cam/image_raw
"
,
)
parser
.
add_argument
(
"
-o
"
,
"
--output_category_topic
"
,
help
=
"
Topic to which we are publishing the recognized activity
"
,
type
=
lambda
value
:
value
if
value
.
lower
()
!=
"
none
"
else
None
,
default
=
"
/opendr/human_activity_recognition
"
,
)
parser
.
add_argument
(
"
-od
"
,
"
--output_category_description_topic
"
,
help
=
"
Topic to which we are publishing the ID of the recognized action
"
,
type
=
lambda
value
:
value
if
value
.
lower
()
!=
"
none
"
else
None
,
default
=
"
/opendr/human_activity_recognition_description
"
,
)
parser
.
add_argument
(
"
--device
"
,
help
=
'
Device to use, either
"
cpu
"
or
"
cuda
"
, defaults to
"
cuda
"'
,
type
=
str
,
default
=
"
cuda
"
,
choices
=
[
"
cuda
"
,
"
cpu
"
],
)
parser
.
add_argument
(
"
--model
"
,
help
=
"
Architecture to use for human activity recognition.
"
,
type
=
str
,
default
=
"
cox3d-m
"
,
choices
=
[
"
cox3d-s
"
,
"
cox3d-m
"
,
"
cox3d-l
"
,
"
x3d-xs
"
,
"
x3d-s
"
,
"
x3d-m
"
,
"
x3d-l
"
],
)
args
=
parser
.
parse_args
()
try
:
if
torch
.
cuda
.
is_available
():
print
(
"
GPU found.
"
)
device
=
'
cuda
'
else
:
if
args
.
device
==
"
cuda
"
and
torch
.
cuda
.
is_available
():
device
=
"
cuda
"
elif
args
.
device
==
"
cuda
"
:
print
(
"
GPU not found. Using CPU instead.
"
)
device
=
'
cpu
'
device
=
"
cpu
"
else
:
print
(
"
Using CPU.
"
)
device
=
"
cpu
"
except
:
device
=
'
cpu
'
human_activity_recognition_node
=
HumanActivityRecognitionNode
(
device
=
device
)
print
(
"
Using CPU.
"
)
device
=
"
cpu
"
human_activity_recognition_node
=
HumanActivityRecognitionNode
(
input_rgb_image_topic
=
args
.
input_rgb_image_topic
,
output_category_topic
=
args
.
output_category_topic
,
output_category_description_topic
=
args
.
output_category_description_topic
,
device
=
device
,
model
=
args
.
model
,
)
human_activity_recognition_node
.
listen
()
if
__name__
==
"
__main__
"
:
main
()
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment