diff --git a/projects/opendr_ws/src/perception/scripts/audiovisual_emotion_recognition.py b/projects/opendr_ws/src/perception/scripts/audiovisual_emotion_recognition.py
index c4fe3e126a665955105d03520bcc0a559686e6db..8c0ad8e53597cffbcb79d043fade8710981abf28 100644
--- a/projects/opendr_ws/src/perception/scripts/audiovisual_emotion_recognition.py
+++ b/projects/opendr_ws/src/perception/scripts/audiovisual_emotion_recognition.py
@@ -19,6 +19,7 @@
 import argparse
 import numpy as np
 import torch
 import librosa
+import cv2
 import rospy
 import message_filters
@@ -35,28 +36,25 @@ from opendr.engine.data import Video, Timeseries
 
 
 class AudiovisualEmotionNode:
 
     def __init__(self, input_video_topic="/usb_cam/image_raw", input_audio_topic="/audio/audio",
-                 annotations_topic="/opendr/audiovisual_emotion", buffer_size=3.6, device="cuda"):
+                 output_emotions_topic="/opendr/audiovisual_emotion", buffer_size=3.6, device="cuda"):
         """
         Creates a ROS Node for audiovisual emotion recognition
         :param input_video_topic: Topic from which we are reading the input video. Expects detected face of size 224x224
         :type input_video_topic: str
         :param input_audio_topic: Topic from which we are reading the input audio
         :type input_audio_topic: str
-        :param annotations_topic: Topic to which we are publishing the predicted class
-        :type annotations_topic: str
+        :param output_emotions_topic: Topic to which we are publishing the predicted class
+        :type output_emotions_topic: str
         :param buffer_size: length of audio and video in sec
         :type buffer_size: float
         :param device: device on which we are running inference ('cpu' or 'cuda')
         :type device: str
         """
-        self.publisher = rospy.Publisher(annotations_topic, Classification2D, queue_size=10)
+        self.publisher = rospy.Publisher(output_emotions_topic, Classification2D, queue_size=10)
 
-        video_sub = message_filters.Subscriber(input_video_topic, ROS_Image)
-        audio_sub = message_filters.Subscriber(input_audio_topic, AudioData)
-        # synchronize video and audio data topics
-        ts = message_filters.ApproximateTimeSynchronizer([video_sub, audio_sub], 10, 0.1, allow_headerless=True)
-        ts.registerCallback(self.callback)
+        self.input_video_topic = input_video_topic
+        self.input_audio_topic = input_audio_topic
 
         self.bridge = ROSBridge()
@@ -78,20 +76,30 @@ class AudiovisualEmotionNode:
         Start the node and begin processing input data
         """
         rospy.init_node('opendr_audiovisualemotion_recognition', anonymous=True)
+
+        video_sub = message_filters.Subscriber(self.input_video_topic, ROS_Image)
+        audio_sub = message_filters.Subscriber(self.input_audio_topic, AudioData)
+        # synchronize video and audio data topics
+        ts = message_filters.ApproximateTimeSynchronizer([video_sub, audio_sub], 10, 0.1, allow_headerless=True)
+        ts.registerCallback(self.callback)
+
         rospy.loginfo("Audiovisual emotion recognition node started!")
         rospy.spin()
 
     def callback(self, image_data, audio_data):
         """
         Callback that process the input data and publishes to the corresponding topics
-        :param image_data: input image message, face image of size 224x224
+        :param image_data: input image message, face image
         :type image_data: sensor_msgs.msg.Image
         :param audio_data: input audio message, speech
         :type audio_data: audio_common_msgs.msg.AudioData
         """
         audio_data = np.reshape(np.frombuffer(audio_data.data, dtype=np.int16)/32768.0, (1, -1))
         self.data_buffer = np.append(self.data_buffer, audio_data)
 
+        image_data = self.bridge.from_ros_image(image_data, encoding='bgr8').convert(format='channels_last')
+        image_data = cv2.resize(image_data, (224, 224))
+
         self.video_buffer = np.append(self.video_buffer, np.expand_dims(image_data.data, 0), axis=0)
 
         if self.data_buffer.shape[0] > 16000*self.buffer_size:
@@ -116,16 +124,36 @@ def select_distributed(m, n): return [i*n//m + n//(2*m) for i in range(m)]
 
 
-if __name__ == '__main__':
-    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument('--video_topic', type=str, help='listen to video input data on this topic')
-    parser.add_argument('--audio_topic', type=str, help='listen to audio input data on this topic')
-    parser.add_argument('--buffer_size', type=float, default=3.6, help='size of the audio buffer in seconds')
+    parser.add_argument("--input_video_topic", type=str, default="/usb_cam/image_raw",
+                        help="Listen to video input data on this topic")
+    parser.add_argument("--input_audio_topic", type=str, default="/audio/audio",
+                        help="Listen to audio input data on this topic")
+    parser.add_argument("--output_emotions_topic", type=str, default="/opendr/audiovisual_emotion",
+                        help="Topic name for output emotions recognition")
+    parser.add_argument("--buffer_size", type=float, default=3.6,
+                        help="Size of the audio buffer in seconds")
+    parser.add_argument("--device", type=str, default="cuda",
+                        help="Device to use (cpu, cuda)", choices=["cuda", "cpu"])
     args = parser.parse_args()
 
-    avnode = AudiovisualEmotionNode(input_video_topic=args.video_topic, input_audio_topic=args.audio_topic,
-                                    annotations_topic="/opendr/audiovisual_emotion",
+    # Select the device for running
+    try:
+        if args.device == "cuda" and torch.cuda.is_available():
+            device = "cuda"
+        elif args.device == "cuda":
+            print("GPU not found. Using CPU instead.")
+            device = "cpu"
+        else:
+            print("Using CPU")
+            device = "cpu"
+    except:
+        print("Using CPU")
+        device = "cpu"
+
+    avnode = AudiovisualEmotionNode(input_video_topic=args.input_video_topic, input_audio_topic=args.input_audio_topic,
+                                    output_emotions_topic=args.output_emotions_topic,
                                     buffer_size=args.buffer_size, device=device)
     avnode.listen()
diff --git a/projects/opendr_ws/src/perception/scripts/heart_anomaly_detection.py b/projects/opendr_ws/src/perception/scripts/heart_anomaly_detection.py
index 4e72471b9d5df26fadc44bc7263f5a76f04c9723..b36ecbdbdba63234f237716d0b3e813d60a8ea62 100755
--- a/projects/opendr_ws/src/perception/scripts/heart_anomaly_detection.py
+++ b/projects/opendr_ws/src/perception/scripts/heart_anomaly_detection.py
@@ -25,22 +25,23 @@ from opendr.perception.heart_anomaly_detection import GatedRecurrentUnitLearner,
 
 
 class HeartAnomalyNode:
 
-    def __init__(self, input_topic="/ecg/ecg", prediction_topic="/opendr/heartanomaly", device="cuda", model='anbof'):
+    def __init__(self, input_ecg_topic="/ecg/ecg", output_heart_anomaly_topic="/opendr/heart_anomaly",
+                 device="cuda", model="anbof"):
         """
         Creates a ROS Node for heart anomaly (atrial fibrillation) detection from ecg data
-        :param input_topic: Topic from which we are reading the input array data
-        :type input_topic: str
-        :param prediction_topic: Topic to which we are publishing the predicted class
-        :type prediction_topic: str
+        :param input_ecg_topic: Topic from which we are reading the input array data
+        :type input_ecg_topic: str
+        :param output_heart_anomaly_topic: Topic to which we are publishing the predicted class
+        :type output_heart_anomaly_topic: str
         :param device: device on which we are running inference ('cpu' or 'cuda')
         :type device: str
         :param model: model to use: anbof or gru
         :type model: str
         """
-        self.publisher = rospy.Publisher(prediction_topic, Classification2D, queue_size=10)
+        self.publisher = rospy.Publisher(output_heart_anomaly_topic, Classification2D, queue_size=10)
 
-        rospy.Subscriber(input_topic, Float32MultiArray, self.callback)
+        rospy.Subscriber(input_ecg_topic, Float32MultiArray, self.callback)
 
         self.bridge = ROSBridge()
@@ -70,8 +71,8 @@
     def callback(self, msg_data):
         """
         Callback that process the input data and publishes to the corresponding topics
-        :param data: input message
-        :type data: std_msgs.msg.Float32MultiArray
+        :param msg_data: input message
+        :type msg_data: std_msgs.msg.Float32MultiArray
         """
         # Convert Float32MultiArray to OpenDR Timeseries
         data = self.bridge.from_rosarray_to_timeseries(msg_data, self.channels, self.series_length)
@@ -83,17 +84,34 @@
         ros_class = self.bridge.from_category_to_rosclass(class_pred)
         self.publisher.publish(ros_class)
 
 
-if __name__ == '__main__':
-    # Select the device for running
-    try:
-        device = 'cuda' if torch.cuda.is_available() else 'cpu'
-    except:
-        device = 'cpu'
+if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument('input_topic', type=str, help='listen to input data on this topic')
-    parser.add_argument('model', type=str, help='model to be used for prediction: anbof or gru')
+    parser.add_argument("--input_ecg_topic", type=str, default="/ecg/ecg",
+                        help="listen to input ECG data on this topic")
+    parser.add_argument("--model", type=str, default="anbof", help="model to be used for prediction: anbof or gru",
+                        choices=["anbof", "gru"])
+    parser.add_argument("--output_heart_anomaly_topic", type=str, default="/opendr/heart_anomaly",
+                        help="Topic name for heart anomaly detection topic")
+    parser.add_argument("--device", type=str, default="cuda", help="Device to use (cpu, cuda)",
+                        choices=["cuda", "cpu"])
+
     args = parser.parse_args()
 
-    gesture_node = HeartAnomalyNode(input_topic=args.input_topic, model=args.model, device=device)
+    try:
+        if args.device == "cuda" and torch.cuda.is_available():
+            device = "cuda"
+        elif args.device == "cuda":
+            print("GPU not found. Using CPU instead.")
+            device = "cpu"
+        else:
+            print("Using CPU")
+            device = "cpu"
+    except:
+        print("Using CPU")
+        device = "cpu"
+
+    gesture_node = HeartAnomalyNode(input_ecg_topic=args.input_ecg_topic,
+                                    output_heart_anomaly_topic=args.output_heart_anomaly_topic,
+                                    model=args.model, device=device)
     gesture_node.listen()
diff --git a/projects/opendr_ws/src/perception/scripts/rgbd_hand_gesture_recognition.py b/projects/opendr_ws/src/perception/scripts/rgbd_hand_gesture_recognition.py
index d81f2692add671b99171d58e44b9302580c68971..a21f10974c4ebbd5f66f945e4782494d5fbb9e1d 100755
--- a/projects/opendr_ws/src/perception/scripts/rgbd_hand_gesture_recognition.py
+++ b/projects/opendr_ws/src/perception/scripts/rgbd_hand_gesture_recognition.py
@@ -48,13 +48,10 @@
         :type device: str
         """
 
-        self.gesture_publisher = rospy.Publisher(output_gestures_topic, Classification2D, queue_size=10)
+        self.input_rgb_image_topic = input_rgb_image_topic
+        self.input_depth_image_topic = input_depth_image_topic
 
-        image_sub = message_filters.Subscriber(input_rgb_image_topic, ROS_Image, queue_size=1, buff_size=10000000)
-        depth_sub = message_filters.Subscriber(input_depth_image_topic, ROS_Image, queue_size=1, buff_size=10000000)
-        # synchronize image and depth data topics
-        ts = message_filters.TimeSynchronizer([image_sub, depth_sub], 10)
-        ts.registerCallback(self.callback)
+        self.gesture_publisher = rospy.Publisher(output_gestures_topic, Classification2D, queue_size=10)
 
         self.bridge = ROSBridge()
@@ -74,6 +71,13 @@
         Start the node and begin processing input data
         """
         rospy.init_node('opendr_gesture_recognition', anonymous=True)
+
+        image_sub = message_filters.Subscriber(self.input_rgb_image_topic, ROS_Image, queue_size=1, buff_size=10000000)
+        depth_sub = message_filters.Subscriber(self.input_depth_image_topic, ROS_Image, queue_size=1, buff_size=10000000)
+        # synchronize image and depth data topics
+        ts = message_filters.TimeSynchronizer([image_sub, depth_sub], 10)
+        ts.registerCallback(self.callback)
+
         rospy.loginfo("RGBD gesture recognition node started!")
         rospy.spin()
diff --git a/projects/opendr_ws/src/perception/scripts/speech_command_recognition.py b/projects/opendr_ws/src/perception/scripts/speech_command_recognition.py
index 4726b478a140d638cf297abcb00ed942acbbe954..7acd13578dfc6564f2b84c3a1e9821598ee0161d 100755
--- a/projects/opendr_ws/src/perception/scripts/speech_command_recognition.py
+++ b/projects/opendr_ws/src/perception/scripts/speech_command_recognition.py
@@ -28,26 +28,26 @@ from opendr.perception.speech_recognition import MatchboxNetLearner, EdgeSpeechN
 
 
 class SpeechRecognitionNode:
 
-    def __init__(self, input_topic='/audio/audio', prediction_topic="/opendr/speech_recognition",
-                 buffer_size=1.5, model='matchboxnet', model_path=None, device='cuda'):
+    def __init__(self, input_audio_topic="/audio/audio", output_speech_command_topic="/opendr/speech_recognition",
+                 buffer_size=1.5, model="matchboxnet", model_path=None, device="cuda"):
         """
         Creates a ROS Node for speech command recognition
-        :param input_topic: Topic from which the audio data is received
-        :type input_topic: str
-        :param prediction_topic: Topic to which the predictions are published
-        :type prediction_topic: str
+        :param input_audio_topic: Topic from which the audio data is received
+        :type input_audio_topic: str
+        :param output_speech_command_topic: Topic to which the predictions are published
+        :type output_speech_command_topic: str
         :param buffer_size: Length of the audio buffer in seconds
         :type buffer_size: float
         :param model: base speech command recognition model: matchboxnet or quad_selfonn
         :type model: str
-        :param device: device for inference ('cpu' or 'cuda')
+        :param device: device for inference ("cpu" or "cuda")
         :type device: str
         """
-        self.publisher = rospy.Publisher(prediction_topic, Classification2D, queue_size=10)
+        self.publisher = rospy.Publisher(output_speech_command_topic, Classification2D, queue_size=10)
 
-        rospy.Subscriber(input_topic, AudioData, self.callback)
+        rospy.Subscriber(input_audio_topic, AudioData, self.callback)
 
         self.bridge = ROSBridge()
@@ -59,17 +59,17 @@
         # Initialize the recognition model
         if model == "matchboxnet":
             self.learner = MatchboxNetLearner(output_classes_n=20, device=device)
-            load_path = './MatchboxNet'
+            load_path = "./MatchboxNet"
         elif model == "edgespeechnets":
             self.learner = EdgeSpeechNetsLearner(output_classes_n=20, device=device)
             assert model_path is not None, "No pretrained EdgeSpeechNets model available for download"
         elif model == "quad_selfonn":
             self.learner = QuadraticSelfOnnLearner(output_classes_n=20, device=device)
-            load_path = './QuadraticSelfOnn'
+            load_path = "./QuadraticSelfOnn"
 
         # Download the recognition model
         if model_path is None:
-            self.learner.download_pretrained(path='.')
+            self.learner.download_pretrained(path=".")
             self.learner.load(load_path)
         else:
             self.learner.load(model_path)
@@ -78,15 +78,15 @@
         """
         Start the node and begin processing input data
         """
-        rospy.init_node('opendr_speech_command_recognition', anonymous=True)
+        rospy.init_node("opendr_speech_command_recognition", anonymous=True)
         rospy.loginfo("Speech command recognition node started!")
         rospy.spin()
 
     def callback(self, msg_data):
         """
         Callback that processes the input data and publishes predictions to the output topic
-        :param data: incoming message
-        :type data: audio_common_msgs.msg.AudioData
+        :param msg_data: incoming message
+        :type msg_data: audio_common_msgs.msg.AudioData
         """
         # Accumulate data until the buffer is full
         data = np.reshape(np.frombuffer(msg_data.data, dtype=np.int16)/32768.0, (1, -1))
@@ -105,22 +105,37 @@
             self.data_buffer = np.zeros((1, 1))
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--input_audio_topic", type=str, default="audio/audio",
+                        help="Listen to input data on this topic")
+    parser.add_argument("--output_speech_command_topic", type=str, default="/opendr/speech_recognition",
+                        help="Topic name for speech command output")
+    parser.add_argument("--buffer_size", type=float, default=1.5, help="Size of the audio buffer in seconds")
+    parser.add_argument("--model", default="matchboxnet", choices=["matchboxnet", "edgespeechnets", "quad_selfonn"],
+                        help="Model to be used for prediction: matchboxnet, edgespeechnets or quad_selfonn")
+    parser.add_argument("--model_path", type=str,
+                        help="Path to the model files, if not given, the pretrained model will be downloaded")
+    parser.add_argument("--device", type=str, default="cuda", choices=["cuda", "cpu"],
+                        help="Device to use (cpu, cuda)")
+    args = parser.parse_args()
+
     # Select the device for running
     try:
-        device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        if args.device == "cuda" and torch.cuda.is_available():
+            device = "cuda"
+        elif args.device == "cuda":
+            print("GPU not found. Using CPU instead.")
+            device = "cpu"
+        else:
+            print("Using CPU")
+            device = "cpu"
     except:
-        device = 'cpu'
-
-    parser = argparse.ArgumentParser()
-    parser.add_argument('input_topic', type=str, help='listen to input data on this topic')
-    parser.add_argument('--buffer_size', type=float, default=1.5, help='size of the audio buffer in seconds')
-    parser.add_argument('--model', choices=["matchboxnet", "edgespeechnets", "quad_selfonn"], default="matchboxnet",
-                        help='model to be used for prediction: matchboxnet or quad_selfonn')
-    parser.add_argument('--model_path', type=str,
-                        help='path to the model files, if not given, the pretrained model will be downloaded')
-    args = parser.parse_args()
+        print("Using CPU")
+        device = "cpu"
 
-    speech_node = SpeechRecognitionNode(input_topic=args.input_topic, buffer_size=args.buffer_size,
-                                        model=args.model, model_path=args.model_path, device=device)
+    speech_node = SpeechRecognitionNode(input_audio_topic=args.input_audio_topic,
+                                        output_speech_command_topic=args.output_speech_command_topic,
+                                        buffer_size=args.buffer_size, model=args.model, model_path=args.model_path,
+                                        device=device)
     speech_node.listen()
diff --git a/src/opendr/perception/panoptic_segmentation/efficient_ps/efficient_ps_learner.py b/src/opendr/perception/panoptic_segmentation/efficient_ps/efficient_ps_learner.py
index 161137bd0dd43ce4ee817220a2085bb5e60551ac..4036a82c127ea6e9fd56cda63445f11668149d6e 100644
--- a/src/opendr/perception/panoptic_segmentation/efficient_ps/efficient_ps_learner.py
+++ b/src/opendr/perception/panoptic_segmentation/efficient_ps/efficient_ps_learner.py
@@ -456,15 +456,15 @@
         """
         if mode == 'model':
             models = {
-                'cityscapes': f'{OPENDR_SERVER_URL}perception/panoptic_segmentation/models/model_cityscapes.pth',
-                'kitti': f'{OPENDR_SERVER_URL}perception/panoptic_segmentation/models/model_kitti.pth'
+                'cityscapes': f'{OPENDR_SERVER_URL}perception/panoptic_segmentation/efficient_ps/models/model_cityscapes.pth',
+                'kitti': f'{OPENDR_SERVER_URL}perception/panoptic_segmentation/efficient_ps/models/model_kitti.pth'
             }
             if trained_on not in models.keys():
                 raise ValueError(f'Could not find model weights pre-trained on {trained_on}. '
                                  f'Valid options are {list(models.keys())}')
             url = models[trained_on]
         elif mode == 'test_data':
-            url = f'{OPENDR_SERVER_URL}perception/panoptic_segmentation/test_data/test_data.zip'
+            url = f'{OPENDR_SERVER_URL}perception/panoptic_segmentation/efficient_ps/test_data.zip'
         else:
             raise ValueError('Invalid mode. Valid options are ["model", "test_data"]')
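
For reviewers who want to exercise one of the renamed nodes end to end, below is a minimal sketch that publishes synthetic ECG data to the new default --input_ecg_topic of the heart anomaly node. It is not part of the patch: the test node name, the 1 Hz publish rate and the 9000-sample series length are illustrative assumptions only (the length must match the series_length configured inside HeartAnomalyNode); the topic name and the std_msgs.msg.Float32MultiArray message type follow the subscription shown in the diff above.

#!/usr/bin/env python
# Hypothetical smoke-test publisher (not part of this patch): feeds synthetic
# single-channel ECG samples to the /ecg/ecg topic so the heart anomaly node
# can be exercised end to end.
import numpy as np
import rospy
from std_msgs.msg import Float32MultiArray

if __name__ == '__main__':
    rospy.init_node('ecg_test_publisher', anonymous=True)
    pub = rospy.Publisher('/ecg/ecg', Float32MultiArray, queue_size=10)
    rate = rospy.Rate(1)  # one synthetic series per second
    while not rospy.is_shutdown():
        msg = Float32MultiArray()
        # Random noise stands in for a real ECG recording; 9000 samples is an
        # assumed series length, adjust it to the model used by the node.
        msg.data = np.random.randn(9000).astype(np.float32).tolist()
        pub.publish(msg)
        rate.sleep()

With the patch applied, the node itself is then launched with the new named arguments, for example (assuming the perception package from projects/opendr_ws is built and sourced): rosrun perception heart_anomaly_detection.py --model gru --device cpu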