Upload New File

0c191f20 · Laura Rævsbæk Birch · 95b833f1 · 0c191f20
Commit 0c191f20 authored 1 year ago by Laura Rævsbæk Birch
--- a/laura/BIDS_template.py
+++ b/laura/BIDS_template.py
+# Created by: Laura Birch - 201804337@post.au.dk
+# Last reviewed: 01/08/2023
+# This main script can be used to format data to BIDS. Note that each script might need changes to fit your data.
+# Steps 1-4 prepare the data and steps 5-6 convert the data to the BIDS format
+#
+# COMMENTS
+# Step 1: Change variables 'basepath' and 'original_data_path' to fit your data
+# Step 2: Skip if you already have these folders
+# Step 5: Insert required info for the dataset description + choose the appropriate function for handling the participants.tsv
+# Step 6: Insert task and acquisition, leave empty if none apply + insert all relevant info for the _eeg.json file
+
+
+#region Imports
+import os
+import mne
+from ntlab_create_folders import create_sourcedata_bids_folders
+from ntlab_create_dataset_description_json import create_dataset_description_json
+from ntlab_create_participants_subonly_tsv import create_participants_subonly_tsv
+from ntlab_create_participants_excel_tsv import create_participants_excel_tsv
+from ntlab_create_participants_json import create_participants_json
+from ntlab_create_sessions_tsv import create_sessions_tsv
+from ntlab_create_loaded_channels_tsv import create_loaded_channels_tsv
+from ntlab_create_recording_data_json import create_recording_data_json
+from ntlab_create_events_json import create_events_json
+from ntlab_create_scoringfile_json import create_scoringFile_json
+from ntlab_create_scoringfile_tsv import create_scoringFile_tsv
+# import matlab.engine
+#endregion
+
+
+#region STEP 1: Change path name to relevant path
+    # Basepath is the path in which your data lies
+    # Sourcepath is the path for your 'sourcedata' folder
+basepath = os.path.join('') # Add path for new BIDS folder
+sourcepath = os.path.join(basepath, 'sourcedata')
+original_data_path = os.path.join('') # Add path for folder with original data
+#endregion
+
+
+#region STEP 2: Create folders for sourcedata and bids
+create_sourcedata_bids_folders(basepath, original_data_path)
+#endregion
+
+
+#region STEP 3: Makes a list of all subjects and a list of all their sessions from the folders in the sourcedata folder
+# Create list of all subjects
+subfolders = [folder for folder in os.listdir(sourcepath) if os.path.isdir(os.path.join(sourcepath, folder))]
+nSubs=len(subfolders)
+
+# Create list of all sessions for each subject
+sesfolders=[]
+for sub in subfolders:
+    subpath = os.path.join(basepath, sub)
+    try:       
+        sesfolders.append([folder for folder in os.listdir(subpath) if os.path.isdir(os.path.join(subpath, folder))])
+    except:
+        print('No sessions for subject ' + sub)
+#endregion
+
+
+#region STEP 4: Create lists for each filetype such as EDF, Poly5, scoring files etc. These are found in the sourcedata folder
+    #Requirement: the original data have been moved to the sourcedata folder
+all_edf_files = []
+all_scoring_files = []
+all_events_files = []
+for iSub in range(len(subfolders)):
+    for iSes in range(len(sesfolders[iSub])):
+        sespath = os.path.join(sourcepath, subfolders[iSub], sesfolders[iSub][iSes])
+        edf_files = [os.path.join(sespath, file) for file in os.listdir(sespath) if file.endswith('.edf')]
+        scoring_files = [os.path.join(sespath, file) for file in os.listdir(sespath) if file.endswith('.txt')]
+        events_files = [os.path.join(sespath, file) for file in os.listdir(sespath) if file.endswith('events.txt')] # Might require other filename
+        counter = 0
+        for iEdf in range(len(edf_files)):
+            edfInfo = {}
+            edfInfo['sub'] = subfolders[iSub]
+            edfInfo['ses'] = sesfolders[iSub][iSes]
+            edfInfo['file'] = edf_files[iEdf]
+            all_edf_files.append(edfInfo)
+
+        for iScoring in range(len(scoring_files)):
+            scoringInfo = {}
+            scoringInfo['sub'] = subfolders[iSub]
+            scoringInfo['ses'] = sesfolders[iSub][iSes]
+            scoringInfo['file'] = scoring_files[iScoring]
+            all_scoring_files.append(scoringInfo)
+
+        for iEvents in range(len(events_files)):
+            eventsInfo = {}
+            eventsInfo['sub'] = subfolders[iSub]
+            eventsInfo['ses'] = sesfolders[iSub][iSes]
+            eventsInfo['file'] = events_files[iEdf]
+            all_events_files.append(eventsInfo)
+#endregion
+
+
+#region STEP 5: Create modality agnostic files
+#### Create dataset description ####
+dataset_description_path = basepath + '\dataset_description.json'
+dataset_name = "" # Enter dataset name here
+authors = [""] # Enter authors here
+bids_version = "1.7.0"
+dataset_type = "raw"
+create_dataset_description_json(dataset_description_path, dataset_name, bids_version, dataset_type, authors)
+
+
+#### Create participants files ####
+participants_path_json = basepath + '\participants.json'
+create_participants_json(participants_path_json)
+participants_path_tsv = basepath + '\participants.tsv'
+## A script for creating a .tsv file with only a participants id column
+# create_participants_subonly_tsv(participants_path_tsv, subfolders)
+## A script for loading participant data from an Excel file and creating a .tsv file with all the columns
+# create_participants_excel_tsv(participants_path_tsv, subfolders)
+
+
+#### Create sessions.tsv files for all subjects with column 'session id' ####
+for sub in subfolders:
+    scoring_files_list = []
+    for ses in sesfolders[subfolders.index(sub)]:
+        scoring_files_list.append({'sub': sub, 'ses': ses})
+        subPath = os.path.join(basepath, sub, sub + "_sessions")
+        create_sessions_tsv(subPath + '.tsv', scoring_files_list)
+#endregion
+
+
+#region STEP 6: Create modality specific files
+task = 'sleep' # Add task here
+acq = '' # Add aquisition here
+
+#### Create channels.tsv ####
+for edf_file in all_edf_files:
+    # Extract subject and session from setfile name
+    sub, ses = edf_file['sub'], edf_file['ses']
+    
+    # Read setfile and extract channel names and recording duration
+    edf_file = os.path.join(basepath, sub, ses, 'eeg', edf_file['file'])
+    edf_file = mne.io.read_raw_eeglab(edf_file['file'], preload=True)
+    channels = edf_file.ch_names
+
+    # Create task and acquisition type based on setfile name and create channels.tsv file
+    filename = sub + '_' + ses + '_task-' + task + '_acq-' + acq + '_channels.tsv'
+    filepath = os.path.join(basepath, sub, ses, 'eeg', filename)
+    create_loaded_channels_tsv(filepath, channels)
+
+
+#### Create scoring_events.tsv and scoring_events.json ####
+task = 'sleep' # Add task here
+acq = 'scoring' # Add aquisition here
+for scoring_file in all_scoring_files:
+    sub, ses = scoring_file['sub'], scoring_file['ses']
+    scoring_file = scoring_file['file']
+    filename = sub + '_' + ses + '_task-' + task + '_acq-' + acq + '_events.tsv'
+    filepath = os.path.join(basepath, sub, ses, 'eeg', filename)
+    create_scoringFile_tsv(filepath, scoring_file)
+
+# Create one scoring_events.json file at top directory level applicable to all scoring_events.tsv files
+create_scoringFile_json(os.path.join(basepath, 'task-' + task + '_acq-' + acq + '_events.json'))
+
+
+#### Create events.json ####
+# Create one events.json file at top directory level applicable to all events.tsv files
+# See matlab script "extractEvents.m" for details on how the events.tsv files are created
+create_events_json(os.path.join(basepath, 'task-' + task +'_events.json'))
+
+#### Create eeg_recording_data file ####
+eeg_dict = {
+    "TaskName": "",
+    "SamplingFrequency": 250,
+    "EEGReference": "average",
+    "PowerLineFrequency": 50,
+    "SoftwareFilters": "n/a",
+    "TaskDescription": "",
+    "InstitutionName": "",
+    "InstitutionalDepartmentName": "",
+    "InstitutionAddress": "",
+    "TriggerChannelCount": 1,
+    "EEGPlacementScheme": "",
+    "RecordingDuration": 0
+}
+
+for edf_file in edf_files:
+    # Extract subject and session from edfFile name
+    try:
+        sub, ses = edf_file.split('_')[0], edf_file.split('_')[1]
+    except:
+        sub = edf_file[:3].lower() + '-' + edf_file[3:6]
+        ses = edf_file[6:9].lower() + '-0' + edf_file[9:11]
+
+    # Create task, acquisition type and eeg placement scheme based on edfFile name
+    acq = '' # Add aquisition here
+    task = 'sleep' # Add task here
+    eeg_placement_scheme = '10/20' # Add scheme here
+
+    # Read edfFile and extract recording duration
+    edf_file = os.path.join(basepath, sub, ses, 'eeg', edf_file)
+    try:
+        edf_file = mne.io.read_raw_eeglab(edf_file, preload=True)
+        recording_duration = edf_file.tmax
+    except:
+        recording_duration = 0
+    
+    # Add subject and session specific information to eeg_dict and create eeg_recording_data.json file
+    filename = sub + '_' + ses + '_task-' + task.lower() + '_acq-' + acq + '_eeg.json'
+    filepath = os.path.join(basepath, sub, ses, 'eeg', filename)
+    eeg_dict['TaskName'] = task
+    eeg_dict['TaskDescription'] = ''
+    eeg_dict['InstitutionName'] = ''
+    eeg_dict['InstitutionalDepartmentName'] = ''
+    eeg_dict['InstitutionAddress'] = ''
+    eeg_dict['EEGPlacementScheme'] = eeg_placement_scheme
+    eeg_dict['RecordingDuration'] = recording_duration
+    create_recording_data_json(filepath, eeg_dict)
+#endregion
\ No newline at end of file