Skip to content
Snippets Groups Projects
Commit 0c191f20 authored by Laura Rævsbæk Birch's avatar Laura Rævsbæk Birch
Browse files

Upload New File

parent 95b833f1
No related branches found
No related tags found
1 merge request!1new laura folder
This commit is part of merge request !1. Comments created here will be created in the context of that merge request.
# Created by: Laura Birch - 201804337@post.au.dk
# Last reviewed: 01/08/2023
# This main script can be used to format data to BIDS. Note that each script might need changes to fit your data.
# Steps 1-4 prepare the data and steps 5-6 convert the data to the BIDS format
#
# COMMENTS
# Step 1: Change variables 'basepath' and 'original_data_path' to fit your data
# Step 2: Skip if you already have these folders
# Step 5: Insert required info for the dataset description + choose the appropriate function for handling the participants.tsv
# Step 6: Insert task and acquisition, leave empty if none apply + insert all relevant info for the _eeg.json file
#region Imports
import os
import mne
from ntlab_create_folders import create_sourcedata_bids_folders
from ntlab_create_dataset_description_json import create_dataset_description_json
from ntlab_create_participants_subonly_tsv import create_participants_subonly_tsv
from ntlab_create_participants_excel_tsv import create_participants_excel_tsv
from ntlab_create_participants_json import create_participants_json
from ntlab_create_sessions_tsv import create_sessions_tsv
from ntlab_create_loaded_channels_tsv import create_loaded_channels_tsv
from ntlab_create_recording_data_json import create_recording_data_json
from ntlab_create_events_json import create_events_json
from ntlab_create_scoringfile_json import create_scoringFile_json
from ntlab_create_scoringfile_tsv import create_scoringFile_tsv
# import matlab.engine
#endregion
#region STEP 1: Change path name to relevant path
# original_data_path: folder that currently holds the original data
# basepath: root folder of the new BIDS dataset
# sourcepath: the 'sourcedata' folder located directly inside basepath
original_data_path = os.path.join('') # Add path for folder with original data
basepath = os.path.join('') # Add path for new BIDS folder
sourcepath = os.path.join(basepath, 'sourcedata')
#endregion
#region STEP 2: Create folders for sourcedata and bids
# Skip this step if the folders already exist
create_sourcedata_bids_folders(basepath, original_data_path)
#endregion
#region STEP 3: Makes a list of all subjects and a list of all their sessions from the folders in the sourcedata folder
# Create list of all subjects: every directory directly inside the sourcedata folder
subfolders = [folder for folder in os.listdir(sourcepath) if os.path.isdir(os.path.join(sourcepath, folder))]
nSubs = len(subfolders)
# Create list of all sessions for each subject.
# sesfolders[i] always belongs to subfolders[i]: a subject whose folder cannot be
# listed gets an empty list, so the two lists never drift out of step.
sesfolders = []
for sub in subfolders:
    # Sessions are looked up inside the sourcedata folder, which is also where
    # the per-session files are searched for in STEP 4.
    subpath = os.path.join(sourcepath, sub)
    try:
        sessions = [folder for folder in os.listdir(subpath) if os.path.isdir(os.path.join(subpath, folder))]
    except OSError:
        print('No sessions for subject ' + sub)
        sessions = []
    sesfolders.append(sessions)
#endregion
#region STEP 4: Create lists for each filetype such as EDF, Poly5, scoring files etc. These are found in the sourcedata folder
#Requirement: the original data have been moved to the sourcedata folder
# Every list holds one dict per file with the keys:
#   'sub'  - subject folder name
#   'ses'  - session folder name
#   'file' - path of the file inside the sourcedata folder
all_edf_files = []
all_scoring_files = []
all_events_files = []
for sub, sessions in zip(subfolders, sesfolders):
    for ses in sessions:
        sespath = os.path.join(sourcepath, sub, ses)
        # List the session folder once and filter the result per file type
        session_files = os.listdir(sespath)
        edf_files = [os.path.join(sespath, file) for file in session_files if file.endswith('.edf')]
        # NOTE(review): '.txt' also matches files ending in 'events.txt', so event files
        # end up in the scoring list as well - confirm that this is intended.
        scoring_files = [os.path.join(sespath, file) for file in session_files if file.endswith('.txt')]
        events_files = [os.path.join(sespath, file) for file in session_files if file.endswith('events.txt')] # Might require other filename
        all_edf_files.extend({'sub': sub, 'ses': ses, 'file': path} for path in edf_files)
        all_scoring_files.extend({'sub': sub, 'ses': ses, 'file': path} for path in scoring_files)
        # Each events entry uses its own path (previously indexed with the EDF loop counter)
        all_events_files.extend({'sub': sub, 'ses': ses, 'file': path} for path in events_files)
#endregion
#region STEP 5: Create modality agnostic files
# All paths are built with os.path.join so they use the separator of the current
# operating system (the former '\d...' / '\p...' literals were invalid escape
# sequences and only produced usable paths on Windows).
#### Create dataset description ####
dataset_description_path = os.path.join(basepath, 'dataset_description.json')
dataset_name = "" # Enter dataset name here
authors = [""] # Enter authors here
bids_version = "1.7.0"
dataset_type = "raw"
create_dataset_description_json(dataset_description_path, dataset_name, bids_version, dataset_type, authors)
#### Create participants files ####
participants_path_json = os.path.join(basepath, 'participants.json')
create_participants_json(participants_path_json)
participants_path_tsv = os.path.join(basepath, 'participants.tsv')
# Choose ONE of the two functions below by uncommenting it:
## A script for creating a .tsv file with only a participants id column
# create_participants_subonly_tsv(participants_path_tsv, subfolders)
## A script for loading participant data from an Excel file and creating a .tsv file with all the columns
# create_participants_excel_tsv(participants_path_tsv, subfolders)
#### Create sessions.tsv files for all subjects with column 'session id' ####
# subfolders and sesfolders are parallel lists, so they are walked in lockstep
for sub, sessions in zip(subfolders, sesfolders):
    scoring_files_list = [{'sub': sub, 'ses': ses} for ses in sessions]
    subPath = os.path.join(basepath, sub, sub + "_sessions")
    create_sessions_tsv(subPath + '.tsv', scoring_files_list)
#endregion
#region STEP 6: Create modality specific files
task = 'sleep' # Add task here, leave empty if none applies
acq = '' # Add acquisition here, leave empty if none applies
#### Create channels.tsv ####
# Writes one <sub>_<ses>[_task-<task>][_acq-<acq>]_channels.tsv per EDF recording
for edf_info in all_edf_files:
    # Subject and session were stored together with the file path in STEP 4
    sub, ses = edf_info['sub'], edf_info['ses']
    # Read the EDF file from the path it was found at and extract the channel names.
    # Only the header is needed for the channel names, so the signal data is not preloaded.
    raw = mne.io.read_raw_edf(edf_info['file'], preload=False)
    channels = raw.ch_names
    # Build the file name; empty entities are left out so that no dangling
    # '_acq-' (or '_task-') ends up in the name
    entities = [sub, ses]
    if task:
        entities.append('task-' + task)
    if acq:
        entities.append('acq-' + acq)
    filename = '_'.join(entities) + '_channels.tsv'
    filepath = os.path.join(basepath, sub, ses, 'eeg', filename)
    create_loaded_channels_tsv(filepath, channels)
#### Create scoring_events.tsv and scoring_events.json ####
task = 'sleep' # Add task here
acq = 'scoring' # Add acquisition here
# Entity part shared by every scoring file name
scoring_entities = f'task-{task}_acq-{acq}'
for scoring_info in all_scoring_files:
    sub = scoring_info['sub']
    ses = scoring_info['ses']
    # One events.tsv per scoring file, written to the subject's/session's 'eeg' folder
    tsv_name = f'{sub}_{ses}_{scoring_entities}_events.tsv'
    tsv_path = os.path.join(basepath, sub, ses, 'eeg', tsv_name)
    create_scoringFile_tsv(tsv_path, scoring_info['file'])
# Create one scoring_events.json file at top directory level applicable to all scoring_events.tsv files
scoring_json_path = os.path.join(basepath, f'{scoring_entities}_events.json')
create_scoringFile_json(scoring_json_path)
#### Create events.json ####
# Create one events.json file at top directory level applicable to all events.tsv files
# See matlab script "extractEvents.m" for details on how the events.tsv files are created
events_json_path = os.path.join(basepath, f'task-{task}_events.json')
create_events_json(events_json_path)
#### Create eeg_recording_data file ####
# Template for every _eeg.json file. Insert the dataset-wide information here;
# 'TaskName', 'EEGPlacementScheme' and 'RecordingDuration' are filled in per recording below.
eeg_dict = {
    "TaskName": "",
    "SamplingFrequency": 250,
    "EEGReference": "average",
    "PowerLineFrequency": 50,
    "SoftwareFilters": "n/a",
    "TaskDescription": "",
    "InstitutionName": "",
    "InstitutionalDepartmentName": "",
    "InstitutionAddress": "",
    "TriggerChannelCount": 1,
    "EEGPlacementScheme": "",
    "RecordingDuration": 0
}
acq = '' # Add acquisition here, leave empty if none applies
task = 'sleep' # Add task here
eeg_placement_scheme = '10/20' # Add scheme here
# Iterate over ALL recordings collected in STEP 4 (not only those of the last session)
for edf_info in all_edf_files:
    # Subject and session were stored together with the file path in STEP 4
    sub, ses = edf_info['sub'], edf_info['ses']
    # Read the EDF file and extract the recording duration; a file that cannot be
    # read is reported and gets a duration of 0 instead of stopping the script
    try:
        raw = mne.io.read_raw_edf(edf_info['file'], preload=False)
        recording_duration = raw.tmax
    except Exception as err:
        print('Could not read recording duration from ' + str(edf_info['file']) + ': ' + str(err))
        recording_duration = 0
    # Build the file name; empty entities are left out so that no dangling
    # '_acq-' (or '_task-') ends up in the name
    entities = [sub, ses]
    if task:
        entities.append('task-' + task.lower())
    if acq:
        entities.append('acq-' + acq)
    filename = '_'.join(entities) + '_eeg.json'
    filepath = os.path.join(basepath, sub, ses, 'eeg', filename)
    # Add the recording specific information to a copy of the template, so the values
    # entered in eeg_dict above are kept as they are, and create the _eeg.json file
    recording_dict = dict(eeg_dict)
    recording_dict['TaskName'] = task
    recording_dict['EEGPlacementScheme'] = eeg_placement_scheme
    recording_dict['RecordingDuration'] = recording_duration
    create_recording_data_json(filepath, recording_dict)
#endregion
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment