Combine data files under different directories

For example:

Parent Folder
- Folder
- Folder
  - data1.csv
  - data2.csv
- Folder
  - Folder
    - data1.csv
    - data2.csv

This is a typical data folder layout you end up with when collecting data from experiments. Each run of a test may save a separate data file for each channel. How do you extract all of these data files into the parent folder and combine them?

The following code might help you.

Import the required modules first.

import os.path
import os
import numpy as np

Get the current working directory.

# Root directory for the search. Note that os.getcwd() returns the process's
# current working directory, which is not necessarily the folder containing
# this script -- run the code from the parent data folder.
working_dir = os.getcwd()

The code walks through every directory under the current parent directory and extracts all files whose names end with the suffixes you set. The datasets found in each directory are combined and named after the directory path from which they were extracted. All combined datasets are finally saved in a folder named "data".

# walking to the directory top-down
for root, dirs, files in os.walk(working_dir):
# preallocate file list
    file_list = []
    for filename in files:
         # match files that match this name
        if filename.endswith('sec_1-4_001.lvm'):
            D_SEC_1 = np.loadtxt(os.path.join(root, filename))
            file_list.append(os.path.join(root, filename)) 
        elif filename.endswith('sec_5-8_001.lvm'):
            D_SEC_2 = np.loadtxt(os.path.join(root, filename))
            file_list.append(os.path.join(root, filename)) 
        elif filename.endswith('sec_9-12_001.lvm'):
            D_SEC_3 = np.loadtxt(os.path.join(root, filename))
            file_list.append(os.path.join(root, filename)) 
        elif filename.endswith('sec_13-16_001.lvm'):
            D_SEC_4 = np.loadtxt(os.path.join(root, filename))
            file_list.append(os.path.join(root, filename)) 
        elif filename.endswith('sec_17-20_001.lvm'):
            D_SEC_5 = np.loadtxt(os.path.join(root, filename))
            file_list.append(os.path.join(root, filename))
            
    # save current directory's data in a list
    df_list = [np.loadtxt(file) for file in file_list]
    if df_list:
        # constrain all sec arrays are the same dimensions
        rows = min(len(D_SEC_1),len(D_SEC_2),len(D_SEC_3),len(D_SEC_4),len(D_SEC_5))
        D_SEC_1 = D_SEC_1[0:rows,0:5]
        D_SEC_2 = D_SEC_2[0:rows,0:5]
        D_SEC_3 = D_SEC_3[0:rows,0:5]
        D_SEC_4 = D_SEC_4[0:rows,0:5]
        D_SEC_5 = D_SEC_5[0:rows,0:5]
        
        # combine the SEC data from one test in order
        SEC = np.concatenate((D_SEC_1,D_SEC_2,D_SEC_3,D_SEC_4,D_SEC_5),axis = 1)
        
        # define data filename
        # get the directory names from parent to current directories
        temp = root.replace(working_dir,"")
        # remove first backslash
        temp = temp[1:]
        # replace slashes to "_"
        dirname = temp.replace("\\","_")
        # replace spaces to "_"
        dirname = dirname.replace(" ","_")