#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
.. _RADIOMICS:

The **RADIOMICS** module enables the extraction of radiomic features using the PyRadiomics library (https://pyradiomics.readthedocs.io/en/latest/).

Options
-------

The **RADIOMICS** module can be used with the following options:

- ``verbose``: Enable or disable verbose mode.
- ``timer``: Enable or disable the timer to record execution time.
- ``inputFolder``: Path to the input folder containing images.
- ``outputFolder``: Path to the output folder where radiomics results will be saved.
- ``outputFolderSuffix``: Adds a suffix to the input folder name to create an output folder.
- ``log``: Path to a file for saving logs.
- ``new_log_file``: Create a new log file; if a file with the same name already exists, it will be overwritten.
- ``skip``: Path to a file listing subfolders inside the input folder to exclude from processing.
- ``multiprocessing``: Specify the number of cores to use for parallel processing.
- ``configs``: Path to a radiomics configuration file, which can contain multiple configurations for PyRadiomics. See details in the next section :ref:`Radiomics_configuration_file`.
- ``pyradiomics_config``: Specify a PyRadiomics configuration file. Use this instead of ``configs`` if multiple configurations are unnecessary or if using PyRadiomics-specific preprocessing options.
- ``image_filename``: Name of the image file used for radiomic analysis.
- ``mask_filename``: Name of the mask (segmentation) file used for radiomic analysis.
- ``radiomics_filename``: Name of the Excel file that will store radiomics results.
- ``stats_filename``: Name of an optional Excel file to store statistics on radiomic features. If not specified, this file will not be created.
- ``save_at_the_end``: Specify whether the Excel file should be created only after processing all patients. Disabled by default, so new lines are added to the Excel file after processing each patient.

Example Usage
-------------

Below is an example of how to use the **RADIOMICS** module:

.. code-block:: bash

    RADIOMICS:
    {
        inputFolder: /path/to/NIFTI_folder
        outputFolder: /path/to/radiomics_results
        image_filename: img_111.nii.gz
        mask_filename: msk_111.nii.gz
        radiomics_filename: radiomics.xlsx
        save_at_the_end: false
        configs: /path/to/radiomics_config_file
        log: /path/to/logs/radiomics.log
    }

In this example:

- **inputFolder**: Specifies the folder containing the NIfTI images for radiomics analysis.
- **outputFolder**: Directory where radiomics results are stored.
- **image_filename**: Indicates the name of the image to be analyzed.
- **mask_filename**: Name of the mask file associated with the image.
- **radiomics_filename**: Designates the Excel file that will hold radiomics features extracted by PyRadiomics.
- **save_at_the_end**: If set to `false`, entries are written to the Excel file immediately after processing each patient.
- **configs**: Specifies the configuration file for PyRadiomics, allowing customization of feature extraction.
- **log**: Provides the path to the log file for recording the processing details.
"""
# Extract Radiomics features
#
# This script uses the PyRadiomics library to extract radiomics features using one or multiple configurations.
# The configurations are specified in a config file (see documentation for more details).
# Gabor decomposition is performed by this script and not by PyRadiomics.
# Usage:
# radiomics_multiprocessing.py -i <inputfolder> -o <outputfolder> [options]
#
#__author__ Florent Tixier
#__email__ tixier@jhu.edu
#
# Options:
# -h, --help Show this help message and exit
# -v, --verbose Enable verbose output (default: False)
# -i, --inputFolder <inputfolder> Input folder with images to analyze
# -o, --outputFolder <outputfolder> Output folder to save the results (default: ~/)
# -c, --config <configfile> File with a list of radiomics configurations for PyRadiomics (see CONFIGS_EXAMPLE)
# -p, --pyradiomics_config <file> A PyRadiomics configuration file (use instead of --config if only one configuration is needed)
# -I, --img_filename <filename> Name of images to analyze in the folder (default: img.nii.gz)
# -M, --msk_filename <filename> Name of masks to analyze in the folder (default: msk.nii.gz)
# -R, --radiomics_filename <filename> Name of the Excel file to save radiomics features (default: radiomics.xlsx)
# --stats_filename <filename> Name of the Excel file to save radiomics statistics (optional)
# -x Save Excel file with radiomics after processing all patients
# -S, --skip <skip file path> Path to file with filenames to skip
# --include <include file path> Path to file with filenames to include (default: include all)
# --log <log file path> Redirect stdout to a log file
# --new_log Overwrite previous log file
# -j, --n_jobs <number of jobs> Number of simultaneous jobs (default: 1)
#
# Help:
# radiomics_multiprocessing.py -h
import sys, getopt, os
from tqdm import tqdm
import glob
import logging
import pandas as pd
import math
from math import pi
import SimpleITK as sitk
import multiprocessing
from radiomics import featureextractor
import re
from datetime import datetime
from utils import eprint
from utils import hprint_msg_box
from utils import hprint
from utils import format_list_multiline
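# Command-line entry point: parse the options, then extract radiomics for every entry of the input folder.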
def _print_help():
    """Print the manual page shown for ``-h`` / ``--help``."""
    print("NAME")
    print("\tradiomics_multiprocessing.py\n")
    print("SYNOPSIS")
    print("\tradiomics_multiprocessing.py [-h|--help][-v|--verbose][-i|--inputFolder <inputfolder>] [-I|--img_filename <img_filename>] [-M|--msk_filename <msk_filename>] [-R|--radiomics_filename <radiomics_filename>] [--stats_filename <stats_filename>] [-o|--outputFolder <outputFolder>] -c <configfile> [-x] [-S|--skip <skip>] [--include <include>] [--log <log>] [-j|--n_jobs <n_jobs>]")
    print("DESRIPTION")
    print("\tExtract Radiomics features for patients in the input folder with configurations in the config file\n")
    print("OPTIONS")
    print("\t -h, --help: print this help page")
    print("\t -v, --verbose: False by default")
    print("\t -i, --inputFolder: input folder with images to analyze")
    print("\t -I, --img_filename: name of images to analyze in the folder (default: img.nii.gz)")
    print("\t -M, --msk_filename: name of masks to analyze in the folder (default: msk.nii.gz)")
    print("\t -R, --radiomics_filename: Name of the Excel file to save radiomics features (default: radiomics.xlsx)")
    print("\t --stats_filename: Name of the Excel file to save radiomics statistics (optional)")
    print("\t -o, --outputFolder (or --outFolder): Output folder to save the results (default: ~/)")
    print("\t -c, --config: File with a list of radiomics configurations for pyradiomics (see CONFIGS_EXAMPLE)")
    print("\t -p, --pyradiomics_config: A pyradiomics configuration file (to use instead of --config if there is no need of multiple configurations or to use preprocessing options of pyradiomics)")
    print("\t -x: Save Excel file with radiomics after processing all the patients")
    print("\t -S, --skip: Path to file with filenames to skip")
    print("\t --include: Path to file with filenames to include (all files included by default)",flush=True)
    print("\t --log: redirect stdout to a log file")
    print("\t --new_log: overwrite previous log file")
    print("\t -j, --n_jobs: Number of simultaneous jobs (default:1)")


def _read_name_list(file_name, label):
    """Return the lines of ``file_name``; print an error and return ``[]`` when it cannot be read."""
    try:
        with open(file_name, 'r') as handle:
            return handle.read().splitlines()
    except (OSError, UnicodeError):
        print(f"ERROR! Unable to read the {label} file", flush=True)
        return []


def main(argv):
    """
    Parse the command line and extract radiomics for every entry of the input folder.

    With ``n_jobs == 1`` the entries are processed one after the other and the
    returned features are accumulated in a single dataframe, which is written
    to the output folder when ``-x`` is given. With more jobs the entries are
    dispatched to a process pool; the temporary ``.tmp___*.xlsx`` files found
    in the output folder are then merged and, only if the merge succeeded,
    deleted. Statistics are written afterwards when ``--stats_filename`` is set.

    :param argv: Command-line arguments without the program name.
    :raises SystemExit: Status 2 for invalid options, a missing input folder or
        an invalid job count; status 1 for an invalid or missing configuration;
        no status after printing the help page.
    """
    inpath = ''
    outpath = '~/'
    configFile = ''
    configs = []  # list of configs for the radiomics feature extraction
    pyrconfigFile = ''
    img_filename = "img.nii.gz"
    msk_filename = "msk.nii.gz"
    radiomics_filename = 'radiomics.xlsx'
    verbose = False
    n_jobs = 1
    skip_file_name = ''
    skip_files = []
    include_file_name = ''
    include_files = []
    save_xlsx_at_the_end = False
    features_df = pd.DataFrame()  # dataframe for radiomics features
    log = ''
    new_log = False
    stats_filename = ''

    # "outputFolder=" is the documented spelling; "outFolder=" is kept so that
    # command lines written for the previous option list still work.
    long_options = ["log=", "new_log", "verbose", "skip=", "include=", "help",
                    "config=", "pyradiomics_config=", "inputFolder=",
                    "outputFolder=", "outFolder=", "n_jobs=", "img_filename=",
                    "msk_filename=", "radiomics_filename=", "stats_filename="]
    try:
        opts, _ = getopt.getopt(argv, "vhi:o:c:p:j:I:M:R:S:x", long_options)
    except getopt.GetoptError:
        print('Usage: radiomics_multiprocessing.py -i <inputfolder> -o <outputfolder> -c <configfile>')
        print('For help, use: radiomics_multiprocessing.py -h')
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            _print_help()
            sys.exit()
        elif opt in ("-i", "--inputFolder"):
            inpath = arg
        elif opt in ("-o", "--outputFolder", "--outFolder"):
            outpath = arg
        elif opt in ("-I", "--img_filename"):
            img_filename = arg
        elif opt in ("-M", "--msk_filename"):
            msk_filename = arg
        elif opt in ("-R", "--radiomics_filename"):
            radiomics_filename = arg
        elif opt == "--stats_filename":
            stats_filename = arg
        elif opt in ("-v", "--verbose"):
            verbose = True
        elif opt == "-x":
            save_xlsx_at_the_end = True
        elif opt in ("-j", "--n_jobs"):
            try:
                n_jobs = int(arg)
            except ValueError:
                print(f"ERROR! Invalid number of jobs: {arg}", flush=True)
                sys.exit(2)
        elif opt in ("-c", "--config"):
            configFile = arg
        elif opt in ("-p", "--pyradiomics_config"):
            pyrconfigFile = arg
        elif opt in ("-S", "--skip"):
            skip_file_name = arg
        elif opt == "--include":
            include_file_name = arg
        elif opt == "--log":
            log = arg
        elif opt == "--new_log":
            new_log = True

    # Validate before touching the file system: an empty input folder would
    # otherwise make the "<inpath>/*" pattern below scan the root directory.
    if inpath == '':
        print("ERROR! An input folder is required (-i <inputfolder>)", flush=True)
        print('For help, use: radiomics_multiprocessing.py -h')
        sys.exit(2)
    if n_jobs < 1:
        print(f"ERROR! The number of jobs must be at least 1 (got {n_jobs})", flush=True)
        sys.exit(2)

    # The default "~/" is not expanded by os.makedirs and would create a
    # directory literally named "~" in the working directory.
    outpath = os.path.expanduser(outpath)

    # set level for all classes
    logger = logging.getLogger("radiomics")
    logger.setLevel(logging.ERROR)

    if log != '':
        # NOTE: stdout stays redirected to the log file after main() returns,
        # as before; the handle is released when the interpreter exits.
        sys.stdout = open(log, 'w+' if new_log else 'a+')

    if skip_file_name != '':
        skip_files = _read_name_list(skip_file_name, "skip")
    if include_file_name != '':
        include_files = _read_name_list(include_file_name, "include")

    if verbose:
        msg = (
            f"Input folder: {inpath}\n"
            f"Output folder: {outpath}\n"
            f"Images name: {img_filename}\n"
            f"Masks name: {msk_filename}\n"
            f"Configuration file: {configFile}\n"
            f"Pyradiomics Configuration file: {pyrconfigFile}\n"
            f"Radiomics Excel file name: {radiomics_filename}\n"
            f"Save Excel file at the end: {save_xlsx_at_the_end}\n"
        )
        if stats_filename != '':
            msg += f"Save an additional Excel file with radiomics statistics in: {stats_filename}\n"
        msg += (
            f"Skip file: {skip_file_name}\n"
            f"Files to skip: {format_list_multiline(skip_files,5)}\n"
            f"Include file: {include_file_name}\n"
            f"Files to include: {format_list_multiline(include_files,5)}\n"
            f"Verbose: {verbose}\n"
            f"n_jobs: {n_jobs}\n"
            f"Log: {log}\n"
            f"Overwrite previous log file: {str(new_log)}\n"
        )
        hprint_msg_box(msg=msg, indent=2, title=f"RADIOMICS {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    # create outpath directory if needed
    os.makedirs(outpath, exist_ok=True)

    if configFile != '':
        try:
            read_config_file(configFile, configs, verbose)
        except Exception as err:
            print("\033[31mERROR! Invalid config file\033[0m", flush=True)
            print(err, flush=True)
            sys.exit(1)
        if verbose:
            print("Config file", configFile, " was read with success", flush=True)
    elif pyrconfigFile == '':
        print("A RADIOMICS_CONFIGS or a pyradiomics configuration file need to be specify", flush=True)
        sys.exit(1)

    # Escape the folder so glob metacharacters in its name are taken literally;
    # sort so that the processing order is reproducible.
    patients = sorted(glob.glob(glob.escape(inpath) + "/*"))
    bar_options = dict(
        ncols=100,
        desc="Extract Radiomics",
        bar_format="{l_bar}{bar} [time left: {remaining}, time spent: {elapsed}]",
        colour="yellow",
    )

    if n_jobs == 1:
        for patient in tqdm(patients, **bar_options):
            patient_features = extract_radiomics(
                patient, inpath, outpath, img_filename, msk_filename, configs,
                pyrconfigFile, features_df, radiomics_filename,
                save_xlsx_at_the_end, n_jobs, skip_files, include_files,
                verbose, log)
            features_df = pd.concat([features_df, patient_features], axis=0)
        if save_xlsx_at_the_end:
            features_df.to_excel(os.path.join(outpath, radiomics_filename), index=False)
    else:
        with multiprocessing.Pool(n_jobs) as pool:
            # Submit one task per entry and wait on each result inside the
            # tqdm loop so that the progress bar actually advances.
            pending = [
                pool.apply_async(
                    extract_radiomics,
                    (patient, inpath, outpath, img_filename, msk_filename,
                     configs, pyrconfigFile, features_df, radiomics_filename,
                     save_xlsx_at_the_end, n_jobs, skip_files, include_files,
                     verbose, log))
                for patient in patients
            ]
            for job in tqdm(pending, **bar_options):
                job.get()  # re-raises any exception raised in the worker
        # merge_xlsx returns 1 on success and -1 on failure (both truthy):
        # only delete the temporary files after a successful merge.
        if merge_xlsx(outpath, radiomics_filename) == 1:
            deleteTmp_xlsx(outpath)

    if stats_filename != '':
        radiomics_statistics(os.path.join(outpath, radiomics_filename),
                             os.path.join(outpath, stats_filename), verbose, log)
# Take a string and try to parse it to a list, a float, an int or a bool
def parse(i):
    """
    Convert a configuration string to the most specific Python value.

    Conversions are tried in this order: boolean (``True``/``true``/``False``/
    ``false``), ``int``, ``float``, an arithmetic expression containing ``pi``
    (for example ``pi/4``), and finally a bracketed comma-separated list whose
    items are parsed recursively. Anything else, including the empty string,
    is returned unchanged. Nested lists are not supported: every leading ``[``
    and trailing ``]`` is stripped before the items are parsed.

    :param i: Text to convert.
    :return: A ``bool``, ``int``, ``float``, ``list`` or the original value.
    """
    if i in ('True', 'true'):  # Bool True
        return True
    if i in ('False', 'false'):  # Bool False
        return False

    for convert in (int, float):
        try:
            return convert(i)
        except (TypeError, ValueError):
            continue

    # Nothing left to interpret for empty or non-text values (indexing an
    # empty string used to raise IndexError here).
    if not isinstance(i, str) or i == '':
        return i

    if 'pi' in i:
        # NOTE(security): eval() executes text coming from the configuration
        # file. Only ``pi`` and ``math`` are exposed and builtins are removed,
        # which narrows but does not eliminate the risk: use trusted
        # configuration files only.
        try:
            return eval(i, {"__builtins__": {}}, {"pi": pi, "math": math})
        except Exception:
            return i

    if not i.startswith('['):  # string
        return i

    items = i.split(',')
    items[0] = items[0].strip('[')
    items[-1] = items[-1].strip(']')
    if len(items) == 1 and items[0].strip() == '':
        return []
    return [parse(item) for item in items]
# Read a radiomics configuration file that can hold several configurations separated by blank lines
def read_config_file(config_File, configs, verbose):
    """
    Read the radiomics configurations stored in ``config_File``.

    The file is made of ``key: value`` lines. Blank lines separate two
    configurations and lines starting with ``#`` are ignored. Spaces and tabs
    are removed from keys and values, and values are kept as strings. A
    configuration is kept only when it defines both ``configName`` and
    ``imageType``; an incomplete one is reported and dropped. When a key is
    repeated inside one configuration, the last value wins.

    :param config_File: Path of the configuration file.
    :param configs: List to which one ``pandas.Series`` per configuration is
        appended (modified in place).
    :param verbose: Print every configuration that was found.
    :raises OSError: If the file cannot be opened.
    :raises ValueError: If a line is not of the form ``key: value``.
    """
    def _store(entries):
        # A config needs to contain at least a config name and an image type.
        if not entries:
            return
        if 'configName' not in entries or 'imageType' not in entries:
            print(f"\033[33mWARNING! Incomplete configuration ignored in {config_File} "
                  "(configName and imageType are both required)\033[0m", flush=True)
            return
        config = pd.Series(entries, dtype=object)
        configs.append(config)
        if verbose:
            print("\033[1mThe following configuration was found in", config_File, "\033[0m", flush=True)
            print(config, flush=True)

    entries = {}
    with open(config_File, 'r') as infile:
        for line_number, raw_line in enumerate(infile, start=1):
            line = raw_line.strip()
            if not line:
                _store(entries)
                entries = {}
                continue
            if line.startswith('#'):
                continue
            # Split on the first colon only. str.strip('configName:') removes a
            # *set of characters* from both ends and therefore damaged values
            # such as "gabor_fine"; split(':')[1] truncated values with colons.
            key, separator, value = line.partition(':')
            key = key.replace(' ', '').replace('\t', '')
            value = value.replace(' ', '').replace('\t', '')
            if not separator or not key:
                raise ValueError(
                    f"{config_File}, line {line_number}: expected 'key: value', got {line!r}")
            entries[key] = value
    # The last configuration is not necessarily followed by a blank line.
    _store(entries)
# Gabor filtering.
# This function should be used after cropping the image to reduce processing time.
def gaborFilterImg(img, params, ID, path="~/"):
    """
    Convolve every slice along the third axis of ``img`` with a 2D Gabor kernel.

    :param img: SimpleITK image; it is cast to 32-bit float and its third
        dimension is iterated slice by slice.
    :param params: Mapping with the optional keys ``verbose`` (default
        ``False``), ``save`` (``False``), ``padDistance`` (``10``, not used by
        this function), ``size`` (``8``), ``freq`` (``0.5``) and ``angle``
        (``0``, passed to ``math.cos``/``math.sin``, i.e. radians). The
        mapping is read but no longer modified.
    :param ID: Name of the sub-folder of ``path`` used when ``save`` is set.
    :param path: Root folder for the saved kernel and filtered image; a
        leading ``~`` is expanded.
    :return: The filtered image. A slice whose convolution fails is reported
        on stdout and keeps its unfiltered values.
    """
    # Default values for missing parameters, merged without touching the
    # caller's object. Only keys()/__getitem__ are used on ``params``.
    settings = {'verbose': False, 'save': False, 'padDistance': 10,
                'size': 8, 'freq': 0.5, 'angle': 0}
    for key in params.keys():
        settings[key] = params[key]

    size = settings['size']
    angle = settings['angle']
    half_size = size * 0.5
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)
    direction = (cos_a, -sin_a, sin_a, cos_a)
    origin = (half_size - half_size * (cos_a - sin_a),
              half_size - half_size * (sin_a + cos_a))

    kernel_source = sitk.GaborImageSource()
    kernel_source.SetOutputPixelType(sitk.sitkFloat32)
    kernel_source.SetDirection(direction)
    kernel_source.SetOrigin(origin)
    kernel_source.SetSize([size] * 2)
    kernel_source.SetSigma([size * .2] * 2)
    kernel_source.SetMean([half_size] * 2)
    kernel_source.SetFrequency(settings['freq'])
    kernel_img = kernel_source.Execute()

    out_dir = os.path.join(os.path.expanduser(path), ID)
    if settings['save']:
        os.makedirs(out_dir, exist_ok=True)
        sitk.WriteImage(kernel_img, os.path.join(out_dir, "GaborKernel.nii.gz"))

    convolution = sitk.ConvolutionImageFilter()
    filtered = sitk.Cast(img, sitk.sitkFloat32)

    slice_indices = range(filtered.GetSize()[2])
    if settings['verbose']:
        slice_indices = tqdm(slice_indices, ncols=100, desc="Apply gabor filter",
                             bar_format="{l_bar}{bar} [ time left: {remaining}, time spent: {elapsed}]")
    for k in slice_indices:
        # Each slice is read before it is overwritten, so filtering in place
        # never feeds an already filtered slice back into the convolution.
        try:
            filtered[:, :, k] = convolution.Execute(filtered[:, :, k], kernel_img)
        except Exception as e:
            print(f"Gabor filter failed on slice {k}: {e}", flush=True)

    if settings['save']:
        os.makedirs(out_dir, exist_ok=True)
        sitk.WriteImage(filtered, os.path.join(out_dir, "Gabor_img.nii.gz"))
    return filtered
# Merge the temporary Excel files into one file (the temporary files are removed separately by deleteTmp_xlsx)
def merge_xlsx(path, radiomics_filename):
    """
    Merge the temporary ``.tmp___*.xlsx`` files of ``path`` into one Excel file.

    The first sheet of every temporary file is read without header handling;
    the first row of every file except the first one is dropped and the
    remaining rows are stacked in file-name order.

    :param path: Folder that contains the temporary files and receives the result.
    :param radiomics_filename: Name of the merged Excel file.
    :return: ``1`` on success, ``-1`` on failure. Both values are truthy, so
        callers must compare against ``1``.
    """
    pattern = os.path.join(glob.escape(path), ".tmp___*.xlsx")
    file_list = sorted(glob.glob(pattern))
    if not file_list:
        print(f"\033[31mERROR! No temporary Excel files to merge in {path}\033[0m", flush=True)
        return -1

    try:
        # read_excel opens and closes each workbook itself.
        frames = [pd.read_excel(name, sheet_name=0, header=None, index_col=None)
                  for name in file_list]
    except Exception as err:
        print("\033[31mERROR! Excel files were not read correctly\033[0m", flush=True)
        print(err, flush=True)
        return -1

    # Remove the header row except for the first file.
    # NOTE(review): rows are stacked by position, which assumes that every
    # temporary file has the same columns in the same order - confirm against
    # the code that writes them.
    frames[1:] = [frame.iloc[1:] for frame in frames[1:]]

    try:
        combined = pd.concat(frames)
        combined.to_excel(os.path.join(path, radiomics_filename), header=False, index=False)
    except Exception as err:
        print("\033[31mERROR! Excel files were not merged\033[0m", flush=True)
        print(err, flush=True)
        return -1
    print("Excel files were merged with success", flush=True)
    return 1
# Compute and save radiomics statistics
def _radiomics_statistics_block(frame, radiomics_columns, template_columns, label):
    """Return the ``describe()`` statistics of ``frame`` laid out in the output table format."""
    stats = frame[radiomics_columns].describe()
    block = pd.DataFrame(columns=template_columns)
    block = block.rename(columns={'patientID': 'statistics'})
    block['statistics'] = stats.index.tolist()
    block['sub_Analysis'] = label
    block.index = stats.index
    # describe() may drop non-numeric columns: assign only what it returned
    # so that the two sides always have the same number of columns.
    block[list(stats.columns)] = stats
    return block


def radiomics_statistics(xlsx_input_file, xlsx_output_file, verbose, log):
    """
    Save descriptive statistics of the radiomic features to an Excel file.

    The statistics returned by ``DataFrame.describe()`` are computed for the
    whole table (rows labelled ``all`` in ``sub_Analysis``) and then for every
    distinct value of the ``sub_Analysis`` column. Columns whose name starts
    with ``patientID``, ``sub_Analysis`` or ``diagnostics`` are left out of
    the computation. If the output file already exists, the result is written
    to a copy of the name carrying a timestamp instead of overwriting it.

    Errors are reported on stdout and never raised.

    :param xlsx_input_file: Excel file with the radiomic features.
    :param xlsx_output_file: Excel file that receives the statistics.
    :param verbose: Print progress and warnings.
    :param log: Path of a log file receiving the messages of this function, or
        ``''`` to leave stdout untouched. The previous stdout is restored and
        the file is closed before returning.
    """
    previous_stdout = sys.stdout
    log_handle = open(log, 'a+') if log != '' else None
    if log_handle is not None:
        sys.stdout = log_handle
    try:
        try:
            df = pd.read_excel(xlsx_input_file)
        except Exception as e:
            print("\033[31mERROR reading ", xlsx_input_file, ": ", str(e), "\033[0m", flush=True)
            return

        try:
            # Define columns to exclude.
            # NOTE(review): re.match anchors at the start of the name, so
            # "diagnostics" is only excluded as a prefix - confirm that
            # diagnostics columns are never prefixed (e.g. by a config name).
            exclude_pattern = r'^(patientID|sub_Analysis)|diagnostics'
            radiomics_columns = [col for col in df.columns
                                 if not re.match(exclude_pattern, str(col))]

            # Statistics for the entire dataset.
            blocks = [_radiomics_statistics_block(df, radiomics_columns, df.columns, 'all')]

            # Statistics for each sub_Analysis value.
            for value in df['sub_Analysis'].unique():
                if verbose:
                    print(f"Calculating statistics for sub_Analysis = {value}", flush=True)
                subset_df = df[df['sub_Analysis'] == value]
                blocks.append(_radiomics_statistics_block(subset_df, radiomics_columns, df.columns, value))

            statistic_df = pd.concat(blocks, ignore_index=True)
        except Exception as e:
            print("\033[31mERROR computing statistics for ", xlsx_input_file, ": ", str(e), "\033[0m", flush=True)
            return

        # Never overwrite an existing file: fall back to a timestamped name.
        if os.path.exists(xlsx_output_file):
            timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
            filename, extension = os.path.splitext(os.path.basename(xlsx_output_file))
            new_filename = f"{filename}_{timestamp}{extension}"
            if verbose:
                print("\033[33mWARNING!", xlsx_output_file, "already exists, results will be saved in", new_filename, "\033[0m", flush=True)
            xlsx_output_file = os.path.join(os.path.dirname(xlsx_output_file), new_filename)

        # Save the final statistics dataframe to Excel.
        try:
            statistic_df.to_excel(xlsx_output_file, index=False)
        except Exception:
            print("\033[31mERROR saving ", xlsx_output_file, "\033[0m", flush=True)
        else:
            hprint("Radiomics statistics saved", xlsx_output_file)
    finally:
        if log_handle is not None:
            sys.stdout = previous_stdout
            log_handle.close()
# Delete the temporary Excel files of a folder
def deleteTmp_xlsx(path):
    """
    Delete the temporary ``.tmp___*.xlsx`` files found directly in ``path``.

    :param path: Folder to clean. Its name is escaped, so glob metacharacters
        such as ``[`` in the folder name are matched literally.
    :raises OSError: If a file exists but cannot be removed.
    """
    pattern = os.path.join(glob.escape(path), ".tmp___*.xlsx")
    for name in glob.glob(pattern):
        try:
            os.remove(name)
        except FileNotFoundError:
            # Already removed by someone else between listing and deletion.
            continue
if __name__ == "__main__":
    # Run as a script: forward the command-line arguments (without the program name).
    main(sys.argv[1:])