Source code for predict

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
.. _PREDICT:

The PREDICT module enables making predictions on new data using a model previously built with the `sklearn` package and saved as a pickle file.

Here is an example of how to save a model as a pickle file:

.. code-block:: python

    from sklearn import svm
    from sklearn import datasets
    import joblib
    
    iris = datasets.load_iris()
    X, y = iris.data, iris.target
    
    clf = svm.SVC()
    clf.fit(X, y)
    
    joblib.dump(clf, "model.pkl")

The PREDICT module can be used with the following options:

- ``verbose``: Enable or disable verbose mode.
- ``timer``: Enable or disable the timer to record execution time.
- ``log``: Specify the path to a file for saving logs.
- ``new_log_file``: Create a new log file: if a log file with the same name already exists, it will be overwritten.
- ``inputFolder``: Specify the path to the input folder.
- ``outputFolder``: Specify the path to the output folder.
- ``modelFolder``: Specify the path with data from a previously built model (optional, to use with mode: `External`)
- ``radiomics_filename``: Specify the name of the Excel file with the radiomics results.
- ``model_filename``: Specify the name of the pickle file with the model.
- ``predict_filename``: Specify the name of the Excel file where predictions will be saved.

Here is an example of how to use the PREDICT module:

.. code-block:: bash

    PREDICT
    {
        inputFolder: /path/to/radiomics_results
        # No output folder specified: save output in the input folder
        modelFolder: /path/to/radiomics_model
        radiomics_filename: radiomics.xlsx
        model_filename: model.pkl
        predict_filename: predict.xlsx
        log: /path/to/logs/predict.log
    }

In this example:

- **inputFolder**: Specifies the folder containing radiomics results for prediction.
- **modelFolder**: Specifies the folder containing the pre-trained model.
- **radiomics_filename**: Specifies the Excel file containing the radiomics features for prediction.
- **model_filename**: Specifies the pickle file containing the saved model.
- **predict_filename**: Specifies the Excel file where predictions will be saved.
- **log**: Specifies a path for the log file.

"""

# Make predictions on new data using a pre-trained sklearn model.
# 
# Usage:
#     predict.py -i <inputFolder> --radiomicsFile <radiomics excel file> -m <modelFolder> --modelFile <model.pkl>
# 
# Options:
#     -h, --help                       Show this help message and exit
#     -v, --verbose                    Enable verbose output (default: False)
#     -i, --inputFolder <inputFolder>  Input folder containing radiomics features
#     -o, --outputFolder <outputFolder> Output folder to save prediction results (default: the input folder)
#     -m, --modelFolder <modelFolder>  Folder containing the saved model file (default: current working directory)
#     -r, --radiomicsFile <radiomicsFile> Name of the Excel file with radiomics data to predict (default: radiomics.xlsx)
#     -p, --predictFile <predictFile>  Name of the Excel file to save predictions (default: predicted.xlsx)
#     -M, --modelFile <modelFile>      Name of the pickle file with the sklearn model (default: model.pkl)
#     --log <logFile>                  Redirect stdout to a log file
#     --new_log                        Overwrite previous log file if it exists
#
# Help:
#     predict.py -h

import sys, getopt, os
import joblib
import pandas as pd
from datetime import datetime
from utils import hprint_msg_box

def _print_error(err):
    """Print *err* on the current stdout with a red ``ERROR:`` prefix."""
    print(f"\033[31mERROR:\033[0m{err}", flush=True)


def _print_help():
    """Print the command-line help page of predict.py."""
    print("NAME")
    print("\tpredict.py\n")
    print("SYNOPSIS")
    print(
        "\tpredict.py [-h|--help][-v|--verbose][--log <logFile>][--new_log]"
        "[-i|--inputFolder <inputfolder>][-o|--outputFolder <outputFolder>]"
        "[-m|--modelFolder <modelFolder>][-r|--radiomicsFile <radiomicsFile>]"
        "[-M|--modelFile <modelFile>][-p|--predictFile <predictFile>]\n"
    )
    print("DESCRIPTION")
    print("\tMake prediction on new data\n")
    print("OPTIONS")
    print("\t -h, --help: print this help page")
    print("\t -v, --verbose: False by default")
    print("\t -i, --inputFolder: input folder with the radiomics file")
    print("\t -o, --outputFolder: output folder to save the predictions (input folder by default)")
    print("\t -m, --modelFolder: folder containing the saved model file")
    print("\t -r, --radiomicsFile: name of the excel file with radiomics results")
    print("\t -p, --predictFile: name of the excel file to save prediction")
    print("\t -M, --modelFile: name of the pickle file with sklearn model to apply to new data")
    print("\t --log: redirect stdout to a log file")
    print("\t --new_log: overwrite previous log file", flush=True)


def _model_feature_names(model):
    """Return the list of input feature names stored on *model*.

    The ``feature_names_in_`` attribute is tried first (scikit-learn
    estimators); otherwise the ``feature_name()`` method is called
    (LightGBM boosters).

    Raises:
        AttributeError: if the model exposes neither of them.
    """
    names = getattr(model, "feature_names_in_", None)
    if names is None:
        feature_name = getattr(model, "feature_name", None)
        if not callable(feature_name):
            raise AttributeError(
                "model exposes neither 'feature_names_in_' nor 'feature_name()'"
            )
        names = feature_name()
    return list(names)


def _run_prediction(inpath, outpath, modelpath, radiomics_filename,
                    model_filename, prediction_filename, verbose):
    """Load the model and the radiomics table, predict, and save the result.

    Every step reports its failure through ``_print_error`` and stops the
    run: later steps depend on the objects produced by earlier ones, so
    continuing would only produce misleading follow-up errors.
    """
    # Load the model, expected to have been saved with joblib.dump(model, "model.pkl").
    model_file = os.path.join(modelpath, model_filename)
    if verbose:
        print("Import model", model_file, flush=True)
    try:
        # NOTE: joblib.load unpickles the file and can therefore execute
        # arbitrary code; only load model files from a trusted source.
        model = joblib.load(model_file)
    except Exception as e:
        _print_error(e)
        return

    # Load the radiomics table.
    radiomics_file = os.path.join(inpath, radiomics_filename)
    if verbose:
        print("Import radiomics", radiomics_file, flush=True)
    try:
        df = pd.read_excel(radiomics_file)
    except Exception as e:
        _print_error(e)
        return

    # Keep only the columns the model was trained on.
    try:
        feature_names = _model_feature_names(model)
    except Exception as e:
        _print_error(e)
        return
    missing = [name for name in feature_names if name not in df.columns]
    if missing:
        _print_error(f"features missing from {radiomics_file}: {missing}")
        return
    df_selected = df[feature_names]

    # Apply the model.
    if verbose:
        print("Apply model on new data", flush=True)
    try:
        pred = model.predict(df_selected)
    except Exception as e:
        _print_error(e)
        return

    # Save the identifiers next to the predictions.
    prediction_file = os.path.join(outpath, prediction_filename)
    if verbose:
        print("Save prediction in", prediction_file, flush=True)
    try:
        # Reuse df's index so the column-wise concat lines up row by row.
        predictions = pd.DataFrame(pred, columns=['predictions'], index=df.index)
        pd.concat(
            [df[['patientID', 'sub_Analysis']], predictions], axis=1
        ).to_excel(prediction_file, index=False)
    except Exception as e:
        _print_error(e)


def main(argv):
    """Apply a saved model to a radiomics Excel file and save the predictions.

    Args:
        argv: command-line arguments without the program name. Recognised
            options: ``-h/--help``, ``-v/--verbose``, ``-i/--inputFolder``,
            ``-o/--outputFolder`` (``--outFolder`` is accepted as an alias),
            ``-m/--modelFolder``, ``-r/--radiomicsFile``, ``-p/--predictFile``,
            ``-M/--modelFile``, ``--log`` and ``--new_log``.

    Raises:
        SystemExit: with status 2 on an invalid option, and after printing
            the help page for ``-h/--help``.

    Processing errors (unreadable model or Excel file, missing columns, ...)
    are printed and end the run at the first failing step without raising.
    When ``--log`` is given, stdout is redirected to that file for the
    duration of the call and restored afterwards.
    """
    modelpath = ''
    inpath = ''
    outpath = ''
    radiomics_filename = 'radiomics.xlsx'
    model_filename = 'model.pkl'
    prediction_filename = 'predicted.xlsx'
    verbose = False
    log = ''
    new_log = False

    try:
        opts, _ = getopt.getopt(
            argv,
            "vhi:o:m:M:r:p:",
            ["log=", "new_log", "verbose", "help", "radiomicsFile=",
             "predictFile=", "modelFile=", "inputFolder=",
             # "outFolder=" is the historical spelling; "outputFolder=" is the
             # documented one, which used to be rejected by getopt.
             "outFolder=", "outputFolder=", "modelFolder="],
        )
    except getopt.GetoptError:
        print('predict.py -i <inputFolder> --radiomicsFile <radiomics excel file> -m <modelFolder> --modelFile <model.pkl>')
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            _print_help()
            sys.exit()
        elif opt in ("-i", "--inputFolder"):
            inpath = arg
        elif opt in ("-o", "--outputFolder", "--outFolder"):
            outpath = arg
        elif opt in ("-m", "--modelFolder"):
            modelpath = arg
        elif opt in ("-r", "--radiomicsFile"):
            radiomics_filename = arg
        elif opt in ("-p", "--predictFile"):
            prediction_filename = arg
        elif opt in ("-M", "--modelFile"):
            model_filename = arg
        elif opt in ("-v", "--verbose"):
            verbose = True
        elif opt == "--log":
            log = arg
        elif opt == "--new_log":
            new_log = True

    # Without an explicit output folder, results go next to the input.
    if outpath == '':
        outpath = inpath

    saved_stdout = sys.stdout
    log_file = None
    if log != '':
        log_file = open(log, 'w+' if new_log else 'a+')
        sys.stdout = log_file
    try:
        if verbose:
            msg = (
                f"Input folder: {inpath}\n"
                f"Output folder: {outpath}\n"
                f"Model folder: {modelpath}\n"
                f"Radiomics file: {radiomics_filename}\n"
                f"Model file: {model_filename}\n"
                f"Prediction file: {prediction_filename}\n"
                f"Verbose: {verbose}\n"
                f"Overwrite previous log file: {str(new_log)}\n"
                f"Log: {log}\n"
            )
            hprint_msg_box(msg=msg, indent=2, title=f"PREDICT {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        _run_prediction(inpath, outpath, modelpath, radiomics_filename,
                        model_filename, prediction_filename, verbose)
    finally:
        # Give stdout back to the caller and release the log file handle.
        if log_file is not None:
            sys.stdout = saved_stdout
            log_file.close()
# Script entry point: hand the command-line arguments (without the program
# name) over to main() when the file is executed directly.
if __name__ == "__main__":
    cli_arguments = sys.argv[1:]
    main(cli_arguments)