Skip to content

io

pacsanini.io special #

The io module provides methods for parsing large quantities of DICOM files

that reside in large directories.

base_parser #

The base_parser module provides methods that internalize the worker-consumer

pattern when parsing DICOM files.

parse_dir(src, parser, callback, callback_args=None, callback_kwargs=None, nb_threads=1, include_path=True) #

Parse a DICOM directory and pass the parsed results to the

provided callback function.

The callback function is responsible for consuming the results of the parsed DICOM files.

Parameters:

Name Type Description Default
src Union[str, os.PathLike]

The source DICOM path or directory to parse recursively.

required
parser Optional[pacsanini.parse.DicomTagGroup]

The tags to get the DICOM tag values from. If this is None, the results passed to the callback function will be a dict containing a "dicom" key whose value will be the corresponding pydicom.Dataset object.

required
callback Callable

The callback function to send results to for consumption. The first argument of the function should be reserved for the parsing result.

required
callback_args tuple

Extra positional arguments to pass to the callback function.

None
callback_kwargs dict

Extra keyword arguments to pass to the callback function.

None
nb_threads int

The number of threads to use for the parsing of DICOM files.

1
include_path bool

If True, add a "dicom_path" key to the results dict.

True
Source code in pacsanini/io/base_parser.py
def parse_dir(
    src: Union[str, os.PathLike],
    parser: Optional[DicomTagGroup],
    callback: Callable,
    callback_args: Optional[tuple] = None,
    callback_kwargs: Optional[dict] = None,
    nb_threads: int = 1,
    include_path: bool = True,
):
    """Parse a DICOM directory and hand the parsed results to the
    provided callback function.

    One consumer thread (running ``_thread_consumer``) and ``nb_threads``
    worker threads (running ``_thread_worker``) are started, the files
    under ``src`` are enqueued with ``_enqueue_files``, and all threads
    are then signalled to stop and joined before returning.

    The callback function is responsible for consuming the results of
    the parsed DICOM files.

    Parameters
    ----------
    src : Union[str, os.PathLike]
        The source DICOM path or directory to parse recursively.
    parser : Optional[DicomTagGroup]
        The tags to get the DICOM tag values from. It is forwarded
        unchanged to the worker threads.
        NOTE(review): when this is None, the results presumably are
        dicts containing a "dicom" key whose value is the corresponding
        pydicom.Dataset object -- confirm against _thread_worker.
    callback : Callable
        The callback function to send results to for consumption.
        The first argument of the function should be reserved for
        the parsing result.
    callback_args : Optional[tuple]
        Extra positional arguments to pass to the callback function.
    callback_kwargs : Optional[dict]
        Extra keyword arguments to pass to the callback function.
    nb_threads : int
        The number of threads to use for the parsing of DICOM files.
    include_path : bool
        If True, add a "dicom_path" key to the results dict.

    Raises
    ------
    FileNotFoundError
        If ``src`` does not exist.
    ValueError
        If ``nb_threads`` is lower than 1 or ``callback`` is not callable.
    """
    if not os.path.exists(src):
        raise FileNotFoundError(f"'{src}' does not exist.")

    if nb_threads < 1:
        raise ValueError("nb_threads must be greater than 0")

    if not callable(callback):
        raise ValueError("callback must be a callable.")

    # Everything the ``finally`` clause touches is created *before* the
    # ``try`` so that a failure during start-up can never turn into a
    # NameError that would hide the original exception.
    stop_working = threading.Event()
    stop_consuming = threading.Event()

    worker_queue: queue.Queue = queue.Queue()
    consumer_queue: queue.Queue = queue.Queue()

    consumer_thread = threading.Thread(
        target=_thread_consumer,
        args=(consumer_queue, stop_consuming, callback),
        kwargs={"callback_args": callback_args, "callback_kwargs": callback_kwargs},
        daemon=True,
    )
    consumer_started = False
    started_workers: list = []

    try:
        consumer_thread.start()
        consumer_started = True

        for _ in range(nb_threads):
            worker = threading.Thread(
                target=_thread_worker,
                args=(parser, worker_queue, consumer_queue, stop_working),
                kwargs={"include_path": include_path},
                daemon=True,
            )
            worker.start()
            # Only track threads that actually started: joining a thread
            # that was never started raises RuntimeError.
            started_workers.append(worker)

        _enqueue_files(src, worker_queue)
    finally:
        # Workers are stopped and joined first so that the consumer is
        # only told to stop once no worker is still running.
        stop_working.set()
        for worker in started_workers:
            worker.join()

        stop_consuming.set()
        if consumer_started:
            consumer_thread.join()

df_parser #

The df_parser module provides methods for parsing DICOM files

using the base_parser module and returning data frame instances.

parse_dir2df(src, parser, nb_threads=1, include_path=True) #

Parse a DICOM directory and return the parsed DICOM

tag results as a DataFrame.

Parameters:

Name Type Description Default
src Union[str, os.PathLike]

The input file or DICOM directory to parse.

required
parser DicomTagGroup

The DicomTagGroup instance specifying which DICOM tags to parse and how.

required
nb_threads int

The number of threads to use when parsing DICOM files. The default is 1.

1
include_path bool

If True, add a "dicom_path" key to the parsed results. The default is True.

True

Returns:

Type Description
DataFrame

The parsed DICOM tag results as a DataFrame.

Source code in pacsanini/io/df_parser.py
def parse_dir2df(
    src: Union[str, PathLike],
    parser: DicomTagGroup,
    nb_threads: int = 1,
    include_path: bool = True,
) -> pd.DataFrame:
    """Parse DICOM files and collect the parsed tag values in a DataFrame.

    The parsing itself is delegated to ``parse_dir``, with
    ``_write_results`` as the callback and a local list as its extra
    positional argument. That list is then used to build the returned
    DataFrame.

    Parameters
    ----------
    src : Union[str, PathLike]
        The input file or DICOM directory to parse.
    parser : DicomTagGroup
        The DicomTagGroup instance specifying which DICOM
        tags to parse and how.
    nb_threads : int
        The number of threads to use when parsing DICOM
        files. The default is 1.
    include_path : bool
        If True, add a "dicom_path" key to the parsed results.
        The default is True.

    Returns
    -------
    pd.DataFrame
        The parsed DICOM tag results as a DataFrame.
    """
    rows: list = []
    # The list is handed to the callback as its extra positional argument.
    extra_callback_args = (rows,)

    parse_dir(
        src,
        parser,
        _write_results,
        extra_callback_args,
        nb_threads=nb_threads,
        include_path=include_path,
    )

    frame = pd.DataFrame(rows)
    return frame

io_parsers #

The io_parsers module provides generic extensions of the base_parser

module methods that can be called and conveniently used by users.

parse_dir2csv(src, parser, dest, nb_threads=1, include_path=True, mode='w') #

Parse a DICOM directory and write results to a CSV

file.

Parameters:

Name Type Description Default
src Union[str, os.PathLike]

The DICOM file or directory to parse.

required
parser DicomTagGroup

The DicomTagGroup instance specifying which DICOM tags to parse and how.

required
dest Union[str, os.PathLike, TextIO]

The destination path to write the results to.

required
nb_threads int

The number of threads to use when parsing DICOM files. The default is 1.

1
include_path bool

If True, add a "dicom_path" key to the parsed results. The default is True.

True
mode str

Whether to write ("w") or append ("a") to the destination file.

'w'
Source code in pacsanini/io/io_parsers.py
def parse_dir2csv(
    src: Union[str, PathLike],
    parser: DicomTagGroup,
    dest: Union[str, PathLike, TextIO],
    nb_threads: int = 1,
    include_path: bool = True,
    mode: str = "w",
):
    """Parse DICOM files and write the parsed tag values to a CSV
    destination.

    The CSV columns are the ``tag_alias`` of every tag in ``parser.tags``,
    followed by "dicom_path" when ``include_path`` is True. A header row
    is written only when ``mode`` is "w".

    Parameters
    ----------
    src : Union[str, PathLike]
        The DICOM file or directory to parse.
    parser : DicomTagGroup
        The DicomTagGroup instance specifying which DICOM
        tags to parse and how.
    dest : Union[str, PathLike, TextIO]
        The destination to write the results to. A str or PathLike
        is opened (and closed) by this function using ``mode``; any
        other object is written to directly.
    nb_threads : int
        The number of threads to use when parsing DICOM
        files. The default is 1.
    include_path : bool
        If True, add a "dicom_path" key to the parsed results.
        The default is True.
    mode : str
        Whether to write ("w") or append ("a") to the
        destination file.
    """
    columns = [dicom_tag.tag_alias for dicom_tag in parser.tags]
    if include_path:
        columns.append("dicom_path")

    def _parse_into(stream):
        # Shared by both destination kinds: set up the CSV writer on the
        # given stream, then let parse_dir feed it through the callback.
        csv_writer = csv.DictWriter(stream, fieldnames=columns)
        if mode == "w":
            csv_writer.writeheader()

        parse_dir(
            src,
            parser,
            _write_results,
            callback_args=(csv_writer,),
            nb_threads=nb_threads,
            include_path=include_path,
        )

    if not isinstance(dest, (str, PathLike)):
        # Already a stream: the caller keeps ownership of it.
        _parse_into(dest)
        return

    with open(dest, mode, newline="") as output:
        _parse_into(output)

parse_dir2json(src, parser, dest, nb_threads=1, include_path=True, mode='w') #

Parse a DICOM directory and write results to a JSON

file.

Parameters:

Name Type Description Default
src Union[str, os.PathLike]

The DICOM file or directory to parse.

required
parser DicomTagGroup

The DicomTagGroup instance specifying which DICOM tags to parse and how.

required
dest Union[str, os.PathLike, TextIO]

The destination path to write the results to.

required
nb_threads int

The number of threads to use when parsing DICOM files. The default is 1.

1
include_path bool

If True, add a "dicom_path" key to the parsed results. The default is True.

True
mode str

Whether to write ("w") or append ("a") to the destination file.

'w'
Source code in pacsanini/io/io_parsers.py
def parse_dir2json(
    src: Union[str, PathLike],
    parser: DicomTagGroup,
    dest: Union[str, PathLike, TextIO],
    nb_threads: int = 1,
    include_path: bool = True,
    mode: str = "w",
):
    """Parse a DICOM directory and write results to a JSON
    file.

    The output is a JSON object with a single "dicom_tags" key whose
    value is the list of parsed results.

    Parameters
    ----------
    src : Union[str, PathLike]
        The DICOM file or directory to parse.
    parser : DicomTagGroup
        The DicomTagGroup instance specifying which DICOM
        tags to parse and how.
    dest : Union[str, PathLike, TextIO]
        The destination path to write the results to. If it is not
        a str or PathLike, it is written to directly and ``mode`` is
        ignored.
    nb_threads : int
        The number of threads to use when parsing DICOM
        files. The default is 1.
    include_path : bool
        If True, add a "dicom_path" key to the parsed results.
        The default is True.
    mode : str
        Whether to write ("w") or append ("a") to the
        destination file. When appending, the "dicom_tags" entries
        already stored in the destination file are kept (first) and
        the new results are added after them. A missing or empty
        destination file is treated as having no previous results.

    Raises
    ------
    json.JSONDecodeError
        If appending to an existing file that does not contain valid JSON.
    KeyError
        If appending to an existing JSON file without a "dicom_tags" key.
    """
    results: list = []
    parse_dir(
        src,
        parser,
        _append_results,
        callback_kwargs={"results_list": results},
        nb_threads=nb_threads,
        include_path=include_path,
    )

    if not isinstance(dest, (str, PathLike)):
        json.dump({"dicom_tags": results}, dest, indent=2, default=_json_serializer)
        return

    if mode == "a":
        # JSON cannot be appended to textually: the previous document has
        # to be read, merged with the new results and rewritten entirely.
        try:
            with open(dest) as existing:
                previous_content = existing.read()
        except FileNotFoundError:
            previous_content = ""

        if previous_content.strip():
            results = json.loads(previous_content)["dicom_tags"] + results
        mode = "w"

    # Serialize before opening the destination so that a serialization
    # error cannot leave a truncated file behind.
    serialized = json.dumps(
        {"dicom_tags": results}, indent=2, default=_json_serializer
    )
    with open(dest, mode) as output:
        output.write(serialized)