io

`pacsanini.io` `special` #

The io module provides methods for parsing large quantities of DICOM files

that reside in large directories.

`base_parser` #

The base_parser module provides methods that internalize the worker-consumer

pattern when parsing DICOM files.

`parse_dir(src, parser, callback, callback_args=None, callback_kwargs=None, nb_threads=1, include_path=True)` #

Parse a DICOM directory and pass the parsed results to the

provided callback function.

The callback function is responsible for consuming the results of the parsed DICOM files.

Parameters:

Name	Type	Description	Default
`src`	`Union[str, os.PathLike]`	The source DICOM path or directory to parse recursively.	required
`parser`	`Optional[pacsanini.parse.DicomTagGroup]`	The tags to get the DICOM tag values from. If this is None, the results passed to the callback function will not be a dict containing a "dicom" key whose value will be the corresponding pydicom.Dataset object.	required
`callback`	`Callable`	The callback function to send results to for consumption. The first argument of the function should be reserved for the parsing result.	required
`callback_args`	`tuple`	Extra positional arguments to pass to the callback function.	`None`
`callback_kwargs`	`dict`	Extra keyword arguments to pass to the callback function.	`None`
`nb_threads`	`int`	The number of threads to use for the parsing of DICOM files.	`1`
`include_path`	`bool`	If True, add a "dicom_path" key to the results dict.	`True`

Source code in pacsanini/io/base_parser.py

def parse_dir(
    src: Union[str, os.PathLike],
    parser: Optional[DicomTagGroup],
    callback: Callable,
    callback_args: Optional[tuple] = None,
    callback_kwargs: Optional[dict] = None,
    nb_threads: int = 1,
    include_path: bool = True,
):
    """Parse a DICOM file or directory and pass the parsed results to
    the provided callback function.

    One consumer thread is started with the callback, and nb_threads
    worker threads are started with the parser and both queues. The
    files under src are then enqueued for the workers. The function
    only returns once every started thread has been joined.

    Parameters
    ----------
    src : Union[str, os.PathLike]
        The source DICOM path or directory to parse recursively.
    parser : Optional[DicomTagGroup]
        The tags to get the DICOM tag values from. May be None; the
        value is forwarded unchanged to each worker thread.
    callback : Callable
        The callback function to send results to for consumption.
        The first argument of the function should be reserved for
        the parsing result.
    callback_args : tuple
        Extra positional arguments to pass to the callback function.
    callback_kwargs : dict
        Extra keyword arguments to pass to the callback function.
    nb_threads : int
        The number of threads to use for the parsing of DICOM files.
    include_path : bool
        If True, add a "dicom_path" key to the results dict.

    Raises
    ------
    FileNotFoundError
        If src does not exist.
    ValueError
        If nb_threads is lower than 1 or if callback is not callable.
    """
    if not os.path.exists(src):
        raise FileNotFoundError(f"'{src}' does not exist.")

    if nb_threads < 1:
        raise ValueError("nb_threads must be greater than 0")

    if not callable(callback):
        raise ValueError("callback must be a callable.")

    # Everything the cleanup code needs is created before the try block so
    # that the finally clause can never hit an unbound name and mask the
    # original exception.
    stop_working = threading.Event()
    stop_consuming = threading.Event()

    worker_queue: queue.Queue = queue.Queue()
    consumer_queue: queue.Queue = queue.Queue()

    consumer_thread = threading.Thread(
        target=_thread_consumer,
        args=(consumer_queue, stop_consuming, callback),
        kwargs={"callback_args": callback_args, "callback_kwargs": callback_kwargs},
        daemon=True,
    )
    consumer_started = False
    started_workers: list = []

    try:
        consumer_thread.start()
        consumer_started = True

        for _ in range(nb_threads):
            # NOTE(review): earlier documentation said that with parser=None
            # the results "will not be a dict containing a "dicom" key"; that
            # reads like a typo for "will be a dict" -- confirm against
            # _thread_worker.
            worker = threading.Thread(
                target=_thread_worker,
                args=(parser, worker_queue, consumer_queue, stop_working),
                kwargs={"include_path": include_path},
                daemon=True,
            )
            worker.start()
            # Only threads that actually started are recorded: joining a
            # thread that never started raises RuntimeError.
            started_workers.append(worker)

        _enqueue_files(src, worker_queue)
    finally:
        # Workers are stopped and joined first, the consumer afterwards.
        stop_working.set()
        for worker in started_workers:
            worker.join()

        stop_consuming.set()
        if consumer_started:
            consumer_thread.join()

`df_parser` #

The df_parser module provides methods for parsing DICOM files

using the base_parser module and returning DataFrame instances.

`parse_dir2df(src, parser, nb_threads=1, include_path=True)` #

Parse a DICOM directory and return the parsed DICOM

tag results as a DataFrame.

Parameters:

Name	Type	Description	Default
`src`	`Union[str, os.PathLike]`	The input file or DICOM directory to parse.	required
`parser`	`DicomTagGroup`	The DicomTagGroup instance specifying which DICOM tags to parse and how.	required
`nb_threads`	`int`	The number of threads to use when parsing DICOM files. The default is 1.	`1`
`include_path`	`bool`	If True, add a "dicom_path" key to the parsed results. The default is True.	`True`

Returns:

Type	Description
`DataFrame`	The parsed DICOM tag results as a DataFrame.

Source code in pacsanini/io/df_parser.py

def parse_dir2df(
    src: Union[str, PathLike],
    parser: DicomTagGroup,
    nb_threads: int = 1,
    include_path: bool = True,
) -> pd.DataFrame:
    """Parse a DICOM file or directory and gather the parsed tag
    values into a DataFrame.

    A list is handed to the _write_results callback as its extra
    positional argument while parse_dir runs; the DataFrame is built
    from that list once parsing has finished.

    Parameters
    ----------
    src : Union[str, PathLike]
        The input file or DICOM directory to parse.
    parser : DicomTagGroup
        The DicomTagGroup instance specifying which DICOM
        tags to parse and how.
    nb_threads : int
        The number of threads to use when parsing DICOM
        files. The default is 1.
    include_path : bool
        If True, add a "dicom_path" key to the parsed results.
        The default is True.

    Returns
    -------
    pd.DataFrame
        The parsed DICOM tag results as a DataFrame.
    """
    collected: list = []
    parse_options = {
        "callback_args": (collected,),
        "nb_threads": nb_threads,
        "include_path": include_path,
    }

    parse_dir(src, parser, _write_results, **parse_options)

    frame = pd.DataFrame(collected)
    return frame

`io_parsers` #

The io_parsers module provides generic extensions of the base_parser

module methods that can be called and conveniently used by users.

`parse_dir2csv(src, parser, dest, nb_threads=1, include_path=True, mode='w')` #

Parse a DICOM directory and write results to a CSV

file.

Parameters:

Name	Type	Description	Default
`src`	`Union[str, os.PathLike]`	The DICOM file or directory to parse.	required
`parser`	`DicomTagGroup`	The DicomTagGroup instance specifying which DICOM tags to parse and how.	required
`dest`	`Union[str, os.PathLike, TextIO]`	The destination path, or an already open text stream, to write the results to.	required
`nb_threads`	`int`	The number of threads to use when parsing DICOM files. The default is 1.	`1`
`include_path`	`bool`	If True, add a "dicom_path" key to the parsed results. The default is True.	`True`
`mode`	`str`	Whether to write ("w") or append ("a") to the destination file.	`'w'`

Source code in pacsanini/io/io_parsers.py

def parse_dir2csv(
    src: Union[str, PathLike],
    parser: DicomTagGroup,
    dest: Union[str, PathLike, TextIO],
    nb_threads: int = 1,
    include_path: bool = True,
    mode: str = "w",
):
    """Parse a DICOM file or directory and write the results as CSV rows.

    The CSV columns are the aliases of the parser's tags, followed by
    "dicom_path" when include_path is True. A header row is written
    only when mode is exactly "w".

    Parameters
    ----------
    src : Union[str, PathLike]
        The DICOM file or directory to parse.
    parser : DicomTagGroup
        The DicomTagGroup instance specifying which DICOM
        tags to parse and how.
    dest : Union[str, PathLike, TextIO]
        The destination to write the results to. A str or PathLike
        is opened (and closed) by this function using mode; any
        other object is used directly as the output stream.
    nb_threads : int
        The number of threads to use when parsing DICOM
        files. The default is 1.
    include_path : bool
        If True, add a "dicom_path" key to the parsed results.
        The default is True.
    mode : str
        Whether to write ("w") or append ("a") to the
        destination file.
    """
    columns = [tag.tag_alias for tag in parser.tags]
    if include_path:
        columns.append("dicom_path")

    def _export(stream):
        # Shared by both destination kinds: build the writer, emit the
        # header if requested, then let parse_dir feed the callback.
        writer = csv.DictWriter(stream, fieldnames=columns)
        if mode == "w":
            writer.writeheader()

        parse_dir(
            src,
            parser,
            _write_results,
            callback_args=(writer,),
            nb_threads=nb_threads,
            include_path=include_path,
        )

    if not isinstance(dest, (str, PathLike)):
        # dest is used as-is and is left open for the caller.
        _export(dest)
        return

    with open(dest, mode, newline="") as output:
        _export(output)

`parse_dir2json(src, parser, dest, nb_threads=1, include_path=True, mode='w')` #

Parse a DICOM directory and write results to a JSON

file.

Parameters:

Name	Type	Description	Default
`src`	`Union[str, os.PathLike]`	The DICOM file or directory to parse.	required
`parser`	`DicomTagGroup`	The DicomTagGroup instance specifying which DICOM tags to parse and how.	required
`dest`	`Union[str, os.PathLike, TextIO]`	The destination path, or an already open text stream, to write the results to.	required
`nb_threads`	`int`	The number of threads to use when parsing DICOM files. The default is 1.	`1`
`include_path`	`bool`	If True, add a "dicom_path" key to the parsed results. The default is True.	`True`
`mode`	`str`	Whether to write ("w") or append ("a") to the destination file.	`'w'`

Source code in pacsanini/io/io_parsers.py

def parse_dir2json(
    src: Union[str, PathLike],
    parser: DicomTagGroup,
    dest: Union[str, PathLike, TextIO],
    nb_threads: int = 1,
    include_path: bool = True,
    mode: str = "w",
):
    """Parse a DICOM file or directory and write the results to a JSON
    document of the form {"dicom_tags": [...]}.

    Parameters
    ----------
    src : Union[str, PathLike]
        The DICOM file or directory to parse.
    parser : DicomTagGroup
        The DicomTagGroup instance specifying which DICOM
        tags to parse and how.
    dest : Union[str, PathLike, TextIO]
        The destination to write the results to. A str or PathLike
        is opened by this function; any other object is used
        directly as the output stream (mode is then ignored).
    nb_threads : int
        The number of threads to use when parsing DICOM
        files. The default is 1.
    include_path : bool
        If True, add a "dicom_path" key to the parsed results.
        The default is True.
    mode : str
        Whether to write ("w") or append ("a") to the
        destination file. When appending, the "dicom_tags" entries
        already stored in the destination file are kept and the new
        results are added after them. If the file does not exist
        yet, appending behaves like writing.
    """
    results: list = []
    parse_dir(
        src,
        parser,
        _append_results,
        callback_kwargs={"results_list": results},
        nb_threads=nb_threads,
        include_path=include_path,
    )

    if not isinstance(dest, (str, PathLike)):
        json.dump({"dicom_tags": results}, dest, indent=2, default=_json_serializer)
        return

    if mode == "a":
        # A JSON document cannot be extended by appending text to it, so the
        # existing entries are loaded, merged with the new ones and the whole
        # document is rewritten.
        try:
            with open(dest) as existing:
                previous = json.load(existing)["dicom_tags"]
        except FileNotFoundError:
            previous = []
        results = previous + results
        mode = "w"

    # Serialize before opening the destination so that a serialization error
    # cannot leave a truncated file behind.
    payload = json.dumps(
        {"dicom_tags": results}, indent=2, default=_json_serializer
    )
    with open(dest, mode) as output:
        output.write(payload)

io

pacsanini.io special #

base_parser #

parse_dir(src, parser, callback, callback_args=None, callback_kwargs=None, nb_threads=1, include_path=True) #

df_parser #

parse_dir2df(src, parser, nb_threads=1, include_path=True) #

io_parsers #

parse_dir2csv(src, parser, dest, nb_threads=1, include_path=True, mode='w') #

parse_dir2json(src, parser, dest, nb_threads=1, include_path=True, mode='w') #

`pacsanini.io` `special` #

`base_parser` #

`parse_dir(src, parser, callback, callback_args=None, callback_kwargs=None, nb_threads=1, include_path=True)` #

`df_parser` #

`parse_dir2df(src, parser, nb_threads=1, include_path=True)` #

`io_parsers` #

`parse_dir2csv(src, parser, dest, nb_threads=1, include_path=True, mode='w')` #

`parse_dir2json(src, parser, dest, nb_threads=1, include_path=True, mode='w')` #