io
pacsanini.io
special
#
The io module provides methods for parsing large quantities of DICOM files
that reside in large directories.
base_parser
#
The base_parser module provides methods that internalize the worker-consumer
pattern when parsing DICOM files.
parse_dir(src, parser, callback, callback_args=None, callback_kwargs=None, nb_threads=1, include_path=True)
#
Parse a DICOM directory and pass the parsed results to the
provided callback function.
The callback function is responsible for consuming the results of the parsed DICOM files.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
src |
Union[str, os.PathLike] |
The source DICOM path or directory to parse recursively. |
required |
parser |
Optional[pacsanini.parse.DicomTagGroup] |
The tags to get the DICOM tag values from. If this is None, the results passed to the callback function will not be a dict containing a "dicom" key whose value will be the corresponding pydicom.Dataset object. |
required |
callback |
Callable |
The callback function to send results to for consumption. The first argument of the function should be reserved for the parsing result. |
required |
callback_args |
tuple |
Extra positional arguments to pass to the callback function. |
None |
callback_kwargs |
dict |
Extra keyword arguments to pass to the callback function. |
None |
nb_threads |
int |
The number of threads to use for the parsing of DICOM files. |
1 |
include_path |
bool |
If True, add a "dicom_path" key to the results dict. |
True |
Source code in pacsanini/io/base_parser.py
def parse_dir(
    src: Union[str, os.PathLike],
    parser: Optional[DicomTagGroup],
    callback: Callable,
    callback_args: tuple = None,
    callback_kwargs: dict = None,
    nb_threads: int = 1,
    include_path: bool = True,
):
    """Parse a DICOM directory and pass the parsed results to the
    provided callback function.

    The callback function is responsible for consuming the results of
    the parsed DICOM files. Parsing is done by ``nb_threads`` worker
    threads and consumption by a single consumer thread; this function
    returns once all of them have been joined.

    Parameters
    ----------
    src : Union[str, os.PathLike]
        The source DICOM path or directory to parse recursively.
    parser : Optional[DicomTagGroup]
        The tags to get the DICOM tag values from.
        NOTE(review): the previous documentation stated that when this
        is None the results "will not be a dict containing a "dicom"
        key" holding the pydicom.Dataset object. That reads like a typo
        for "will be" -- confirm against the worker implementation.
    callback : Callable
        The callback function to send results to for consumption.
        The first argument of the function should be reserved for
        the parsing result.
    callback_args : tuple
        Extra positional arguments to pass to the callback function.
    callback_kwargs : dict
        Extra keyword arguments to pass to the callback function.
    nb_threads : int
        The number of threads to use for the parsing of DICOM files.
    include_path : bool
        If True, add a "dicom_path" key to the results dict.

    Raises
    ------
    FileNotFoundError
        If src does not exist.
    ValueError
        If nb_threads is lower than 1 or if callback is not callable.
    """
    if not os.path.exists(src):
        raise FileNotFoundError(f"'{src}' does not exist.")
    if nb_threads < 1:
        raise ValueError("nb_threads must be greater than 0")
    if not callable(callback):
        raise ValueError("callback must be a callable.")

    # Everything the cleanup clause touches is bound *before* the try
    # statement so that a failure during start-up can never surface as a
    # NameError that hides the original exception.
    stop_working = threading.Event()
    stop_consuming = threading.Event()
    worker_queue: queue.Queue = queue.Queue()
    consumer_queue: queue.Queue = queue.Queue()

    consumer_thread = threading.Thread(
        target=_thread_consumer,
        args=(consumer_queue, stop_consuming, callback),
        kwargs={"callback_args": callback_args, "callback_kwargs": callback_kwargs},
        daemon=True,
    )
    started_workers: list = []

    try:
        consumer_thread.start()

        for _ in range(nb_threads):
            worker = threading.Thread(
                target=_thread_worker,
                args=(parser, worker_queue, consumer_queue, stop_working),
                kwargs={"include_path": include_path},
                daemon=True,
            )
            worker.start()
            # Only threads that actually started are recorded: joining a
            # thread that was never started raises RuntimeError.
            started_workers.append(worker)

        _enqueue_files(src, worker_queue)
    finally:
        # Workers are stopped and joined first so that the consumer is
        # only told to stop once no worker can produce further results.
        stop_working.set()
        for worker in started_workers:
            worker.join()

        stop_consuming.set()
        # Thread.ident stays None until the thread has been started.
        if consumer_thread.ident is not None:
            consumer_thread.join()
df_parser
#
The df_parser module provides methods for parsing DICOM files
using the base_parser module and returning the results as DataFrame instances.
parse_dir2df(src, parser, nb_threads=1, include_path=True)
#
Parse a DICOM directory and return the parsed DICOM
tag results as a DataFrame.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
src |
Union[str, os.PathLike] |
The input file or DICOM directory to parse. |
required |
parser |
DicomTagGroup |
The DicomTagGroup instance specifying which DICOM tags to parse and how. |
required |
nb_threads |
int |
The number of threads to use when parsing DICOM files. The default is 1. |
1 |
include_path |
bool |
If True, add a "dicom_path" key to the parsed results. The default is True. |
True |
Returns:
| Type | Description |
|---|---|
DataFrame |
The parsed DICOM tag results as a DataFrame. |
Source code in pacsanini/io/df_parser.py
def parse_dir2df(
    src: Union[str, PathLike],
    parser: DicomTagGroup,
    nb_threads: int = 1,
    include_path: bool = True,
) -> pd.DataFrame:
    """Parse DICOM files and collect the parsed tag values in a DataFrame.

    Parameters
    ----------
    src : Union[str, PathLike]
        The input file or DICOM directory to parse.
    parser : DicomTagGroup
        The DicomTagGroup instance specifying which DICOM
        tags to parse and how.
    nb_threads : int
        The number of threads to use when parsing DICOM
        files. The default is 1.
    include_path : bool
        If True, add a "dicom_path" key to the parsed results.
        The default is True.

    Returns
    -------
    pd.DataFrame
        The parsed DICOM tag results as a DataFrame.
    """
    # The list is handed to the callback as its extra positional
    # argument and is what the returned DataFrame is built from.
    collected: list = []
    parse_options = {
        "callback_args": (collected,),
        "nb_threads": nb_threads,
        "include_path": include_path,
    }
    parse_dir(src, parser, _write_results, **parse_options)
    return pd.DataFrame(collected)
io_parsers
#
The io_parsers module provides generic extensions of the base_parser
module methods that can be called and conveniently used by users.
parse_dir2csv(src, parser, dest, nb_threads=1, include_path=True, mode='w')
#
Parse a DICOM directory and write results to a CSV
file.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
src |
Union[str, os.PathLike] |
The DICOM file or directory to parse. |
required |
parser |
DicomTagGroup |
The DicomTagGroup instance specifying which DICOM tags to parse and how. |
required |
dest |
Union[str, os.PathLike, TextIO] |
The destination path or open text stream to write the results to. |
required |
nb_threads |
int |
The number of threads to use when parsing DICOM files. The default is 1. |
1 |
include_path |
bool |
If True, add a "dicom_path" key to the parsed results. The default is True. |
True |
mode |
str |
Whether to write ("w") or append ("a") to the destination file. |
'w' |
Source code in pacsanini/io/io_parsers.py
def parse_dir2csv(
    src: Union[str, PathLike],
    parser: DicomTagGroup,
    dest: Union[str, PathLike, TextIO],
    nb_threads: int = 1,
    include_path: bool = True,
    mode: str = "w",
):
    """Parse DICOM files and write the parsed tag values as CSV rows.

    Parameters
    ----------
    src : Union[str, PathLike]
        The DICOM file or directory to parse.
    parser : DicomTagGroup
        The DicomTagGroup instance specifying which DICOM
        tags to parse and how. Its tag aliases are used as the
        CSV column names.
    dest : Union[str, PathLike, TextIO]
        The destination to write the results to. A str or PathLike
        value is opened by this function; any other value is handed
        to csv.DictWriter as is.
    nb_threads : int
        The number of threads to use when parsing DICOM
        files. The default is 1.
    include_path : bool
        If True, add a "dicom_path" column to the parsed results.
        The default is True.
    mode : str
        Whether to write ("w") or append ("a") to the
        destination file. The header row is only written
        when mode is "w".
    """
    columns = [tag.tag_alias for tag in parser.tags]
    if include_path:
        columns.append("dicom_path")

    def _dump_rows(stream) -> None:
        # Shared by both destination kinds: set up the CSV writer on the
        # given stream, emit the header if requested, then parse.
        writer = csv.DictWriter(stream, fieldnames=columns)
        if mode == "w":
            writer.writeheader()
        parse_dir(
            src,
            parser,
            _write_results,
            callback_args=(writer,),
            nb_threads=nb_threads,
            include_path=include_path,
        )

    if not isinstance(dest, (str, PathLike)):
        # Not a path: the caller keeps ownership of the stream, so it is
        # written to but never closed here.
        _dump_rows(dest)
        return

    with open(dest, mode, newline="") as output:
        _dump_rows(output)
parse_dir2json(src, parser, dest, nb_threads=1, include_path=True, mode='w')
#
Parse a DICOM directory and write results to a JSON
file.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
src |
Union[str, os.PathLike] |
The DICOM file or directory to parse. |
required |
parser |
DicomTagGroup |
The DicomTagGroup instance specifying which DICOM tags to parse and how. |
required |
dest |
Union[str, os.PathLike, TextIO] |
The destination path or open text stream to write the results to. |
required |
nb_threads |
int |
The number of threads to use when parsing DICOM files. The default is 1. |
1 |
include_path |
bool |
If True, add a "dicom_path" key to the parsed results. The default is True. |
True |
mode |
str |
Whether to write ("w") or append ("a") to the destination file. |
'w' |
Source code in pacsanini/io/io_parsers.py
def parse_dir2json(
    src: Union[str, PathLike],
    parser: DicomTagGroup,
    dest: Union[str, PathLike, TextIO],
    nb_threads: int = 1,
    include_path: bool = True,
    mode: str = "w",
):
    """Parse a DICOM directory and write results to a JSON
    file.

    The output is a single JSON object of the form
    ``{"dicom_tags": [...]}``.

    Parameters
    ----------
    src : Union[str, PathLike]
        The DICOM file or directory to parse.
    parser : DicomTagGroup
        The DicomTagGroup instance specifying which DICOM
        tags to parse and how.
    dest : Union[str, PathLike, TextIO]
        The destination to write the results to. A str or PathLike
        value is opened by this function; any other value is handed
        to json.dump as is, in which case mode is not used.
    nb_threads : int
        The number of threads to use when parsing DICOM
        files. The default is 1.
    include_path : bool
        If True, add a "dicom_path" key to the parsed results.
        The default is True.
    mode : str
        Whether to write ("w") or append ("a") to the
        destination file. When appending, the destination must
        already hold a JSON object with a "dicom_tags" list; the
        new results are added after the existing ones and the file
        is rewritten as one valid JSON document.
    """
    results: list = []
    parse_dir(
        src,
        parser,
        _append_results,
        callback_kwargs={"results_list": results},
        nb_threads=nb_threads,
        include_path=include_path,
    )

    if not isinstance(dest, (str, PathLike)):
        json.dump({"dicom_tags": results}, dest, indent=2, default=_json_serializer)
        return

    # JSON cannot be extended by appending text at the end of the file:
    # the existing document has to be read, merged and rewritten, which
    # requires read/write access instead of plain append.
    if mode == "a":
        mode = "r+"

    with open(dest, mode) as output:
        if mode == "r+":
            # json.load takes the file object itself (json.loads is the
            # variant that takes a string).
            old_results = json.load(output)
            results = old_results["dicom_tags"] + results
            # Reading left the position at end-of-file; rewind and drop
            # the old content so that the dump replaces it instead of
            # being written after it.
            output.seek(0)
            output.truncate()
        json.dump(
            {"dicom_tags": results}, output, indent=2, default=_json_serializer
        )