Skip to content

parse

pacsanini.parse #

The parse module contains the core methods and classes of the pacsanini

package that are used to obtain DICOM tag values from files.

DicomTag pydantic-model #

The DicomTag class represents a DICOM tag that you wish

to obtain a tag value from.

Attributes:

Name Type Description
tag_name Union[List[str], str]

A string or list of strings corresponding to a tag to parse.

tag_alias Optional[str]

An alternative name to give to the tag after it is parsed.

default_val Optional[Any]

If set and the tag_name did not find an existing value, return the default_val.

callback Optional[Callable[[Any], Any]]

If set, use the callback method to format the parsed DICOM tag result.

tag_value(self, data) #

Return the tag value of the given DICOM data.

Source code in pacsanini/parse.py
def tag_value(self, data: Dataset) -> Any:
    """Look up this tag in the given DICOM data and return its value.

    The lookup is delegated to ``get_tag_value`` using the instance's
    ``tag_name``, ``callback`` and ``default_val`` settings.
    """
    name = self.tag_name
    formatter = self.callback
    fallback = self.default_val
    return get_tag_value(data, name, callback=formatter, default_val=fallback)

validate_alias(v, values) classmethod #

Check for a tag alias. If not present, use the first

tag_name value.

Source code in pacsanini/parse.py
@validator("tag_alias", pre=True, always=True)
def validate_alias(cls, v, values):  # pylint: disable=no-self-argument,no-self-use
    """Check for a tag alias. If not present, use the first
    tag_name value.

    Parameters
    ----------
    v : Any
        The raw tag_alias value given to the model (may be None or empty).
    values : dict
        The values of the fields that were validated before tag_alias.

    Returns
    -------
    Any
        The given alias if it is truthy. Otherwise tag_name itself when
        it is a string, or its first element when it is a list.

    Raises
    ------
    ValueError
        If no alias was given and tag_name is an empty list, in which
        case no default alias can be derived.
    """
    if v:
        return v

    # pydantic leaves a field out of `values` when that field is missing or
    # failed its own validation. Returning early lets the tag_name error be
    # reported instead of crashing here with a bare KeyError.
    if "tag_name" not in values:
        return v

    tag_name = values["tag_name"]
    if isinstance(tag_name, str):
        return tag_name
    if not tag_name:
        # A ValueError raised in a validator is reported by pydantic as a
        # regular validation error (an IndexError would escape unwrapped).
        raise ValueError(
            "tag_name must contain at least one tag to derive a default tag_alias"
        )
    return tag_name[0]

validate_callback(v) classmethod #

Validate the callback and load the method if

it is a string.

Source code in pacsanini/parse.py
@validator("callback", pre=True)
def validate_callback(cls, v):  # pylint: disable=no-self-argument,no-self-use
    """Validate the callback and load the method if
    it is a string.

    String values are resolved in one of two ways:
    * "module.path:attribute" imports the module and returns the
      named attribute from it.
    * any other string is looked up in the module-level
      ``_builtin_types`` mapping.

    Parameters
    ----------
    v : Any
        The raw callback value. Non-string values (including None)
        are returned untouched.

    Returns
    -------
    Any
        The resolved callback object, or v itself if it is not a string.

    Raises
    ------
    ValueError
        If the string cannot be resolved: the module cannot be imported,
        the attribute does not exist, or the name is not a known
        ``_builtin_types`` key.
    """
    import importlib  # local import: only needed for string callbacks

    if not isinstance(v, str):
        return v

    if ":" in v:
        module_path, attr_name = v.rsplit(":", 1)
        try:
            module = importlib.import_module(module_path)
            return getattr(module, attr_name)
        except (ImportError, AttributeError) as err:
            # Raise ValueError so that pydantic reports a validation error
            # instead of letting the raw exception escape.
            raise ValueError(f"Unable to load callback {v!r}: {err}") from err

    try:
        return _builtin_types[v]
    except KeyError:
        raise ValueError(f"Unknown callback name {v!r}") from None

DicomTagGroup pydantic-model #

Parse a group of DICOM tags.

from_json(path) classmethod #

Obtain a DicomTagGroup instance from a json file.

Source code in pacsanini/parse.py
@classmethod
def from_json(cls, path: str):
    """Obtain a DicomTagGroup instance from a json file.

    Parameters
    ----------
    path : str
        The path of the JSON file. Its top-level object is unpacked
        as keyword arguments for the class constructor.

    Returns
    -------
    An instance of cls built from the file's content.
    """
    # JSON text is UTF-8 encoded; do not rely on the platform's default
    # encoding, which differs between systems.
    with open(path, encoding="utf-8") as in_:
        content = json.load(in_)
    return cls(**content)

from_yaml(path) classmethod #

Obtain a DicomTagGroup instance from a yaml file.

Source code in pacsanini/parse.py
@classmethod
def from_yaml(cls, path: str):
    """Obtain a DicomTagGroup instance from a yaml file.

    Parameters
    ----------
    path : str
        The path of the YAML file. Its top-level mapping is unpacked
        as keyword arguments for the class constructor.

    Returns
    -------
    An instance of cls built from the file's content.
    """
    # Read the file as UTF-8 explicitly rather than with the platform's
    # default encoding, which differs between systems.
    with open(path, encoding="utf-8") as in_:
        content = yaml.safe_load(in_.read())
    return cls(**content)

parse_dicom(self, dicom) #

Parse a DICOM file using the instance's tags.

Source code in pacsanini/parse.py
def parse_dicom(self, dicom: Union[str, Dataset]) -> Dict[str, Any]:
    """Extract the values of this group's tags from a single DICOM file.

    The work is delegated to the module-level ``parse_dicom`` function,
    which receives the instance's ``tags``.
    """
    group_tags = self.tags
    parsed = parse_dicom(dicom, group_tags)
    return parsed

parse_dicoms(self, dicoms) #

Parse multiple DICOM files using the instance's tags.

Source code in pacsanini/parse.py
def parse_dicoms(
    self, dicoms: Iterable[Union[str, Dataset]]
) -> Generator[Dict[str, Any], None, None]:
    """Lazily parse each of the given DICOM files with this group's tags.

    Results are yielded as they are produced by the module-level
    ``parse_dicoms`` function, which receives the instance's ``tags``.
    """
    yield from parse_dicoms(dicoms, self.tags)

parse_dicoms2df(self, dicoms) #

Parse multiple DICOM files using the instance's tags

and return a DataFrame.

Source code in pacsanini/parse.py
def parse_dicoms2df(self, dicoms: Iterable[Union[str, Dataset]]) -> pd.DataFrame:
    """Parse the given DICOM files with this group's tags and collect
    the results in a DataFrame.

    Each result produced by ``self.parse_dicoms`` becomes one row.
    """
    records = self.parse_dicoms(dicoms)
    frame = pd.DataFrame(records)
    return frame

get_dicom_tag_value(data, tag_name, *, callback=None) #

Get the tag value of a particular DICOM tag. If the DICOM

tag could not be found, None is returned. Nested tags can also be retrieved; to do so, use the dot notation to indicate the nested tag to retrieve.

Tag names can have the following structures: * if the tag is found at the top level of the DICOM structure, its name suffices (eg: "SOPInstanceUID"). * if the tag is nested, you can use the following structure: "<tag name>.<nested tag name>" (eg: "ViewCodeSequence.CodeValue"). You can access as many nested tags as you want. Using the dot separator will always cause the method to read the first element of the DICOM sequence. * if the tag is nested and the nested tag is not in the sequence's first element, you can use the bracket notation "<tag name>[1]<nested tag name>" (eg: "DeidentificationMethodCodeSequence[1]CodingSchemeDesignator"). Index errors will lead to None being returned.

Parameters:

Name Type Description Default
data Dataset

The DICOM data element to search in.

required
tag_name str

The name of the DICOM tag. This can be a nested tag.

required
callback Callable[[Any], Any]

A callback function to use to format the obtained DICOM tag value.

None

Returns:

Type Description
Any

The DICOM tag value or None if it was not found.

Source code in pacsanini/parse.py
def get_dicom_tag_value(
    data: Dataset, tag_name: str, *, callback: Callable[[Any], Any] = None
) -> Any:
    """Get the tag value of a particular DICOM tag. If the DICOM
    tag could not be found, None is returned. Nested tags can also
    be retrieved; to do so, use the dot notation to indicate
    the nested tag to retrieve.

    Tag names can have the following structures:
    * if the tag is found at the top level of the DICOM structure,
      its name suffices (eg: "SOPInstanceUID").
    * if the tag is nested, you can use the following structure:
      "<tag name>.<nested tag name>" (eg: "ViewCodeSequence.CodeValue").
      You can access as many nested tags as you want. Using the dot
      separator will always cause the method to read the first element
      of the DICOM sequence.
    * if the tag is nested and the nested tag is not in the sequence's
      first element, you can use the bracket notation "<tag name>[1]<nested tag name>"
      (eg: "DeidentificationMethodCodeSequence[1]CodingSchemeDesignator").
      Index errors will lead to None being returned.
    * both notations can be mixed in a single tag name. The name is
      always split at whichever separator comes first, and the
      remainder is resolved recursively.

    Parameters
    ----------
    data : Dataset
        The DICOM data element to search in.
    tag_name : str
        The name of the DICOM tag. This can be a nested tag.
    callback : Callable[[Any], Any]
        A callback function to use to format the obtained DICOM
        tag value. It is only applied to values that are not None,
        and any exception it raises is suppressed, in which case
        the unformatted value is returned.

    Returns
    -------
    Any
        The DICOM tag value or None if it was not found.
    """
    match = _SEQUENCE.search(tag_name)
    dot_pos = tag_name.find(".")
    if dot_pos != -1 or match:
        # Split at whichever separator appears first. Always preferring the
        # dot would turn "Seq[1]Inner.Leaf" into the bogus tag "Seq[1]Inner".
        bracket_first = match is not None and (
            dot_pos == -1 or match.start(1) - 1 < dot_pos
        )
        if bracket_first:
            tag = tag_name[: match.start(1) - 1]
            sub_tag = tag_name[match.end(1) + 1 :]
            seq_idx = int(match.group(1))
        else:
            tag = tag_name[:dot_pos]
            sub_tag = tag_name[dot_pos + 1 :]
            # The dot notation always reads the sequence's first element.
            seq_idx = 0

        try:
            seq = data.data_element(tag)
            if seq is None or seq.VM == 0:
                # ValueMultiplicity set to 0 indicates an invalid sequence.
                return None
            return get_dicom_tag_value(seq[seq_idx], sub_tag, callback=callback)
        except (KeyError, IndexError):
            return None

    try:
        data_el = data.data_element(tag_name)
    except KeyError:
        return None
    if data_el is None:
        return None

    value = data_el.value
    if callback is not None and value is not None:
        # Formatting is best-effort: a failing callback leaves the raw value.
        with suppress(Exception):
            value = callback(value)
    return value

get_tag_value(data, tag_name, *, callback=None, default_val=None) #

Get the tag value of a particular DICOM tag. If the DICOM

tag could not be found, None is returned. Nested tags can also be retrieved; to do so, use the dot notation to indicate the nested tag to retrieve.

Parameters:

Name Type Description Default
data Dataset

The DICOM data element to search in.

required
tag_name Union[Iterable[str], str]

The name of the DICOM tag. This can be a nested tag.

required
callback Callable[[Any], Any]

A callback function to use to format the obtained DICOM tag value.

None
default_val Any

The default value to return if the tag value could not be retrieved.

None

Returns:

Type Description
Any

The DICOM tag value or None/the default value if it was not found.

Source code in pacsanini/parse.py
def get_tag_value(
    data: Dataset,
    tag_name: Union[Iterable[str], str],
    *,
    callback: Callable[[Any], Any] = None,
    default_val: Any = None,
) -> Any:
    """Get the tag value of a particular DICOM tag. If the DICOM
    tag could not be found, None is returned. Nested tags can also
    be retrieved; to do so, use the dot notation to indicate
    the nested tag to retrieve.

    When several tag names are given, they are tried in order and the
    first truthy value is returned. If no candidate yields a truthy
    value, the value of the last candidate is used.

    Parameters
    ----------
    data : Dataset
        The DICOM data element to search in.
    tag_name : Union[Iterable[str], str]
        The name of the DICOM tag, or several candidate names to
        try in order. Names can be nested tags.
    callback : Callable[[Any], Any]
        A callback function to use to format the obtained DICOM
        tag value.
    default_val : Any
        The default value to return if the tag value
        could not be retrieved.

    Returns
    -------
    Any
        The DICOM tag value or None/the default value
        if it was not found.
    """
    tags_to_check = [tag_name] if isinstance(tag_name, str) else tag_name

    # Start from None so that an empty iterable of tag names falls through
    # to the default value instead of raising UnboundLocalError.
    tag_val = None
    for tag in tags_to_check:
        tag_val = get_dicom_tag_value(data, tag, callback=callback)
        if tag_val:
            return tag_val

    if tag_val is None and default_val is not None:
        return default_val
    return tag_val

parse_dicom(dicom, tags) #

Parse a DICOM file using the requirements specified

by the tags.

If the tags parameter is an iterable of dict instances, they will be coerced to DicomTag instances.

Parameters:

Name Type Description Default
dicom Union[str, pydicom.dataset.Dataset]

The DICOM file to parse.

required
tags Iterable[Union[dict, pacsanini.parse.DicomTag]]

The tags to get the values of from the DICOM file.

required

Returns:

Type Description
Dict[str, Any]

A dict whose keys correspond to the tag aliases and whose values correspond to the DICOM tags' values.

Source code in pacsanini/parse.py
def parse_dicom(
    dicom: Union[str, Dataset], tags: Iterable[Union[dict, DicomTag]]
) -> Dict[str, Any]:
    """Parse a DICOM file using the requirements specified
    by the tags.

    If the tags parameter is an iterable of dict instances,
    they will be coerced to DicomTag instances.

    Parameters
    ----------
    dicom : Union[str, Dataset]
        The DICOM file to parse. A string or os.PathLike object is
        treated as a file path and read without its pixel data. Any
        other object is used as is.
    tags : Iterable[Union[dict, DicomTag]]
        The tags to get the values of from the DICOM file.

    Returns
    -------
    Dict[str, Any]
        A dict whose keys correspond to the tag aliases
        and whose values correspond to the DICOM tags' values.
        If several tags share an alias, the last one wins.
    """
    import os  # local import: only needed to recognise path-like inputs

    if isinstance(dicom, (str, os.PathLike)):
        # Accept pathlib.Path and friends too; previously they were
        # mistaken for an already loaded dataset.
        dicom = dcmread(os.fspath(dicom), stop_before_pixels=True)

    results = {}
    for tag in tags:
        if isinstance(tag, dict):
            tag = DicomTag(**tag)
        results[str(tag.tag_alias)] = tag.tag_value(dicom)

    return results

parse_dicoms(dicoms, tags) #

Parse multiple DICOM files using the specified

tags.

Parameters:

Name Type Description Default
dicoms Iterable[Union[str, pydicom.dataset.Dataset]]

The DICOM files to parse.

required
tags Iterable[Union[dict, pacsanini.parse.DicomTag]]

The tags to get the values of from the DICOM file.

required

Returns:

Type Description
Generator[Dict[str, Any], NoneType, NoneType]

Dicts whose keys correspond to the tag aliases and whose values correspond to the DICOM tags' values.

Source code in pacsanini/parse.py
def parse_dicoms(
    dicoms: Iterable[Union[str, Dataset]], tags: Iterable[Union[dict, DicomTag]]
) -> Generator[Dict[str, Any], None, None]:
    """Parse multiple DICOM files using the specified
    tags.

    Parameters
    ----------
    dicoms : Iterable[Union[str, Dataset]]
        The DICOM files to parse.
    tags : Iterable[Union[dict, DicomTag]]
        The tags to get the values of from each DICOM file. Dict
        items are coerced to DicomTag instances.

    Yields
    ------
    Dict[str, Any]
        One dict per DICOM file whose keys correspond to the tag aliases
        and whose values correspond to the DICOM tags' values.
    """
    # Normalise the tags once, before the loop. A one-shot iterator would
    # otherwise be exhausted after the first file, and dict tags would be
    # coerced again for every single file.
    tag_list = [DicomTag(**tag) if isinstance(tag, dict) else tag for tag in tags]
    for dcm in dicoms:
        yield parse_dicom(dcm, tag_list)