Skip to content

db

pacsanini.db.crud #

The crud module provides methods and classes that can be used to insert

single items (studies found from C-FIND requests or DICOM metadata) into a given database.

DBWrapper #

A wrapper class for the database connections. The purpose of this is

to be able to open database connections lazily inside a thread that may not be the application's main thread. It is recommended to use instances of this class inside a context manager.

Attributes:

Name Type Description
conn_uri str

The database connection URI.

create_tables bool

Whether to create tables when the connection is first established. The default is False.

debug bool

If True, echo SQL statements to the standard output. The default is False.

close(self) #

Close the instance's current session and engine if they are still open.

Source code in pacsanini/db/crud.py
def close(self):
    """Release the database resources held by this instance.

    The current session is closed and the engine is disposed of. Each step
    is skipped when the corresponding attribute is None.
    """
    active_session = self.session
    if active_session is not None:
        active_session.close()

    active_engine = self.engine
    if active_engine is not None:
        active_engine.dispose()

conn(self) #

Obtain a session instance

Source code in pacsanini/db/crud.py
def conn(self) -> Session:
    """Obtain a session instance, creating the engine and session lazily.

    The first call builds the engine from ``self.conn_uri``, optionally
    initializes the database when ``self.create_tables`` is true, and stores
    the resulting engine and session on the instance. Later calls on the
    same instance return the stored session.

    The session is memoised on the instance itself rather than through
    ``functools.lru_cache``: a cache attached to a method is shared by every
    instance and keeps each instance alive, and with ``maxsize=1`` two
    wrappers used alternately would keep evicting each other and re-creating
    engines that are never disposed of.

    Returns
    -------
    Session
        The session bound to this instance's engine.
    """
    existing_session = getattr(self, "session", None)
    if existing_session is not None:
        return existing_session

    # NOTE(review): the ``debug`` attribute is documented as echoing SQL
    # statements but is not forwarded to create_engine here - confirm
    # whether ``echo=self.debug`` is intended.
    self.engine = create_engine(self.conn_uri)
    if self.create_tables:
        config = PacsaniniConfig(
            storage=StorageConfig(resources=self.conn_uri, directory="./")
        )
        # Imported at call time rather than at module level.
        from pacsanini.db.utils import (  # pylint: disable=import-outside-toplevel
            initialize_database,
        )

        initialize_database(config)
    DBSession = sessionmaker(bind=self.engine)
    self.session = DBSession()
    return self.session

add_found_study(session, dcm) #

Add study metadata to the database after a successful C-FIND

operation.

Parameters:

Name Type Description Default
dcm Dataset

The retrieved Dataset instance resulting from a C-FIND operation.

required
session Session

The database session.

required
Source code in pacsanini/db/crud.py
def add_found_study(session: Session, dcm: Dataset) -> Optional[StudyFind]:
    """Persist the study metadata obtained from a successful C-FIND
    operation.

    Parameters
    ----------
    session : Session
        The database session.
    dcm : Dataset
        The retrieved Dataset instance resulting from a C-FIND operation.

    Returns
    -------
    Optional[StudyFind]
        The committed StudyFind instance. If the commit raises an
        IntegrityError, the transaction is rolled back and None is
        returned.
    """
    finding = dcm2study_finding(dcm)

    try:
        session.add(finding)
        session.commit()
    except exc.IntegrityError:
        session.rollback()
        return None
    return finding

add_image(session, dcm, institution=None, filepath=None) #

Insert an image to the database. If the image belongs to a new patient, study, or

series, the relevant tables will also be updated. If the image already exists in the database (based on the SOPInstanceUID), the transaction will be rolled back.

Parameters:

Name Type Description Default
session Session

The database session to use for inserting the DICOM image into the database.

required
dcm Union[str, pydicom.dataset.Dataset]

The DICOM image to add to the database.

required
institution str

The institution that the DICOM image belongs to. The default is None.

None
filepath str

The DICOM image's filepath. The default is None.

None

Returns:

Type Description
Optional[pacsanini.db.models.Image]

The inserted Image object. If the insert was unsuccessful, None is returned.

Source code in pacsanini/db/crud.py
def _add_or_fetch_id(session, instance, id_column, criterion):
    """Flush ``instance`` and return its id, or look up the existing row's id
    with ``criterion`` when the flush raises an IntegrityError."""
    try:
        session.add(instance)
        session.flush()
        return instance.id
    except exc.IntegrityError:
        # NOTE(review): rollback() applies to the session's whole current
        # transaction, not only to this instance - confirm that discarding
        # rows flushed earlier in the same call is acceptable (a savepoint
        # via session.begin_nested() would be the alternative).
        session.rollback()
        return session.query(id_column).filter(criterion).first()[0]


def add_image(
    session: Session,
    dcm: Union[str, Dataset],
    institution: str = None,
    filepath: str = None,
) -> Optional[Image]:
    """Insert an image to the database. If the image belongs to a new patient, study, or
    series, the relevant tables will also be updated. If the image already exists in the
    database (the final commit raises an IntegrityError), the transaction will be rolled
    back.

    Parameters
    ----------
    session : Session
        The database session to use for inserting the DICOM image into the database.
    dcm : Union[str, Dataset]
        The DICOM image to add to the database.
    institution : str
        The institution that the DICOM image belongs to. The default is None.
    filepath : str
        The DICOM image's filepath. The default is None.

    Returns
    -------
    Optional[Image]
        The inserted Image object. If the insert was unsuccessful, None
        is returned.
    """
    pat, study, series, image = dcm2dbmodels(
        dcm, institution=institution, filepath=filepath
    )

    # Resolve the parents from the top down; each child stores the database
    # id of its parent before being flushed itself.
    pat_dbid = _add_or_fetch_id(
        session, pat, Patient.id, Patient.patient_id == pat.patient_id
    )

    study.patient_id = pat_dbid
    study_dbid = _add_or_fetch_id(
        session, study, Study.id, Study.study_uid == study.study_uid
    )

    series.study_id = study_dbid
    series_dbid = _add_or_fetch_id(
        session, series, Series.id, Series.series_uid == series.series_uid
    )

    image.series_id = series_dbid
    try:
        session.add(image)
        session.commit()
    except exc.IntegrityError:
        # The original message was missing its f-string prefix and logged the
        # literal text "{image}".
        logger.warning(
            f"{image} already exists in the database. Rolling back commit..."
        )
        session.rollback()
        return None
    else:
        return image

get_studies_to_move(session) #

Get a list of StudyFind instances that haven't been retrieved

according to their retrieved_on key.

Parameters:

Name Type Description Default
session Session

The database session to use.

required

Returns:

Type Description
List[pacsanini.db.models.StudyFind]

A list of StudyFind resources that should be moved.

Source code in pacsanini/db/crud.py
def get_studies_to_move(session: Session) -> List[StudyFind]:
    """Fetch the StudyFind rows whose `retrieved_on` value is NULL, i.e.
    the found studies that have not been retrieved yet.

    Parameters
    ----------
    session : Session
        The database session to use.

    Returns
    -------
    List[StudyFind]
        A list of StudyFind resources that should be moved.
    """
    not_retrieved = StudyFind.retrieved_on.is_(None)
    return session.query(StudyFind).filter(not_retrieved).all()

get_study_uids_to_move(session) #

Get a list of StudyInstanceUID values to retrieve.

Parameters:

Name Type Description Default
session Session

The database session to use.

required

Returns:

Type Description
List[str]

A list of StudyInstanceUID resources that should be moved.

Source code in pacsanini/db/crud.py
def get_study_uids_to_move(session: Session) -> List[str]:
    """Collect the `study_uid` of every StudyFind returned by
    get_studies_to_move.

    Parameters
    ----------
    session : Session
        The database session to use.

    Returns
    -------
    List[str]
        A list of StudyInstanceUID resources that should be moved.
    """
    pending_studies = get_studies_to_move(session)
    return [pending.study_uid for pending in pending_studies]

update_retrieved_study(session, study_uid) #

Update a found study by setting its retrieved_on value to the current

date. If the relevant study was already retrieved, it will not be updated but the StudyFind instance will be returned. If the found study does not exist, None is returned.

Parameters:

Name Type Description Default
session Session

The database session to use.

required
study_uid str

The study instance uid to mark as retrieved.

required

Returns:

Type Description
Optional[pacsanini.db.models.StudyFind]

The StudyFind instance if it was found or updated. None otherwise.

Source code in pacsanini/db/crud.py
def update_retrieved_study(session: Session, study_uid: str) -> Optional[StudyFind]:
    """Mark a found study as retrieved by setting its retrieved_on value to
    the current UTC datetime and committing.

    A study whose retrieved_on value is already set is returned untouched.
    If no StudyFind matches the given study uid, None is returned.

    Parameters
    ----------
    session : Session
        The database session to use.
    study_uid : str
        The study instance uid to mark as retrieved.

    Returns
    -------
    Optional[StudyFind]
        The StudyFind instance if it was found or updated. None otherwise.
    """
    lookup = session.query(StudyFind).filter(StudyFind.study_uid == study_uid)
    match: StudyFind = lookup.first()

    # Only a matching row that has never been stamped gets updated.
    if match is not None and match.retrieved_on is None:
        match.retrieved_on = datetime.utcnow()
        session.add(match)
        session.commit()
    return match

pacsanini.db.dcm2model #

The dcm2model module provides methods that can be used to convert pydicom.Dataset

instances to sqlalchemy instances.

dcm2dbmodels(dcm, institution=None, filepath=None) #

Convert a DICOM file into the different database models that will be used

to insert the DICOM data into the database.

Parameters:

Name Type Description Default
dcm Union[str, pydicom.dataset.Dataset]

The DICOM data to convert to a Patient, Study, Series, and Image instance.

required
institution str

If set, add a specified institution name to the Patient model. The default is None.

None
filepath str

If set, add the DICOM's filepath to the database. The default is None. If the input dcm parameter value is a string, filepath will be set to this.

None

Returns:

Type Description
Tuple[pacsanini.db.models.Patient, pacsanini.db.models.Study, pacsanini.db.models.Series, pacsanini.db.models.Image]

A 4-tuple containing the Patient, Study, Series, and Image models built from the DICOM data.

Source code in pacsanini/db/dcm2model.py
def dcm2dbmodels(
    dcm: Union[str, Dataset], institution: str = None, filepath: str = None
) -> Tuple[Patient, Study, Series, Image]:
    """Build the database models needed to insert a DICOM file's data into
    the database.

    Parameters
    ----------
    dcm : Union[str, Dataset]
        The DICOM data to convert to a Patient, Study, Series, and Image instance.
        A string is treated as a path and read without its pixel data.
    institution : str
        If set, add a specified institution name to the Patient
        and Image models. The default is None.
    filepath : str
        If set, add the DICOM's filepath to the database. The default
        is None. If the input dcm parameter value is a string, filepath
        will be set to this.

    Returns
    -------
    Tuple[Patient, Study, Series, Image]
        A 4-tuple containing the Patient, Study, Series, and Image models
        built from the DICOM data, in that order.
    """
    if isinstance(dcm, str):
        # The path given as dcm takes precedence over any filepath argument.
        filepath, dcm = dcm, dcmread(dcm, stop_before_pixels=True)

    return (
        dcm2patient(dcm, institution=institution),
        dcm2study(dcm),
        dcm2series(dcm),
        dcm2image(dcm, institution=institution, filepath=filepath),
    )

dcm2image(dcm, institution=None, filepath=None) #

Convert a DICOM file to an Image instance that can be inserted

in the database.

Parameters:

Name Type Description Default
dcm Dataset

The DICOM data to convert to an Image instance.

required
institution str

If set, add a specified institution name to the Image model. The default is None.

None
filepath str

If set, add the DICOM's filepath to the database. The default is None.

None

Returns:

Type Description
Image

The Image model.

Source code in pacsanini/db/dcm2model.py
def dcm2image(dcm: Dataset, institution: str = None, filepath: str = None) -> Image:
    """Build an Image instance, ready for insertion in the database, from
    a DICOM dataset.

    Parameters
    ----------
    dcm : Dataset
        The DICOM data to convert to an Image instance.
    institution : str
        If set, add a specified institution name to the Image
        model. The default is None.
    filepath : str
        If set, add the DICOM's filepath to the database. The default
        is None.

    Returns
    -------
    Image
        The Image model.
    """
    tag_specs = [
        dict(tag_name="PatientID", tag_alias="patient_id"),
        dict(tag_name="StudyInstanceUID", tag_alias="study_uid"),
        dict(tag_name="StudyDate", tag_alias="study_date", callback=str2datetime),
        dict(tag_name="SeriesInstanceUID", tag_alias="series_uid"),
        dict(tag_name="Modality", tag_alias="modality"),
        dict(tag_name="SOPClassUID", tag_alias="sop_class_uid"),
        dict(tag_name="SOPInstanceUID", tag_alias="image_uid"),
        dict(tag_name="AcquisitionTime", tag_alias="acquisition_time"),
        dict(tag_name="Manufacturer", tag_alias="manufacturer"),
        dict(
            tag_name="ManufacturerModelName",
            tag_alias="manufacturer_model_name",
        ),
    ]
    fields = DicomTagGroup(tags=tag_specs).parse_dicom(dcm)

    # Values that do not come from the tag group itself.
    fields["meta"] = dcm2dict(dcm, include_pixels=False)
    fields["institution"] = institution
    fields["filepath"] = filepath
    return Image(**fields)

dcm2patient(dcm, institution=None) #

Convert a DICOM file to a Patient instance that can be inserted

in the database.

Parameters:

Name Type Description Default
dcm Dataset

The DICOM data to convert to a Patient instance.

required
institution str

If set, add a specified institution name to the Patient model. The default is None.

None

Returns:

Type Description
Patient

The Patient model.

Source code in pacsanini/db/dcm2model.py
def dcm2patient(dcm: Dataset, institution: str = None) -> Patient:
    """Build a Patient instance, ready for insertion in the database, from
    a DICOM dataset.

    Parameters
    ----------
    dcm : Dataset
        The DICOM data to convert to a Patient instance.
    institution : str
        If set, add a specified institution name to the Patient
        model. The default is None.

    Returns
    -------
    Patient
        The Patient model.
    """
    tag_specs = [
        dict(tag_name="PatientID", tag_alias="patient_id"),
        dict(tag_name="PatientName", tag_alias="patient_name", callback=str),
        dict(
            tag_name="PatientBirthDate",
            tag_alias="patient_birth_date",
            callback=str2datetime,
        ),
    ]
    fields = DicomTagGroup(tags=tag_specs).parse_dicom(dcm)
    fields["institution"] = institution
    return Patient(**fields)

dcm2series(dcm) #

Convert a DICOM file to a Series instance that can be inserted

in the database.

Parameters:

Name Type Description Default
dcm Dataset

The DICOM data to convert to a Series instance.

required

Returns:

Type Description
Series

The Series model.

Source code in pacsanini/db/dcm2model.py
def dcm2series(dcm: Dataset) -> Series:
    """Build a Series instance, ready for insertion in the database, from
    a DICOM dataset.

    Parameters
    ----------
    dcm : Dataset
        The DICOM data to convert to a Series instance.

    Returns
    -------
    Series
        The Series model.
    """
    tag_specs = [
        dict(tag_name="SeriesInstanceUID", tag_alias="series_uid"),
        dict(tag_name="Modality", tag_alias="modality"),
    ]
    fields = DicomTagGroup(tags=tag_specs).parse_dicom(dcm)
    return Series(**fields)

dcm2study(dcm) #

Convert a DICOM file to a Study instance that can be inserted

in the database.

Parameters:

Name Type Description Default
dcm Dataset

The DICOM data to convert to a Study instance.

required

Returns:

Type Description
Study

The Study model.

Source code in pacsanini/db/dcm2model.py
def dcm2study(dcm: Dataset) -> Study:
    """Build a Study instance, ready for insertion in the database, from
    a DICOM dataset.

    Parameters
    ----------
    dcm : Dataset
        The DICOM data to convert to a Study instance.

    Returns
    -------
    Study
        The Study model.
    """
    tag_specs = [
        dict(tag_name="StudyInstanceUID", tag_alias="study_uid"),
        dict(tag_name="StudyDate", tag_alias="study_date", callback=str2datetime),
        # PatientAge is the only tag here that declares a default (-1).
        dict(
            tag_name="PatientAge",
            tag_alias="patient_age",
            callback=agestr2years,
            default=-1,
        ),
        dict(tag_name="AccessionNumber", tag_alias="accession_number"),
    ]
    fields = DicomTagGroup(tags=tag_specs).parse_dicom(dcm)
    return Study(**fields)

dcm2study_finding(dcm) #

Convert a DICOM file to a StudyFind instance that can be inserted

in the database.

Parameters:

Name Type Description Default
dcm Dataset

The DICOM data to convert to a StudyFind instance.

required

Returns:

Type Description
StudyFind

The StudyFind model.

Source code in pacsanini/db/dcm2model.py
def dcm2study_finding(dcm: Dataset) -> StudyFind:
    """Build a StudyFind instance, ready for insertion in the database,
    from a DICOM dataset.

    Parameters
    ----------
    dcm : Dataset
        The DICOM data to convert to a StudyFind instance.

    Returns
    -------
    StudyFind
        The StudyFind model.
    """
    tag_specs = [
        dict(tag_name="PatientName", tag_alias="patient_name", callback=str),
        dict(tag_name="PatientID", tag_alias="patient_id"),
        dict(tag_name="StudyInstanceUID", tag_alias="study_uid"),
        dict(tag_name="StudyDate", tag_alias="study_date", callback=str2datetime),
        dict(tag_name="AccessionNumber", tag_alias="accession_number"),
    ]
    fields = DicomTagGroup(tags=tag_specs).parse_dicom(dcm)
    return StudyFind(**fields)

pacsanini.db.parser #

The parser module provides convenience methods for parsing DICOM files

and storing results into a given database.

parse_dir2sql(src, conn_uri, institution_name=None, nb_threads=1, create_tables=False) #

Parse a DICOM directory and persist the found results in the database

specified by the conn_uri parameter.

Parameters:

Name Type Description Default
src str

The DICOM directory to parse.

required
conn_uri str

The database's connection URI to use.

required
institution_name str

If specified, associate the parsed DICOM files with the name of an institution. If unset, this will default to unknown_ followed by today's date in the YYYYMMDD format.

None
nb_threads int

The number of threads to use. This defaults to 1.

1
create_tables bool

If True, create the database tables before inserting the first parser result. The default is False.

False
Source code in pacsanini/db/parser.py
def parse_dir2sql(
    src: str,
    conn_uri: str,
    institution_name: str = None,
    nb_threads: int = 1,
    create_tables: bool = False,
):
    """Walk a DICOM directory and store what is found in the database
    designated by the conn_uri parameter.

    Notes
    -----
    Unlike other parse_dir wrapper methods, this method does not use the DICOMTagParser
    instance. Parsed DICOM files will have basic DICOM tag metadata stored in traditional
    columns as well as the entire DICOM file (pixel data excluded) stored in JSON format.

    Parameters
    ----------
    src : str
        The DICOM directory to parse.
    conn_uri : str
        The database's connection URI to use.
    institution_name : str
        If specified, associate the parsed DICOM files with the name of an
        institution. If unset, this will default to ``unknown_`` followed by
        today's date in the YYYYMMDD format.
    nb_threads : int
        The number of threads to use. This defaults to 1.
    create_tables : bool
        If True, create the database tables before inserting the first
        parser result. The default is False.
    """
    if institution_name is None:
        today_stamp = datetime.now().strftime("%Y%m%d")
        institution_name = f"unknown_{today_stamp}"

    # The wrapper is always created with debug=True here.
    db_wrapper = DBWrapper(conn_uri, create_tables=create_tables, debug=True)
    with db_wrapper as wrapper:
        parse_dir(
            src,
            None,
            _inner_sql,
            nb_threads=nb_threads,
            callback_args=(wrapper, institution_name),
            include_path=True,
        )

pacsanini.db.utils #

Database utilities for managing/initializing the pacsanini database.

dump_database(session, output=None, tables=None) #

Dump the pacsanini database into CSV files. Each CSV file

corresponds to a database table.

Parameters:

Name Type Description Default
session Session

The database session to use for dumping data.

required
output str

If set, write all output files under the specified directory. If it doesn't exist, it will be created. The default is the current directory.

None
tables List[str]

Optional. If set, specify the tables to dump in CSV format.

None

Exceptions:

Type Description
ValueError

A ValueError is raised if a table name in the tables parameter does not correspond to an existing table.

Source code in pacsanini/db/utils.py
def dump_database(
    session: Session, output: str = None, tables: List[str] = None
) -> None:
    """Write the pacsanini database out as CSV files, one
    ``<table name>.csv`` file per table.

    Parameters
    ----------
    session : Session
        The database session to use for dumping data.
    output : str
        If set, write all output files under the specified directory.
        If it doesn't exist, it will be created. The default is the
        current directory.
    tables : List[str]
        Optional. If set, specify the tables to dump in CSV format.
        When unset or empty, every table known to TABLES is dumped.

    Raises
    ------
    ValueError
        A ValueError is raised if a table name in the tables parameter
        does not correspond to an existing table.
    """
    if tables:
        # Validate every requested name before any file is written.
        for table_name in tables:
            if table_name not in TABLES:
                raise ValueError(
                    f'"{table_name}" does not exist in the pacsanini database.'
                )
        target_tables = tables
    else:
        target_tables = list(TABLES.keys())

    if not output:
        output = os.getcwd()
    else:
        os.makedirs(output, exist_ok=True)

    for table_name in target_tables:
        csv_path = os.path.join(output, f"{table_name}.csv")
        model = TABLES[table_name]
        column_names = [column.name for column in model.__mapper__.columns]  # type: ignore

        start_time = time()
        logger.info(f"Initiating dump of the {table_name} table...")

        with open(csv_path, "w", newline="", encoding="utf-8") as handle:
            csv_writer = csv.DictWriter(handle, column_names)
            csv_writer.writeheader()
            csv_writer.writerows(
                {name: getattr(row, name) for name in column_names}
                for row in session.query(model).all()
            )

        end_time = time()
        logger.info(
            f"Finished dump of the {table_name} table in {end_time-start_time:.3f}s"
        )

get_db_session(db_uri) #

Obtain a database session whose opening and closing is context

managed. If an error is raised during the session's usage, the current transaction will be rolled back, closed, and the error will be raised. It is the caller's responsibility to commit transactions.

Parameters:

Name Type Description Default
db_uri str

The database's URI.

required

Returns:

Type Description
Generator[sqlalchemy.orm.session.Session, NoneType, NoneType]

The context-wrapped Session instance.

Source code in pacsanini/db/utils.py
@contextmanager
def get_db_session(db_uri: str) -> Generator[Session, None, None]:
    """Obtain a database session whose opening and closing is context
    managed. If an error is raised during the session's usage, the
    current transaction will be rolled back, closed, and the error will
    be raised. It is the caller's responsibility to commit transactions.

    For URIs starting with "sqlite" (case-insensitive), the engine is
    created with ``check_same_thread`` set to False. For every other URI
    an empty ``connect_args`` dictionary is passed: the previous value of
    None is not a dictionary, which is what create_engine expects for this
    argument.

    Parameters
    ----------
    db_uri : str
        The database's URI.

    Returns
    -------
    Generator[Session, None, None]
        The context-wrapped Session instance.
    """
    engine = None
    db_session = None
    try:
        connect_args = {}
        if db_uri.lower().startswith("sqlite"):
            connect_args["check_same_thread"] = False
        engine = create_engine(db_uri, connect_args=connect_args)
        DBSession = sessionmaker(bind=engine)
        db_session = DBSession()
        yield db_session
    except BaseException:
        # Deliberately catches everything (as the former bare except did):
        # the transaction is rolled back and the error always re-raised.
        if db_session is not None:
            db_session.rollback()
        raise
    finally:
        # Runs on both success and failure; only releases what was created.
        if db_session is not None:
            db_session.close()
        if engine is not None:
            engine.dispose()

initialize_database(config, echo=True, force_init=False) #

Initialize the pacsanini database after checking whether it

already exists or not.

Parameters:

Name Type Description Default
config PacsaniniConfig

The configuration to use for the database initialization.

required
echo bool

Whether to echo SQL statements made during the creation of the database. The default is True.

True
force_init bool

Force the database initialization regardless of whether it already exists. This is mainly useful for sqlite databases as the sqlite file will be created as soon as the engine is created. The default is False.

False
Source code in pacsanini/db/utils.py
def initialize_database(
    config: PacsaniniConfig, echo: bool = True, force_init: bool = False
) -> None:
    """Set up the pacsanini database unless it is already present.

    The database is created and upgraded to the latest alembic revision
    when it does not exist yet, or unconditionally when force_init is set.

    Parameters
    ----------
    config : PacsaniniConfig
        The configuration to use for the database initialization.
    echo : bool
        Whether to echo SQL statements made during the creation
        of the database. The default is True.
    force_init : bool
        Force the database initialization regardless of whether it already exists.
        This is mainly useful for sqlite databases as the sqlite file will be created
        as soon as the engine is created. The default is False.
    """
    logger.info("Initializing new pacsanini database instance...")

    # Guard clause: nothing to do for an existing database unless forced.
    if database_exists(config.storage.resources) and not force_init:
        logger.info("pacsanini database already found... Skipping initialization.")
        return

    create_database(config.storage.resources)

    alembic_config = get_alembic_config(config)
    latest_revision = get_latest_version(alembic_config)

    command.current(alembic_config)
    if echo:
        # First pass with sql=True, then the regular upgrade below.
        command.upgrade(alembic_config, latest_revision, sql=True)
    command.upgrade(alembic_config, latest_revision)