db
pacsanini.db.crud
#
The crud module provides methods and classes that can be used to insert
single items (studies found from C-FIND requests or DICOM metadata) into a given database.
DBWrapper
#
A wrapper class for the database connections. The purpose of this is
to be able to open database connections lazily inside a thread that may not be the application's main thread. It is recommended to use instances of this class inside a context manager.
Attributes:
| Name | Type | Description |
|---|---|---|
conn_uri |
str |
The database connection URI. |
create_tables |
bool |
Whether to create tables when the connection is first established. The default is False. |
debug |
bool |
If True, echo SQL statements to the standard output. The default is False. |
close(self)
#
Close the instance's current session and engine if they are still open.
Source code in pacsanini/db/crud.py
def close(self):
    """Release the database resources held by this instance.

    The session is closed first, then the engine is disposed of. Either
    step is skipped when the corresponding attribute is None.
    """
    current_session = self.session
    if current_session is not None:
        current_session.close()

    current_engine = self.engine
    if current_engine is not None:
        current_engine.dispose()
conn(self)
#
Obtain a session instance
Source code in pacsanini/db/crud.py
def conn(self) -> Session:
    """Obtain the instance's session, creating it on first use.

    On the first call an engine is created from ``self.conn_uri``. If
    ``self.create_tables`` is truthy, the database is initialized through
    ``initialize_database``. A session bound to the engine is then stored
    on the instance. Later calls return that stored session.

    Returns
    -------
    Session
        The session bound to this instance's engine.
    """
    # Cache on the instance itself instead of using functools.lru_cache:
    # a method-level lru_cache is shared by every instance (with maxsize=1,
    # two alternating instances would keep rebuilding engines) and it keeps
    # a strong reference to ``self`` for as long as the cache entry lives.
    existing_session = getattr(self, "session", None)
    if existing_session is not None:
        return existing_session

    self.engine = create_engine(self.conn_uri)
    if self.create_tables:
        config = PacsaniniConfig(
            storage=StorageConfig(resources=self.conn_uri, directory="./")
        )
        # Imported here rather than at module level (see the pylint marker).
        from pacsanini.db.utils import (  # pylint: disable=import-outside-toplevel
            initialize_database,
        )

        initialize_database(config)

    DBSession = sessionmaker(bind=self.engine)
    self.session = DBSession()
    return self.session
add_found_study(session, dcm)
#
Add study metadata to the database after a successful C-FIND
operation.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
dcm |
Dataset |
The retrieved Dataset instance resulting from a C-FIND operation. |
required |
session |
Session |
The database session. |
required |
Source code in pacsanini/db/crud.py
def add_found_study(session: Session, dcm: Dataset) -> Optional[StudyFind]:
    """Persist the study described by a successful C-FIND result.

    Parameters
    ----------
    session : Session
        The database session.
    dcm : Dataset
        The retrieved Dataset instance resulting from a C-FIND operation.

    Returns
    -------
    Optional[StudyFind]
        The committed StudyFind instance. None is returned when the commit
        raised an IntegrityError and the transaction was rolled back.
    """
    finding = dcm2study_finding(dcm)
    try:
        session.add(finding)
        session.commit()
    except exc.IntegrityError:
        # The database rejected the row: undo the transaction and report
        # that nothing was added.
        session.rollback()
        finding = None
    return finding
add_image(session, dcm, institution=None, filepath=None)
#
Insert an image to the database. If the image belongs to a new patient, study, or
series, the relevant tables will also be updated. If the image already exists in the database (based on the SOPInstanceUID), the transaction will be rolled back.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
session |
Session |
The database session to use for inserting the DICOM image into the database. |
required |
dcm |
Union[str, pydicom.dataset.Dataset] |
The DICOM image to add to the database. |
required |
institution |
str |
The institution that the DICOM image belongs to. The default is None. |
None |
filepath |
str |
The DICOM image's filepath. The default is None. |
None |
Returns:
| Type | Description |
|---|---|
Optional[pacsanini.db.models.Image] |
The inserted Image object. If the insert was unsuccessful, None is returned. |
Source code in pacsanini/db/crud.py
def _insert_or_get_id(session: Session, instance, model, uid_attr: str):
    """Flush ``instance`` and return its id, or the id of the existing row.

    When the flush raises an IntegrityError, the transaction is rolled back
    and the id of the ``model`` row whose ``uid_attr`` column equals the
    instance's value is returned instead.
    """
    try:
        session.add(instance)
        session.flush()
        return instance.id
    except exc.IntegrityError:
        session.rollback()
        return (
            session.query(model.id)
            .filter(getattr(model, uid_attr) == getattr(instance, uid_attr))
            .first()[0]
        )


def add_image(
    session: Session,
    dcm: Union[str, Dataset],
    institution: Optional[str] = None,
    filepath: Optional[str] = None,
) -> Optional[Image]:
    """Insert an image into the database.

    If the image belongs to a new patient, study, or series, the relevant
    tables are also updated. If the final commit raises an IntegrityError
    (for example because the image already exists), the transaction is
    rolled back.

    Parameters
    ----------
    session : Session
        The database session to use for inserting the DICOM image into the database.
    dcm : Union[str, Dataset]
        The DICOM image to add to the database.
    institution : str
        The institution that the DICOM image belongs to. The default is None.
    filepath : str
        The DICOM image's filepath. The default is None.

    Returns
    -------
    Optional[Image]
        The inserted Image object. If the insert was unsuccessful, None
        is returned.
    """
    pat, study, series, image = dcm2dbmodels(
        dcm, institution=institution, filepath=filepath
    )

    # Each level is inserted (or its existing id looked up) before the next
    # one so that the child row can reference its parent's database id.
    study.patient_id = _insert_or_get_id(session, pat, Patient, "patient_id")
    series.study_id = _insert_or_get_id(session, study, Study, "study_uid")
    image.series_id = _insert_or_get_id(session, series, Series, "series_uid")

    try:
        session.add(image)
        session.commit()
    except exc.IntegrityError:
        session.rollback()
        # The message was previously a plain string, so the literal text
        # "{image}" was logged instead of the image itself.
        logger.warning(
            f"{image} already exists in the database. Rolling back commit..."
        )
        return None
    return image
get_studies_to_move(session)
#
Get a list of StudyFind instances that haven't been retrieved
according to their retrieved_on key.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
session |
Session |
The database session to use. |
required |
Returns:
| Type | Description |
|---|---|
List[pacsanini.db.models.StudyFind] |
A list of StudyFind resources that should be moved. |
Source code in pacsanini/db/crud.py
def get_studies_to_move(session: Session) -> List[StudyFind]:
    """Return every StudyFind row whose `retrieved_on` value is NULL.

    Parameters
    ----------
    session : Session
        The database session to use.

    Returns
    -------
    List[StudyFind]
        A list of StudyFind resources that should be moved.
    """
    not_yet_retrieved = StudyFind.retrieved_on.is_(None)
    return session.query(StudyFind).filter(not_yet_retrieved).all()
get_study_uids_to_move(session)
#
Get a list of StudyInstanceUID values to retrieve.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
session |
Session |
The database session to use. |
required |
Returns:
| Type | Description |
|---|---|
List[str] |
A list of StudyInstanceUID resources that should be moved. |
Source code in pacsanini/db/crud.py
def get_study_uids_to_move(session: Session) -> List[str]:
    """Collect the `study_uid` of every study returned by get_studies_to_move.

    Parameters
    ----------
    session : Session
        The database session to use.

    Returns
    -------
    List[str]
        A list of StudyInstanceUID resources that should be moved.
    """
    pending_uids = []
    for pending_study in get_studies_to_move(session):
        pending_uids.append(pending_study.study_uid)
    return pending_uids
update_retrieved_study(session, study_uid)
#
Update a found study by setting its retrieved_on value to the current
date. If the relevant study was already retrieved, it will not be updated but the StudyFind instance will be returned. If the found study does not exist, None is returned.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
session |
Session |
The database session to use. |
required |
study_uid |
str |
The study instance uid to mark as retrieved. |
required |
Returns:
| Type | Description |
|---|---|
Optional[pacsanini.db.models.StudyFind] |
The StudyFind instance if it was found or updated. None otherwise. |
Source code in pacsanini/db/crud.py
def update_retrieved_study(session: Session, study_uid: str) -> Optional[StudyFind]:
    """Mark a found study as retrieved by setting its retrieved_on value.

    The value is set to the current UTC datetime and committed only when
    the study exists and has no retrieved_on value yet. A study that was
    already retrieved is returned unchanged.

    Parameters
    ----------
    session : Session
        The database session to use.
    study_uid : str
        The study instance uid to mark as retrieved.

    Returns
    -------
    Optional[StudyFind]
        The StudyFind instance if it was found or updated. None otherwise.
    """
    lookup = session.query(StudyFind).filter(StudyFind.study_uid == study_uid)
    record: StudyFind = lookup.first()

    if record is not None and record.retrieved_on is None:
        record.retrieved_on = datetime.utcnow()
        session.add(record)
        session.commit()
    return record
pacsanini.db.dcm2model
#
The dcm2model module provides methods that can be used to convert pydicom.Dataset
instances to sqlalchemy instances.
dcm2dbmodels(dcm, institution=None, filepath=None)
#
Convert a DICOM file into the different database models that will be used
to insert the DICOM data into the database.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
dcm |
Union[str, pydicom.dataset.Dataset] |
The DICOM data to convert to a Patient, Study, Series, and Image instance. |
required |
institution |
str |
If set, add a specified institution name to the Patient model. The default is None. |
None |
filepath |
str |
If set, add the DICOM's filepath to the database. The default is None. If the input dcm parameter value is a string, filepath will be set to this. |
None |
Returns:
| Type | Description |
|---|---|
Tuple[pacsanini.db.models.Patient, pacsanini.db.models.Study, pacsanini.db.models.Series, pacsanini.db.models.Image] |
A 4-tuple containing the Patient, Study, Series, and Image models built from the DICOM data, in that order. |
Source code in pacsanini/db/dcm2model.py
def dcm2dbmodels(
    dcm: Union[str, Dataset], institution: str = None, filepath: str = None
) -> Tuple[Patient, Study, Series, Image]:
    """Build the database models needed to insert a DICOM file.

    Parameters
    ----------
    dcm : Union[str, Dataset]
        The DICOM data to convert to a Patient, Study, Series, and Image instance.
        When a string is given, it is read with dcmread (stopping before
        the pixel data).
    institution : str
        If set, add a specified institution name to the Patient
        and Image models. The default is None.
    filepath : str
        If set, add the DICOM's filepath to the database. The default
        is None. If the input dcm parameter value is a string, filepath
        will be set to this.

    Returns
    -------
    Tuple[Patient, Study, Series, Image]
        A 4-tuple containing the Patient, Study, Series, and Image models
        built from the DICOM data, in that order.
    """
    if isinstance(dcm, str):
        source_path = dcm
        dcm = dcmread(source_path, stop_before_pixels=True)
        # A path input always overrides any filepath passed by the caller.
        filepath = source_path

    return (
        dcm2patient(dcm, institution=institution),
        dcm2study(dcm),
        dcm2series(dcm),
        dcm2image(dcm, institution=institution, filepath=filepath),
    )
dcm2image(dcm, institution=None, filepath=None)
#
Convert a DICOM file to a Image instance that can be inserted
in the database.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
dcm |
Dataset |
The DICOM data to convert to a Image instance. |
required |
institution |
str |
If set, add a specified institution name to the Image model. The default is None. |
None |
filepath |
str |
If set, add the DICOM's filepath to the database. The default is None. |
None |
Returns:
| Type | Description |
|---|---|
Image |
The Image model. |
Source code in pacsanini/db/dcm2model.py
def dcm2image(dcm: Dataset, institution: str = None, filepath: str = None) -> Image:
    """Build an Image model from a DICOM dataset.

    The tags listed below are parsed through a DicomTagGroup. The result
    is completed with a `meta` entry (the output of dcm2dict with pixel
    data excluded), the institution, and the filepath.

    Parameters
    ----------
    dcm : Dataset
        The DICOM data to convert to a Image instance.
    institution : str
        If set, add a specified institution name to the Image
        model. The default is None.
    filepath : str
        If set, add the DICOM's filepath to the database. The default
        is None.

    Returns
    -------
    Image
        The Image model.
    """
    # (DICOM tag name, model field alias, extra parsing options)
    tag_specs = (
        ("PatientID", "patient_id", {}),
        ("StudyInstanceUID", "study_uid", {}),
        ("StudyDate", "study_date", {"callback": str2datetime}),
        ("SeriesInstanceUID", "series_uid", {}),
        ("Modality", "modality", {}),
        ("SOPClassUID", "sop_class_uid", {}),
        ("SOPInstanceUID", "image_uid", {}),
        ("AcquisitionTime", "acquisition_time", {}),
        ("Manufacturer", "manufacturer", {}),
        ("ManufacturerModelName", "manufacturer_model_name", {}),
    )
    tags = [
        {"tag_name": name, "tag_alias": alias, **extra}
        for name, alias, extra in tag_specs
    ]
    parsed = DicomTagGroup(tags=tags).parse_dicom(dcm)

    columns = {
        **parsed,
        "meta": dcm2dict(dcm, include_pixels=False),
        "institution": institution,
        "filepath": filepath,
    }
    return Image(**columns)
dcm2patient(dcm, institution=None)
#
Convert a DICOM file to a Patient instance that can be inserted
in the database.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
dcm |
Dataset |
The DICOM data to convert to a Patient instance. |
required |
institution |
str |
If set, add a specified institution name to the Patient model. The default is None. |
None |
Returns:
| Type | Description |
|---|---|
Patient |
The Patient model. |
Source code in pacsanini/db/dcm2model.py
def dcm2patient(dcm: Dataset, institution: str = None) -> Patient:
    """Build a Patient model from a DICOM dataset.

    Parameters
    ----------
    dcm : Dataset
        The DICOM data to convert to a Patient instance.
    institution : str
        If set, add a specified institution name to the Patient
        model. The default is None.

    Returns
    -------
    Patient
        The Patient model.
    """
    # (DICOM tag name, model field alias, extra parsing options)
    tag_specs = (
        ("PatientID", "patient_id", {}),
        ("PatientName", "patient_name", {"callback": str}),
        ("PatientBirthDate", "patient_birth_date", {"callback": str2datetime}),
    )
    tags = [
        {"tag_name": name, "tag_alias": alias, **extra}
        for name, alias, extra in tag_specs
    ]
    parsed = DicomTagGroup(tags=tags).parse_dicom(dcm)
    return Patient(**{**parsed, "institution": institution})
dcm2series(dcm)
#
Convert a DICOM file to a Series instance that can be inserted
in the database.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
dcm |
Dataset |
The DICOM data to convert to a Series instance. |
required |
Returns:
| Type | Description |
|---|---|
Series |
The Series model. |
Source code in pacsanini/db/dcm2model.py
def dcm2series(dcm: Dataset) -> Series:
    """Build a Series model from a DICOM dataset.

    Parameters
    ----------
    dcm : Dataset
        The DICOM data to convert to a Series instance.

    Returns
    -------
    Series
        The Series model.
    """
    # (DICOM tag name, model field alias)
    tag_pairs = (
        ("SeriesInstanceUID", "series_uid"),
        ("Modality", "modality"),
    )
    tags = [{"tag_name": name, "tag_alias": alias} for name, alias in tag_pairs]
    parsed = DicomTagGroup(tags=tags).parse_dicom(dcm)
    return Series(**parsed)
dcm2study(dcm)
#
Convert a DICOM file to a Study instance that can be inserted
in the database.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
dcm |
Dataset |
The DICOM data to convert to a Study instance. |
required |
Returns:
| Type | Description |
|---|---|
Study |
The Study model. |
Source code in pacsanini/db/dcm2model.py
def dcm2study(dcm: Dataset) -> Study:
    """Build a Study model from a DICOM dataset.

    Parameters
    ----------
    dcm : Dataset
        The DICOM data to convert to a Study instance.

    Returns
    -------
    Study
        The Study model.
    """
    # (DICOM tag name, model field alias, extra parsing options)
    tag_specs = (
        ("StudyInstanceUID", "study_uid", {}),
        ("StudyDate", "study_date", {"callback": str2datetime}),
        ("PatientAge", "patient_age", {"callback": agestr2years, "default": -1}),
        ("AccessionNumber", "accession_number", {}),
    )
    tags = [
        {"tag_name": name, "tag_alias": alias, **extra}
        for name, alias, extra in tag_specs
    ]
    parsed = DicomTagGroup(tags=tags).parse_dicom(dcm)
    return Study(**parsed)
dcm2study_finding(dcm)
#
Convert a DICOM file to a StudyFind instance that can be inserted
in the database.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
dcm |
Dataset |
The DICOM data to convert to a StudyFind instance. |
required |
Returns:
| Type | Description |
|---|---|
StudyFind |
The StudyFind model. |
Source code in pacsanini/db/dcm2model.py
def dcm2study_finding(dcm: Dataset) -> StudyFind:
    """Build a StudyFind model from a DICOM dataset.

    Parameters
    ----------
    dcm : Dataset
        The DICOM data to convert to a StudyFind instance.

    Returns
    -------
    StudyFind
        The StudyFind model.
    """
    # (DICOM tag name, model field alias, extra parsing options)
    tag_specs = (
        ("PatientName", "patient_name", {"callback": str}),
        ("PatientID", "patient_id", {}),
        ("StudyInstanceUID", "study_uid", {}),
        ("StudyDate", "study_date", {"callback": str2datetime}),
        ("AccessionNumber", "accession_number", {}),
    )
    tags = [
        {"tag_name": name, "tag_alias": alias, **extra}
        for name, alias, extra in tag_specs
    ]
    parsed = DicomTagGroup(tags=tags).parse_dicom(dcm)
    return StudyFind(**parsed)
pacsanini.db.parser
#
The parser module provides convenience methods for parsing DICOM files
and storing results into a given database.
parse_dir2sql(src, conn_uri, institution_name=None, nb_threads=1, create_tables=False)
#
Parse a DICOM directory and persist the found results in the database
specified by the conn_uri parameter.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
src |
str |
The DICOM directory to parse. |
required |
conn_uri |
str |
The database's connection URI to use. |
required |
institution_name |
str |
If specified, associate the parsed DICOM files with the name of an institution. If unset, this will default to unknown_ followed by today's date in the YYYYMMDD format. |
None |
nb_threads |
int |
The number of threads to use. This defaults to 1. |
1 |
create_tables |
bool |
If True, create the database tables before inserting the first parser result. The default is False. |
False |
Source code in pacsanini/db/parser.py
def parse_dir2sql(
    src: str,
    conn_uri: str,
    institution_name: str = None,
    nb_threads: int = 1,
    create_tables: bool = False,
):
    """Parse a DICOM directory and persist the found results in the database
    specified by the conn_uri parameter.

    Notes
    -----
    Unlike other parse_dir wrapper methods, this method does not use the DICOMTagParser
    instance. Parsed DICOM files will have basic DICOM tag metadata stored in traditional
    columns as well as the entire DICOM file (pixel data excluded) stored in JSON format.

    Parameters
    ----------
    src : str
        The DICOM directory to parse.
    conn_uri : str
        The database's connection URI to use.
    institution_name : str
        If specified, associate the parsed DICOM files with the name of an
        institution. If unset, this will default to ``unknown_`` followed by
        today's date in the YYYYMMDD format.
    nb_threads : int
        The number of threads to use. This defaults to 1.
    create_tables : bool
        If True, create the database tables before inserting the first
        parser result. The default is False.
    """
    if institution_name is None:
        institution_name = f"unknown_{datetime.now().strftime('%Y%m%d')}"

    db_wrapper = DBWrapper(conn_uri, create_tables=create_tables, debug=True)
    with db_wrapper as wrapper:
        # The wrapper and institution name are handed to the callback.
        callback_args = (wrapper, institution_name)
        parse_dir(
            src,
            None,
            _inner_sql,
            nb_threads=nb_threads,
            callback_args=callback_args,
            include_path=True,
        )
pacsanini.db.utils
#
Database utilities for managing/initializing the pacsanini database.
dump_database(session, output=None, tables=None)
#
Dump the pacsanini database into CSV files. Each CSV file
corresponds to a database table.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
session |
Session |
The database session to use for dumping data. |
required |
output |
str |
If set, write all output files under the specified directory. If it doesn't exist, it will be created. The default is the current directory. |
None |
tables |
List[str] |
Optional. If set, specify the tables to dump in CSV format. |
None |
Exceptions:
| Type | Description |
|---|---|
ValueError |
A ValueError is raised if a table name in the tables parameter does not correspond to an existing table. |
Source code in pacsanini/db/utils.py
def dump_database(
    session: Session, output: str = None, tables: List[str] = None
) -> None:
    """Write one CSV file per pacsanini database table.

    Parameters
    ----------
    session : Session
        The database session to use for dumping data.
    output : str
        If set, write all output files under the specified directory.
        If it doesn't exist, it will be created. The default is the
        current directory.
    tables : List[str]
        Optional. If set, specify the tables to dump in CSV format.

    Raises
    ------
    ValueError
        A ValueError is raised if a table name in the tables parameter
        does not correspond to an existing table.
    """
    if tables:
        # Reject unknown names before anything is written to disk.
        for table_name in tables:
            if table_name in TABLES:
                continue
            raise ValueError(
                f'"{table_name}" does not exist in the pacsanini database.'
            )
        selected = tables
    else:
        selected = list(TABLES.keys())

    if not output:
        output = os.getcwd()
    else:
        os.makedirs(output, exist_ok=True)

    for table_name in selected:
        csv_path = os.path.join(output, f"{table_name}.csv")
        model = TABLES[table_name]
        columns = [col.name for col in model.__mapper__.columns]  # type: ignore

        start_time = time()
        logger.info(f"Initiating dump of the {table_name} table...")
        with open(csv_path, "w", newline="", encoding="utf-8") as handle:
            csv_writer = csv.DictWriter(handle, columns)
            csv_writer.writeheader()
            csv_writer.writerows(
                {col: getattr(row, col) for col in columns}
                for row in session.query(model).all()
            )
        end_time = time()
        logger.info(
            f"Finished dump of the {table_name} table in {end_time-start_time:.3f}s"
        )
get_db_session(db_uri)
#
Obtain a database session whose opening and closing is context
managed. If an error is raised during the session's usage, the current transaction will be rolled back, closed, and the error will be raised. It is the caller's responsibility to commit transactions.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
db_uri |
str |
The database's URI. |
required |
Returns:
| Type | Description |
|---|---|
Generator[sqlalchemy.orm.session.Session, NoneType, NoneType] |
The context-wrapped Session instance. |
Source code in pacsanini/db/utils.py
@contextmanager
def get_db_session(db_uri: str) -> Generator[Session, None, None]:
    """Obtain a database session whose opening and closing is context
    managed.

    If an error is raised during the session's usage, the current
    transaction is rolled back, the session is closed, and the error is
    re-raised. It is the caller's responsibility to commit transactions.

    Parameters
    ----------
    db_uri : str
        The database's URI.

    Returns
    -------
    Generator[Session, None, None]
        The context-wrapped Session instance.
    """
    engine: Engine = None
    db_session: Session = None
    try:
        # create_engine expects a dict for connect_args (its documented
        # default is an empty dict), so always pass a dict, never None.
        connect_args = {}
        if db_uri.lower().startswith("sqlite"):
            connect_args["check_same_thread"] = False

        engine = create_engine(db_uri, connect_args=connect_args)
        DBSession = sessionmaker(bind=engine)
        db_session = DBSession()
        yield db_session
    except BaseException:
        # Deliberately broad: whatever interrupts the caller's block, undo
        # the pending transaction and let the error propagate unchanged.
        if db_session is not None:
            db_session.rollback()
        raise
    finally:
        if db_session is not None:
            db_session.close()
        if engine is not None:
            engine.dispose()
initialize_database(config, echo=True, force_init=False)
#
Initialize the pacsanini database after checking whether it
already exists or not.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
config |
PacsaniniConfig |
The configuration to use for the database initialization. |
required |
echo |
bool |
Whether to echo SQL statements made during the creation of the database. The default is True. |
True |
force_init |
bool |
Force the database initialization regardless of whether it already exists. This is mainly useful for sqlite databases as the sqlite file will be created as soon as the engine is created. The default is False. |
False |
Source code in pacsanini/db/utils.py
def initialize_database(
    config: PacsaniniConfig, echo: bool = True, force_init: bool = False
) -> None:
    """Initialize the pacsanini database after checking whether it
    already exists or not.

    Parameters
    ----------
    config : PacsaniniConfig
        The configuration to use for the database initialization.
    echo : bool
        Whether to echo SQL statements made during the creation
        of the database. The default is True.
    force_init : bool
        Force the database initialization regardless of whether it already exists.
        This is mainly useful for sqlite databases as the sqlite file will be created
        as soon as the engine is created. The default is False.
    """
    logger.info("Initializing new pacsanini database instance...")

    db_uri = config.storage.resources
    already_exists = database_exists(db_uri)
    if already_exists and not force_init:
        logger.info("pacsanini database already found... Skipping initialization.")
        return

    # Only create the database when it is actually missing. With
    # force_init=True on an existing database there is nothing to create,
    # and the migrations below still run.
    if not already_exists:
        create_database(db_uri)

    alembic_config = get_alembic_config(config)
    revision = get_latest_version(alembic_config)
    command.current(alembic_config)
    if echo:
        # First pass with sql=True emits the migration SQL; the actual
        # upgrade is applied by the call below.
        command.upgrade(alembic_config, revision, sql=True)
    command.upgrade(alembic_config, revision)