Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions docs/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1057,6 +1057,45 @@ That will look for SpatiaLite in a set of predictable locations. To load it from

sqlite-utils create-database empty.db --init-spatialite --load-extension /path/to/spatialite.so

.. _cli_merge:

Merging databases
=================

Use ``sqlite-utils merge`` to merge tables from one or more source databases into a destination database.

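Tables that do not exist in the destination are created using the source table's schema and rows. For tables that already exist in the destination, the rows from the source table are inserted into the existing table.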

.. code-block:: bash

    sqlite-utils merge combined.db one.db two.db

To automatically add missing columns to existing destination tables, use ``--alter``:

.. code-block:: bash

    sqlite-utils merge combined.db one.db two.db --alter

To replace rows that have conflicting primary keys, use ``--replace``:

.. code-block:: bash

    sqlite-utils merge combined.db one.db two.db --replace

To skip rows that have conflicting primary keys, use ``--ignore``:

.. code-block:: bash

    sqlite-utils merge combined.db one.db two.db --ignore

To merge only specific tables, use ``--table`` (can be specified multiple times):

.. code-block:: bash

    sqlite-utils merge combined.db one.db two.db --table mytable

Virtual tables (such as FTS indexes) and their shadow tables are automatically skipped.

.. _cli_inserting_data:

Inserting JSON data
Expand Down
57 changes: 57 additions & 0 deletions sqlite_utils/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -1541,6 +1541,63 @@ def create_database(path, enable_wal, init_spatialite, load_extension):
db.vacuum()


@cli.command(name="merge")
@click.argument(
    "path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.argument(
    "sources",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False, exists=True),
    nargs=-1,
    required=True,
)
@click.option("pks", "--pk", help="Column to use as primary key", multiple=True)
@click.option(
    "--alter",
    is_flag=True,
    help="Alter destination tables to add any missing columns",
)
@click.option(
    "--replace", is_flag=True, help="Replace rows with matching primary keys"
)
@click.option(
    "--ignore", is_flag=True, help="Ignore rows with conflicting primary keys"
)
@click.option(
    "tables",
    "--table",
    help="Specific tables to merge (can be specified multiple times)",
    multiple=True,
)
@load_extension_option
def merge_cmd(path, sources, pks, alter, replace, ignore, tables, load_extension):
    """
    Merge tables from one or more SOURCE databases into a DEST database.

    Tables that do not exist in DEST are created. Tables that already exist
    have their rows inserted. Use --alter to add missing columns automatically.

    --replace and --ignore are alternative ways of handling conflicting
    primary keys and cannot be combined.

    Example:

    \b
        sqlite-utils merge combined.db one.db two.db
        sqlite-utils merge combined.db one.db two.db --alter
        sqlite-utils merge combined.db one.db two.db --replace --table mytable
    """
    # The two conflict strategies contradict each other. Reject the combination
    # before touching the filesystem so a usage error never creates or modifies
    # the destination database.
    if replace and ignore:
        raise click.ClickException("Use just one of --replace and --ignore")

    db = sqlite_utils.Database(path)
    _register_db_for_cleanup(db)
    _load_extensions(db, load_extension)
    try:
        db.merge(
            sources,
            # click delivers "multiple" options as (possibly empty) tuples;
            # an empty tuple means "not specified", which db.merge spells None.
            pk=list(pks) if pks else None,
            alter=alter,
            replace=replace,
            ignore=ignore,
            tables=list(tables) if tables else None,
        )
    except OperationalError as e:
        # Surface SQLite errors as a clean CLI message instead of a traceback,
        # keeping the original exception chained for debugging.
        raise click.ClickException(str(e)) from e


@cli.command(name="create-table")
@click.argument(
"path",
Expand Down
71 changes: 71 additions & 0 deletions sqlite_utils/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,77 @@ def attach(self, alias: str, filepath: Union[str, pathlib.Path]) -> None:
).strip()
self.execute(attach_sql)

def merge(
    self,
    sources: Iterable[Union[str, pathlib.Path, "Database"]],
    *,
    pk: Optional[Any] = None,
    alter: bool = False,
    replace: bool = False,
    ignore: bool = False,
    tables: Optional[Iterable[str]] = None,
) -> "Database":
    """
    Merge tables from one or more source databases into this database.

    Tables that do not exist in the destination are created with the source
    schema and all rows. Tables that already exist have rows inserted into
    them. Use ``alter=True`` to automatically add any missing columns to
    existing destination tables.

    Virtual tables (e.g. FTS indexes) in source databases are skipped, as
    are tables whose name starts with ``<virtual table name>_`` (treated as
    shadow tables of that virtual table).

    :param sources: One or more source databases. Each item may be a
        ``Database`` instance, or a path to a SQLite database file. A single
        path or ``Database`` (not wrapped in a list) is also accepted.
        Databases opened here from a path are closed again once merged;
        ``Database`` instances passed in are left open for the caller.
    :param pk: Primary key column(s) to use for all merged tables - either a
        single column name or an iterable of column names. When ``None``,
        each source table's own primary key(s) are used.
    :param alter: Add any missing columns to existing destination tables.
    :param replace: Replace rows whose primary key already exists in the
        destination table.
    :param ignore: Skip rows whose primary key already exists in the
        destination table.
    :param tables: If provided, only merge these named tables. Tables
        listed here that do not exist in a particular source are silently
        skipped.
    :return: ``self`` (the destination database).
    """
    # A lone path would otherwise be iterated character by character, with a
    # database opened for each character.
    if isinstance(sources, (str, pathlib.Path, Database)):
        sources = [sources]

    # Materialize once: ``tables`` may be a one-shot iterator, which would be
    # exhausted after the first source if it were re-listed per source.
    wanted_tables = list(tables) if tables is not None else None

    # Normalize the explicit primary key once, outside the loops. A bare
    # string is a single column name and must not be split into characters.
    forced_pk: Any = None
    if pk is not None:
        if isinstance(pk, str):
            forced_pk = pk
        else:
            pk_columns = list(pk)
            forced_pk = pk_columns[0] if len(pk_columns) == 1 else pk_columns

    for source in sources:
        opened_here = isinstance(source, (str, pathlib.Path))
        source_db = Database(source) if opened_here else source
        try:
            source_table_names = source_db.table_names()
            available = set(source_table_names)
            # Collect virtual table names so their shadow tables can be
            # skipped too.
            virtual_table_names = {
                name
                for name in source_table_names
                if source_db.table(name).virtual_table_using is not None
            }
            # str.startswith accepts a tuple; an empty tuple never matches.
            shadow_prefixes = tuple(vt + "_" for vt in virtual_table_names)
            names_to_merge = (
                wanted_tables if wanted_tables is not None else source_table_names
            )
            for table_name in names_to_merge:
                if table_name not in available:
                    continue
                # Skip virtual tables (e.g. FTS indexes).
                if table_name in virtual_table_names:
                    continue
                # Skip shadow tables created by virtual tables
                # (e.g. docs_fts_data).
                if table_name.startswith(shadow_prefixes):
                    continue
                source_table = source_db.table(table_name)
                effective_pk: Any
                if pk is not None:
                    effective_pk = forced_pk
                elif source_table.use_rowid:
                    # No explicit primary key on the source table.
                    effective_pk = None
                else:
                    source_pks = source_table.pks
                    effective_pk = (
                        source_pks[0] if len(source_pks) == 1 else source_pks
                    )
                self[table_name].insert_all(
                    source_table.rows,
                    pk=effective_pk,
                    alter=alter,
                    replace=replace,
                    ignore=ignore,
                )
        finally:
            # Only close connections this method opened itself.
            if opened_here:
                source_db.close()
    return self

def query(
self, sql: str, params: Optional[Union[Sequence, Dict[str, Any]]] = None
) -> Generator[dict, None, None]:
Expand Down
Loading
Loading