Schema#

class swordfish.connection.Schema(impl)#

Manages catalog schemas.

Parameters:

impl (SchemaImpl)

property handle: Handle#

Obtains a schema handle.

Returns:

The schema handle.

Return type:

Handle

create_table(name: str, table_schema, partition_cols: List[str], *, compress_methods: Dict[str, str] = None) → Table#
create_table(name: str, table_schema, *, compress_methods: Dict[str, str] = None) → Table
create_table(name: str, table_schema, partition_cols: List[str], *, compress_methods: Dict[str, str] = None, sort_cols: List[str] = None, keep_duplicates: Literal['ALL', 'LAST', 'FIRST'] | EnumInt = 'ALL', sort_key_mapping_function: List[FunctionDef] = None, soft_delete: bool = False, indexes: List[str] = None) → Table
create_table(name: str, table_schema, *, compress_methods: Dict[str, str] = None, sort_cols: List[str] = None, keep_duplicates: Literal['ALL', 'LAST', 'FIRST'] | EnumInt = 'ALL', soft_delete: bool = False, indexes: List[str] = None) → Table
create_table(name: str, table_schema, partition_cols: List[str], *, compress_methods: Dict[str, str] = None, primary_key_cols: List[str] = None, indexes: List[str] = None) → Table
create_table(name: str, table_schema, *, compress_methods: Dict[str, str] = None, primary_key_cols: List[str] = None, indexes: List[str] = None) → Table

Creates a table using a specific storage engine with optional partitioning.

This method supports creating tables with three different storage engines: OLAP, TSDB, PKEY. Each engine supports creating either:

  • Partitioned tables: Require partition_cols for data partitioning

  • Dimension tables: Do not use partitioning but retain other engine capabilities

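Engine Compatibility Chart:

| Feature                       | OLAP Engine      | TSDB Engine      | PKEY Engine      |
|-------------------------------|------------------|------------------|------------------|
| partition_cols                | ✓ (required) [1] | ✓ (required) [1] | ✓ (required) [1] |
| compress_methods              | ✓                | ✓                | ✓                |
| sort_cols                     | ✗                | ✓                | ✗                |
| primary_key_cols              | ✗                | ✗                | ✓                |
| keep_duplicates               | ✗                | ✓                | ✗                |
| sort_key_mapping_function [2] | ✗                | ✓                | ✗                |
| soft_delete                   | ✗                | ✓                | ✗                |
| indexes                       | ✗                | ✓                | ✓                |

[1] Required only when creating a partitioned table; omit it to create a dimension table.

[2] Applies to partitioned tables only.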

Parameters:
  • name (str) – The name of the table.

  • table_schema (Any) – The schema definition of the table, a mapping of column names to data types.

  • partition_cols (list of str, optional) – The partitioning column(s). Required when creating a partitioned table; omit it to create a dimension table. Defaults to None.

  • compress_methods (dict of str to str, optional) – Compression methods for specific columns, where the key is the column name and the value is the compression method. Defaults to None.

  • sort_cols (list of str, optional) – Columns used for sorting to optimize query performance, applicable for the TSDB engine. Defaults to None.

  • primary_key_cols (list of str, optional) – Columns that act as primary keys, applicable for the PKEY engine. Defaults to None.

  • keep_duplicates ({"ALL", "LAST", "FIRST"} or EnumInt, optional) – Deduplication strategy, applicable for the TSDB engine. Defaults to "ALL".

      ◦ "ALL": Allows all duplicate values.

      ◦ "LAST": Retains only the latest value.

      ◦ "FIRST": Retains only the earliest value.

  • sort_key_mapping_function (list of FunctionDef, optional) – A list of functions for defining sorting key mappings, applicable for the TSDB engine (partitioned table). Defaults to None.

  • soft_delete (bool, optional) – Enables soft delete functionality, applicable for the TSDB engine. Defaults to False.

  • indexes (list of str, optional) – A list of columns to create indexes on, used for query optimization. Applicable for the TSDB and PKEY engines. Defaults to None.

Returns:

The created table instance.

Return type:

Table

Examples

Creating a partitioned table with the OLAP engine
>>> schema.create_table("quote", table_schema={'id': "INT",
... 'date': "DATE", 'value': "DOUBLE"}, partition_cols=["id"],
... compress_methods={"id": "lz4"})
Creating a dimension table with the OLAP engine
>>> schema.create_table("quote", table_schema={'id': "INT",
... 'date': "DATE", 'value': "DOUBLE"}, compress_methods={"id":
... "lz4"})
Creating a partitioned table with the TSDB engine
>>> import swordfish as sf
>>> import swordfish.function as F
>>> schema.create_table(
...     name="quote",
...     table_schema={'id': "INT", 'date': "DATE", 'value':
...     "DOUBLE"},
...     partition_cols=["id"],
...     compress_methods={"id": "lz4"},
...     sort_cols=["date", "id"],
...     keep_duplicates="LAST",
...     sort_key_mapping_function=[sf.partial(F.hashBucket,
...     buckets=5)],
...     soft_delete=True,
...     indexes=["id", "date"],
... )
Creating a dimension table with the TSDB engine
>>> schema.create_table(
...     name="quote",
...     table_schema={'id': "INT", 'date': "DATE", 'value':
...     "DOUBLE"},
...     compress_methods={"id": "lz4"},
...     sort_cols=["date", "id"],
...     keep_duplicates="LAST",
...     soft_delete=True,
...     indexes=["id", "date"],
... )
Creating a partitioned table with the PKEY engine
>>> schema.create_table(
...     name="quote",
...     table_schema={'id': "INT", 'date': "DATE", 'value':
...     "DOUBLE"},
...     partition_cols=["id"],
...     compress_methods={"id": "lz4"},
...     primary_key_cols=["id"],
...     indexes=["date"],
... )
Creating a dimension table with the PKEY engine
>>> schema.create_table(
...     name="quote",
...     table_schema={'id': "INT", 'date': "DATE", 'value':
...     "DOUBLE"},
...     compress_methods={"value": "lz4"},
...     primary_key_cols=["id"],
...     indexes=["date"],
... )
list_tables()#

Retrieves the names of all tables in the schema.

Returns:

A list of table names.

Return type:

list of str

Examples

>>> schema.list_tables()
exists_table(name)#

Checks whether a table with the given name exists in the schema.

Parameters:

name (str)

Return type:

bool

drop_table(name)#

Drops a table from the schema.

Parameters:

name (str) – The name of the table to be dropped.

Examples

>>> schema.drop_table("table_name")
truncate_table(name)#

Truncates a table in the schema.

Parameters:

name (str) – The name of the table to truncate.

Examples

>>> schema.truncate_table("table_name")
table(name)#

Retrieves a table by name.

Parameters:

name (str) – The name of the table to retrieve.

Returns:

The Table corresponding to the specified name.

Return type:

Table

Examples

>>> schema.table("table_name")
property engine_type: StorageType#

Retrieves the storage engine type.

Returns:

The type of storage engine.

Return type:

StorageType