Skip to content
Snippets Groups Projects
Unverified Commit 0a38c7ec authored by David Robertson's avatar David Robertson Committed by GitHub
Browse files

Snapshot schema 72 (#13873)

Including another batch of fixes to the schema dump script
parent 41461fd4
No related branches found
No related tags found
No related merge requests found
Showing
with 2165 additions and 22 deletions
Create a new snapshot of the database schema.
......@@ -26,6 +26,9 @@ usage() {
echo " Defaults to 9999."
echo "-h"
echo " Display this help text."
echo ""
echo " NB: make sure to run this against the *oldest* supported version of postgres,"
echo " or else pg_dump might output non-backwards-compatible syntax."
}
SCHEMA_NUMBER="9999"
......@@ -240,25 +243,54 @@ DROP TABLE user_directory_search_stat;
echo "Dumping SQLite3 schema..."
mkdir -p "$OUTPUT_DIR/"{common,main,state}"/full_schema/$SCHEMA_NUMBER"
sqlite3 "$SQLITE_COMMON_DB" ".schema --indent" > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_COMMON_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_MAIN_DB" ".schema --indent" > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_MAIN_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_STATE_DB" ".schema --indent" > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_STATE_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
mkdir -p "$OUTPUT_DIR/"{common,main,state}"/full_schemas/$SCHEMA_NUMBER"
sqlite3 "$SQLITE_COMMON_DB" ".schema" > "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_COMMON_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_MAIN_DB" ".schema" > "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_MAIN_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_STATE_DB" ".schema" > "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_STATE_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
cleanup_pg_schema() {
sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d'
# Cleanup as follows:
# - Remove empty lines. pg_dump likes to output a lot of these.
# - Remove comment-only lines. pg_dump also likes to output a lot of these to visually
# separate tables etc.
# - Remove "public." prefix --- the schema name.
# - Remove "SET" commands. Last time I ran this, the output commands were
# SET statement_timeout = 0;
# SET lock_timeout = 0;
# SET idle_in_transaction_session_timeout = 0;
# SET client_encoding = 'UTF8';
# SET standard_conforming_strings = on;
# SET check_function_bodies = false;
# SET xmloption = content;
# SET client_min_messages = warning;
# SET row_security = off;
# SET default_table_access_method = heap;
# - Very carefully remove specific SELECT statements. We CANNOT blanket remove all
# SELECT statements because some of those have side-effects which we do want in the
# schema. Last time I ran this, the only SELECTS were
# SELECT pg_catalog.set_config('search_path', '', false);
# and
# SELECT pg_catalog.setval(text, bigint, bool);
# We do want to remove the former, but the latter is important. If the last argument
# is `true` or omitted, this marks the given integer as having been consumed and
# will NOT appear as the nextval.
sed -e '/^$/d' \
-e '/^--/d' \
-e 's/public\.//g' \
-e '/^SET /d' \
-e '/^SELECT pg_catalog.set_config/d'
}
echo "Dumping Postgres schema..."
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
echo "Done! Files dumped to: $OUTPUT_DIR"
......@@ -393,6 +393,14 @@ class LoggingTransaction:
def executemany(self, sql: str, *args: Any) -> None:
self._do_execute(self.txn.executemany, sql, *args)
def executescript(self, sql: str) -> None:
if isinstance(self.database_engine, Sqlite3Engine):
self._do_execute(self.txn.executescript, sql) # type: ignore[attr-defined]
else:
raise NotImplementedError(
f"executescript only exists for sqlite driver, not {type(self.database_engine)}"
)
def _make_sql_one_line(self, sql: str) -> str:
"Strip newlines out of SQL so that the loggers in the DB are on one line"
return " ".join(line.strip() for line in sql.splitlines() if line.strip())
......
......@@ -32,9 +32,10 @@ class IncorrectDatabaseSetup(RuntimeError):
ConnectionType = TypeVar("ConnectionType", bound=Connection)
CursorType = TypeVar("CursorType", bound=Cursor)
class BaseDatabaseEngine(Generic[ConnectionType], metaclass=abc.ABCMeta):
class BaseDatabaseEngine(Generic[ConnectionType, CursorType], metaclass=abc.ABCMeta):
def __init__(self, module: DBAPI2Module, config: Mapping[str, Any]):
self.module = module
......@@ -64,7 +65,7 @@ class BaseDatabaseEngine(Generic[ConnectionType], metaclass=abc.ABCMeta):
...
@abc.abstractmethod
def check_new_database(self, txn: Cursor) -> None:
def check_new_database(self, txn: CursorType) -> None:
"""Gets called when setting up a brand new database. This allows us to
apply stricter checks on new databases versus existing database.
"""
......@@ -124,3 +125,21 @@ class BaseDatabaseEngine(Generic[ConnectionType], metaclass=abc.ABCMeta):
Note: This has no effect on SQLite3, as transactions are SERIALIZABLE by default.
"""
...
@staticmethod
@abc.abstractmethod
def executescript(cursor: CursorType, script: str) -> None:
"""Execute a chunk of SQL containing multiple semicolon-delimited statements.
This is not provided by DBAPI2, and so needs engine-specific support.
"""
...
@classmethod
def execute_script_file(cls, cursor: CursorType, filepath: str) -> None:
"""Execute a file containing multiple semicolon-delimited SQL statements.
This is not provided by DBAPI2, and so needs engine-specific support.
"""
with open(filepath, "rt") as f:
cls.executescript(cursor, f.read())
......@@ -31,7 +31,9 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
class PostgresEngine(BaseDatabaseEngine[psycopg2.extensions.connection]):
class PostgresEngine(
BaseDatabaseEngine[psycopg2.extensions.connection, psycopg2.extensions.cursor]
):
def __init__(self, database_config: Mapping[str, Any]):
super().__init__(psycopg2, database_config)
psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
......@@ -212,3 +214,11 @@ class PostgresEngine(BaseDatabaseEngine[psycopg2.extensions.connection]):
else:
isolation_level = self.isolation_level_map[isolation_level]
return conn.set_isolation_level(isolation_level)
@staticmethod
def executescript(cursor: psycopg2.extensions.cursor, script: str) -> None:
"""Execute a chunk of SQL containing multiple semicolon-delimited statements.
Psycopg2 seems happy to do this in DBAPI2's `execute()` function.
"""
cursor.execute(script)
......@@ -24,7 +24,7 @@ if TYPE_CHECKING:
from synapse.storage.database import LoggingDatabaseConnection
class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection]):
class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection, sqlite3.Cursor]):
def __init__(self, database_config: Mapping[str, Any]):
super().__init__(sqlite3, database_config)
......@@ -120,6 +120,25 @@ class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection]):
# All transactions are SERIALIZABLE by default in sqlite
pass
@staticmethod
def executescript(cursor: sqlite3.Cursor, script: str) -> None:
"""Execute a chunk of SQL containing multiple semicolon-delimited statements.
Python's built-in SQLite driver does not allow you to do this with DBAPI2's
`execute`:
> execute() will only execute a single SQL statement. If you try to execute more
> than one statement with it, it will raise a Warning. Use executescript() if
> you want to execute multiple SQL statements with one call.
Though the docs for `executescript` warn:
> If there is a pending transaction, an implicit COMMIT statement is executed
> first. No other implicit transaction control is performed; any transaction
> control must be added to sql_script.
"""
cursor.executescript(script)
# Following functions taken from: https://github.com/coleifer/peewee
......
......@@ -266,7 +266,7 @@ def _setup_new_database(
".sql." + specific
):
logger.debug("Applying schema %s", entry.absolute_path)
executescript(cur, entry.absolute_path)
database_engine.execute_script_file(cur, entry.absolute_path)
cur.execute(
"INSERT INTO schema_version (version, upgraded) VALUES (?,?)",
......@@ -517,7 +517,7 @@ def _upgrade_existing_database(
UNAPPLIED_DELTA_ON_WORKER_ERROR % relative_path
)
logger.info("Applying schema %s", relative_path)
executescript(cur, absolute_path)
database_engine.execute_script_file(cur, absolute_path)
elif ext == specific_engine_extension and root_name.endswith(".sql"):
# A .sql file specific to our engine; just read and execute it
if is_worker:
......@@ -525,7 +525,7 @@ def _upgrade_existing_database(
UNAPPLIED_DELTA_ON_WORKER_ERROR % relative_path
)
logger.info("Applying engine-specific schema %s", relative_path)
executescript(cur, absolute_path)
database_engine.execute_script_file(cur, absolute_path)
elif ext in specific_engine_extensions and root_name.endswith(".sql"):
# A .sql file for a different engine; skip it.
continue
......@@ -666,7 +666,7 @@ def _get_or_create_schema_state(
) -> Optional[_SchemaState]:
# Bluntly try creating the schema_version tables.
sql_path = os.path.join(schema_path, "common", "schema_version.sql")
executescript(txn, sql_path)
database_engine.execute_script_file(txn, sql_path)
txn.execute("SELECT version, upgraded FROM schema_version")
row = txn.fetchone()
......
CREATE TABLE background_updates (
update_name text NOT NULL,
progress_json text NOT NULL,
depends_on text,
ordering integer DEFAULT 0 NOT NULL
);
ALTER TABLE ONLY background_updates
ADD CONSTRAINT background_updates_uniqueness UNIQUE (update_name);
CREATE TABLE background_updates (
update_name text NOT NULL,
progress_json text NOT NULL,
depends_on text, ordering INT NOT NULL DEFAULT 0,
CONSTRAINT background_updates_uniqueness UNIQUE (update_name)
);
This diff is collapsed.
This diff is collapsed.
CREATE TABLE state_group_edges (
state_group bigint NOT NULL,
prev_state_group bigint NOT NULL
);
CREATE SEQUENCE state_group_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
CREATE TABLE state_groups (
id bigint NOT NULL,
room_id text NOT NULL,
event_id text NOT NULL
);
CREATE TABLE state_groups_state (
state_group bigint NOT NULL,
room_id text NOT NULL,
type text NOT NULL,
state_key text NOT NULL,
event_id text NOT NULL
);
ALTER TABLE ONLY state_groups_state ALTER COLUMN state_group SET (n_distinct=-0.02);
ALTER TABLE ONLY state_groups
ADD CONSTRAINT state_groups_pkey PRIMARY KEY (id);
CREATE INDEX state_group_edges_prev_idx ON state_group_edges USING btree (prev_state_group);
CREATE UNIQUE INDEX state_group_edges_unique_idx ON state_group_edges USING btree (state_group, prev_state_group);
CREATE INDEX state_groups_room_id_idx ON state_groups USING btree (room_id);
CREATE INDEX state_groups_state_type_idx ON state_groups_state USING btree (state_group, type, state_key);
SELECT pg_catalog.setval('state_group_id_seq', 1, false);
CREATE TABLE state_groups (
id BIGINT PRIMARY KEY,
room_id TEXT NOT NULL,
event_id TEXT NOT NULL
);
CREATE TABLE state_groups_state (
state_group BIGINT NOT NULL,
room_id TEXT NOT NULL,
type TEXT NOT NULL,
state_key TEXT NOT NULL,
event_id TEXT NOT NULL
);
CREATE TABLE state_group_edges (
state_group BIGINT NOT NULL,
prev_state_group BIGINT NOT NULL
);
CREATE INDEX state_group_edges_prev_idx ON state_group_edges (prev_state_group);
CREATE INDEX state_groups_state_type_idx ON state_groups_state (state_group, type, state_key);
CREATE INDEX state_groups_room_id_idx ON state_groups (room_id) ;
CREATE UNIQUE INDEX state_group_edges_unique_idx ON state_group_edges (state_group, prev_state_group) ;
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment