Hey I'm having a bit of trouble getting the snapsh...
# community-help
s
Hey, I'm having a bit of trouble getting the snapshot endpoint to actually save to the specified path. I'm running Typesense with Docker Compose, and my configuration looks like this:
Copy code
# Single-service Compose file running Typesense 27.1.
services:
  typesense:
    image: typesense/typesense:27.1
    restart: always
    ports:
      # Publish the API port on the host's loopback interface only.
      - "127.0.0.1:8108:8108"
    volumes:
      # Host ./typesense-data is mounted at /data (passed to --data-dir below).
      - ./typesense-data:/data
      # Host ./backups is mounted at /backups inside the container.
      - ./backups:/backups
    command:
      - --data-dir
      - /data
      # The key is substituted from the TYPESENSE_API_KEY variable by Compose.
      - --api-key=${TYPESENSE_API_KEY}
      - --enable-cors
The Typesense instance itself works fine, and if I jump into the Docker shell I can see the backups directory mounted. When I call the snapshot endpoint, I see the following in my logs:
Copy code
I20250207 18:59:03.446450    74 raft_server.cpp:897] Triggering an on demand snapshot...
I20250207 18:59:03.446684   138 node.cpp:943] node default_group:172.18.0.2:8107:8108 starts to do snapshot
I20250207 18:59:03.447271   296 raft_server.cpp:554] on_snapshot_save
I20250207 18:59:03.447347   296 batched_indexer.cpp:571] Serialized 0 in-flight requests for snapshot.
I20250207 18:59:03.494668   296 raft_server.cpp:480] save_snapshot called
I20250207 18:59:03.507514   296 snapshot.cpp:642] Deleting /data/state/snapshot/snapshot_00000000000001100170
I20250207 18:59:03.507591   296 snapshot.cpp:648] Renaming /data/state/snapshot/temp to /data/state/snapshot/snapshot_00000000000001100170
I20250207 18:59:03.507639   296 snapshot.cpp:519] Deleting /data/state/snapshot/snapshot_00000000000001100168
I20250207 18:59:03.517138   296 snapshot_executor.cpp:234] node default_group:172.18.0.2:8107:8108 snapshot_save_done, last_included_index=1100170 last_included_term=18
I20250207 18:59:03.530437   294 log.cpp:1150] log save_meta /data/state/log/log_meta first_log_index: 1100169 time: 12879
I20250207 18:59:03.599927   303 raft_server.cpp:522] Copying system snapshot to external snapshot directory at /backups
E20250207 18:59:03.600128   303 file_util_posix.cc:324] CopyDirectory() couldn't create directory: /backups/state errno = 2
E20250207 18:59:03.600276   303 file_util_posix.cc:324] CopyDirectory() couldn't create directory: /backups/state errno = 2
E20250207 18:59:03.600476   296 raft_server.cpp:1179] On demand snapshot failed, error:
E20250207 18:59:03.600589   296 raft_server.cpp:1183] Copy failed.
I20250207 18:59:03.605010   303 raft_server.cpp:933] Dummy write to http://172.18.0.2:8108/health, status = 200, response = {"ok":true}
I20250207 18:59:03.605065   303 raft_server.cpp:547] save_snapshot done
I20250207 18:59:05.621717   262 raft_server.cpp:706] Term: 18, pending_queue: 0, last_index: 1100171, committed: 1100171, known_applied: 1100171, applying: 0, pending_writes: 0, queued_writes: 0, local_sequence: 5634244
I20250207 18:59:05.622058   290 raft_server.h:60] Peer refresh succeeded!
1
#!/usr/bin/env python3
"""Snapshot a Typesense instance and upload the archive to Cloudflare R2.

Author's note from the original post: not sure how to delete the previous
message, but my problem is solved now; I believe it was down to permissions.
In case it helps anyone else, this is the script I've created to sync
Typesense data to a Cloudflare R2 bucket.

Workflow:
    1. Empty the dedicated snapshot directory on the host.
    2. Ask Typesense to write a snapshot to the matching container path.
    3. Wait until the snapshot's ``state`` directory exists and its size
       stops changing.
    4. Pack the snapshot into a ``.tar.gz`` archive.
    5. Upload the archive to the R2 bucket.
    6. Remove the local archive and the snapshot files.

Configuration is read from environment variables (optionally via a ``.env``
file loaded with python-dotenv).
"""
import logging
import os
import shutil
import subprocess
import sys
import time
from datetime import datetime
from pathlib import Path

import boto3
import requests
from dotenv import load_dotenv

load_dotenv()

# Retrieve environment variables
TYPESENSE_HOST = os.getenv('TYPESENSE_HOST', 'http://localhost:8108')
TYPESENSE_API_KEY = os.getenv('TYPESENSE_API_KEY')
R2_ENDPOINT = os.getenv('R2_ENDPOINT')
R2_ACCESS_KEY = os.getenv('R2_ACCESS_KEY')
R2_SECRET_KEY = os.getenv('R2_SECRET_KEY')
R2_BUCKET = os.getenv('R2_BUCKET')
# Directory to store the tar archive (host path)
BACKUP_LOCATION = os.getenv('BACKUP_LOCATION')
# Dedicated backups directory (host path) for snapshot data
HOST_SNAPSHOT_DIR = os.getenv('HOST_SNAPSHOT_DIR', '/home/user/services/typesense/backups')
# Container path for snapshot data (Typesense will write snapshots here)
CONTAINER_SNAPSHOT_DIR = os.getenv('CONTAINER_SNAPSHOT_DIR', '/backups')

REQUIRED_ENV_VARS = [
    'TYPESENSE_API_KEY',
    'R2_ENDPOINT',
    'R2_ACCESS_KEY',
    'R2_SECRET_KEY',
    'R2_BUCKET',
]

# Polling parameters used while waiting for the snapshot to land on disk.
SNAPSHOT_TIMEOUT_SECONDS = 180
POLL_INTERVAL_SECONDS = 2
# Expect at least 1KB of data (adjust threshold if necessary)
MIN_SNAPSHOT_BYTES = 1024
# (connect, read) timeout for the snapshot request, so a hung server cannot
# block the script forever. The read timeout is deliberately generous.
REQUEST_TIMEOUT = (10, 600)

missing_vars = [var for var in REQUIRED_ENV_VARS if not os.getenv(var)]
if missing_vars:
    print(f"Error: Missing required environment variables: {', '.join(missing_vars)}")
    sys.exit(1)

# Determine current directory and backup location (host path).
# NOTE: this must be __file__; the pasted version read `Path(file)` because
# the chat markup swallowed the double underscores.
current_dir = Path(__file__).parent
backup_location = Path(BACKUP_LOCATION) if BACKUP_LOCATION else current_dir
backup_location.mkdir(parents=True, exist_ok=True)

# Ensure the host snapshot (backups) directory exists
host_snapshot_dir = Path(HOST_SNAPSHOT_DIR)
host_snapshot_dir.mkdir(parents=True, exist_ok=True)

# Set up logging
log_dir = current_dir / 'logs'
log_dir.mkdir(exist_ok=True)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_dir / 'backup.log'),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)


def _clear_directory(directory):
    """Delete everything inside *directory* but keep the directory itself.

    The directory is the host side of a container bind mount. Removing and
    recreating it would replace it with a brand-new directory, while a
    running container would presumably keep referring to the deleted one;
    writes there then fail with "No such file or directory" (errno 2).
    Emptying it in place avoids that.
    """
    for entry in directory.iterdir():
        if entry.is_dir() and not entry.is_symlink():
            shutil.rmtree(entry)
        else:
            entry.unlink()


def _scan_directory(directory):
    """Return ``(entry_count, total_file_bytes)`` for the tree under *directory*.

    Entries that disappear while being inspected (the snapshot may still be
    in the middle of being written) are skipped rather than aborting the scan.
    """
    entries = list(directory.rglob("*"))
    total_size = 0
    for entry in entries:
        try:
            if entry.is_file():
                total_size += entry.stat().st_size
        except OSError:
            continue
    return len(entries), total_size


def get_snapshot_directory():
    """
    Use the HOST_SNAPSHOT_DIR as the snapshot directory.
    Clear its contents before use (ensure this directory is dedicated for snapshots).

    Returns:
        Path of the (now empty) host snapshot directory.
    """
    snapshot_dir = host_snapshot_dir
    snapshot_dir.mkdir(parents=True, exist_ok=True)
    # Empty the directory in place instead of rmtree + mkdir; see
    # _clear_directory for why the directory itself must survive.
    _clear_directory(snapshot_dir)
    logger.info(f"Using host snapshot directory: {snapshot_dir}")
    return snapshot_dir


def _request_snapshot():
    """Call the Typesense snapshot endpoint with the container path.

    Raises:
        requests.RequestException: on connection problems, timeouts or a
            non-2xx response.
        RuntimeError: if the response body does not report success.
    """
    headers = {
        'Content-Type': 'application/json',
        'X-TYPESENSE-API-KEY': TYPESENSE_API_KEY,
    }
    params = {'snapshot_path': CONTAINER_SNAPSHOT_DIR}
    logger.info(f"Calling snapshot API with snapshot_path={CONTAINER_SNAPSHOT_DIR}")
    response = requests.post(
        f"{TYPESENSE_HOST}/operations/snapshot",
        headers=headers,
        params=params,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    if not response.json().get('success'):
        raise RuntimeError("Snapshot creation failed")


def _wait_for_state_dir(state_dir):
    """Block until *state_dir* exists or the snapshot timeout elapses.

    Raises:
        TimeoutError: if the directory has not appeared in time.
    """
    deadline = time.monotonic() + SNAPSHOT_TIMEOUT_SECONDS
    while not state_dir.exists() and time.monotonic() < deadline:
        logger.info("Waiting for 'state' subdirectory to appear in snapshot directory...")
        time.sleep(POLL_INTERVAL_SECONDS)
    if not state_dir.exists():
        raise TimeoutError("Timeout waiting for 'state' directory to be created.")


def _wait_for_stable_size(state_dir):
    """Block until the data under *state_dir* stops growing.

    The data counts as complete once its total size exceeds
    MIN_SNAPSHOT_BYTES and has matched the previous poll twice in a row.

    Raises:
        TimeoutError: if the size does not stabilise within the timeout.
    """
    deadline = time.monotonic() + SNAPSHOT_TIMEOUT_SECONDS
    previous_size = -1
    stable_iterations = 0
    while time.monotonic() < deadline:
        entry_count, total_size = _scan_directory(state_dir)
        logger.info(f"Polling 'state' directory: found {entry_count} file(s), total size: {total_size} bytes.")
        if total_size > MIN_SNAPSHOT_BYTES:
            if total_size == previous_size:
                stable_iterations += 1
                logger.info(f"Total size stable for {stable_iterations} iteration(s).")
            else:
                stable_iterations = 0
            if stable_iterations >= 2:
                # Data size is stable across two consecutive polls.
                logger.info(f"Snapshot data appears complete and stable (total size: {total_size} bytes).")
                return
        previous_size = total_size
        time.sleep(POLL_INTERVAL_SECONDS)
    raise TimeoutError("Timeout waiting for snapshot data to be fully written.")


def _create_archive(snapshot_dir):
    """Pack *snapshot_dir* into a timestamped ``.tar.gz`` in the backup location.

    Returns:
        Path of the created archive.

    Raises:
        subprocess.CalledProcessError: if ``tar`` exits with a non-zero status.
    """
    # The archive is written outside the snapshot directory.
    timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
    backup_file = backup_location / f"typesense-backup-{timestamp}.tar.gz"
    logger.info(f"Creating tar archive: {backup_file}")
    try:
        subprocess.run(
            ['tar', '-czf', str(backup_file), '-C', str(snapshot_dir), '.'],
            check=True,
        )
    except subprocess.CalledProcessError:
        # Do not leave a truncated archive behind.
        if backup_file.exists():
            backup_file.unlink()
        raise
    return backup_file


def create_snapshot():
    """Create a Typesense snapshot and archive it.

    Returns:
        Tuple ``(backup_file, snapshot_dir)``: the path of the tar archive
        and the host snapshot directory it was built from.

    Raises:
        requests.RequestException: if the snapshot API call fails.
        subprocess.CalledProcessError: if the tar command fails.
        Exception: for any other failure (unsuccessful snapshot, timeouts).
        Every error is logged before being re-raised.
    """
    try:
        # Use the host snapshot (backups) directory
        snapshot_dir = get_snapshot_directory()
        _request_snapshot()
        logger.info("Snapshot API returned success. Waiting for snapshot data to be written...")

        # First, wait for the expected subdirectory ('state') to appear.
        state_dir = snapshot_dir / 'state'
        _wait_for_state_dir(state_dir)
        logger.info(f"'state' subdirectory detected at {state_dir}. Beginning to poll for snapshot data completeness...")
        _wait_for_stable_size(state_dir)

        backup_file = _create_archive(snapshot_dir)
        return backup_file, snapshot_dir
    except requests.RequestException as e:
        logger.error(f"HTTP request failed: {str(e)}")
        raise
    except subprocess.CalledProcessError as e:
        logger.error(f"Tar command failed: {str(e)}")
        raise
    except Exception as e:
        logger.error(f"Unexpected error during snapshot: {str(e)}")
        raise


def upload_to_r2(file_path):
    """Upload *file_path* to the configured R2 bucket under its file name.

    Args:
        file_path: ``Path`` of the archive to upload.

    Raises:
        Exception: any client or upload error, logged and re-raised.
    """
    try:
        logger.info("Initialising R2 client")
        s3_client = boto3.client(
            's3',
            endpoint_url=R2_ENDPOINT,
            aws_access_key_id=R2_ACCESS_KEY,
            aws_secret_access_key=R2_SECRET_KEY,
            region_name='auto',
            # NOTE(review): Config is reached through boto3.session here;
            # presumably the same class as botocore.config.Config - confirm.
            config=boto3.session.Config(
                signature_version='s3v4',
                retries={'max_attempts': 3},
            ),
        )
        filename = file_path.name
        logger.info(f"Starting upload of {filename} to R2")
        s3_client.upload_file(str(file_path), R2_BUCKET, filename)
        logger.info(f"Successfully uploaded {filename} to R2")
    except Exception as e:
        logger.error(f"Upload failed: {str(e)}")
        raise


def cleanup(backup_file, snapshot_dir):
    """Remove the local archive and the snapshot files (best effort).

    Failures are logged but never raised, so cleanup cannot mask the outcome
    of the backup itself.

    Args:
        backup_file: ``Path`` of the archive, or ``None``.
        snapshot_dir: ``Path`` of the host snapshot directory, or ``None``.
    """
    try:
        if backup_file and backup_file.exists():
            backup_file.unlink()
            logger.info(f"Cleaned up backup file: {backup_file}")
        if snapshot_dir and snapshot_dir.exists():
            # Empty the directory but keep it, so the container's bind mount
            # is still usable on the next run.
            _clear_directory(snapshot_dir)
            logger.info(f"Cleaned up snapshot directory: {snapshot_dir}")
    except Exception as e:
        logger.error(f"Cleanup failed: {str(e)}")


def main():
    """Run one backup cycle; exit with status 1 if any step fails."""
    backup_file = None
    snapshot_dir = None
    try:
        logger.info("Starting backup process")
        backup_file, snapshot_dir = create_snapshot()
        upload_to_r2(backup_file)
        logger.info("Backup completed successfully")
    except Exception as e:
        logger.error(f"Backup failed: {str(e)}")
        sys.exit(1)
    finally:
        if backup_file or snapshot_dir:
            cleanup(backup_file, snapshot_dir)


if __name__ == "__main__":
    main()