(* $Id: seqdb_fsys_ht.mli 16182 2008-01-21 12:53:26Z gerd $ *)

(** Filesystem implementation built on top of Kvseq and Hindex files *)

open Seqdb_fsys_types

type ht_base
  (** A group of file systems that all live in one Unix directory *)

type ht_file_descr
  (** Opaque handle of an open file. The complete type of the file
      systems implemented by this module is therefore
      [ht_file_descr file_system].
   *)

type params =
  { ht_inode_size : int;
      (** ISZ (has to be a multiple of 8) *)
    ht_table_size : int;
      (** HTSIZE, counted in entries *)
    ht_hash_algo : Seqdb_containers.Hash_algo.hash_algo;
      (** HTALGO *)
    ht_index_type : [ `Plain | `Stored_hashes ];
      (** Selects between plain indexes (CELLSZ=1) and indexes that
          also store the hash values (CELLSZ=2)
       *)
    ht_have_dups : bool;
      (** HAVEDUPS *)
  }
  (** Parameters as found in the superblocks *)

type stats =
  { ht_table_total : int;
      (** Number of used entries in the hash table *)
    ht_table_del : int;
      (** Number of entries marked as deleted *)
    ht_data : int64;
      (** Total data space in bytes (used plus dead) *)
    ht_dead_data : int64;
      (** Dead, i.e. wasted, data space in bytes *)
    ht_used_data : int64;
      (** Used data space in bytes *)
  }
  (** Statistics *)

type iterator_type = [ `Data | `Index ]
  (** Selects what an iteration over the entries is driven by:
      - [`Data]: the iteration follows the data file
      - [`Index]: the iteration follows the index file
   *)

exception Filesys_exists of string
  (** A file system of this name is already present in the base *)

exception Filesys_not_found of string
  (** The base contains no file system of this name *)

val new_base : string -> ht_base
  (** Creates a base. The argument is the Linux directory in which the
      base resides; this directory has to exist already. A
      [GLOBAL.lock] file may optionally be present in the directory.
   *)

val lock_base : ht_base -> unit
  (** Acquires a shared global lock, which keeps file system checks from
      being done while the base is in use.

      Fails if no [GLOBAL.lock] file exists.
   *)

val names : ht_base -> string list
  (** Scans the base directory and returns the names of all file
      systems, i.e. of all files carrying the [.data] suffix
   *)

val base_dir : ht_base -> string
  (** Returns the directory of the base *)

val create_filesys :
      ht_base -> string -> params -> ht_file_descr file_system
  (** Creates a new file system inside the base; the string is its name.
      Raises [Filesys_exists] if a file system of that name is already
      there.
   *)

val get_filesys :
      ht_base -> string -> ht_file_descr file_system
  (** Returns the file system of the given name *)

val check_filesys :
      ht_base -> string -> unit
  (** Runs a file system check and rolls the file system back if that
      turns out to be necessary
   *)

val configure_filesys :
      ?hindex_caching:bool ->
      ?data_caching:bool ->
      ?sync_every:int ->
      ?big_readahead:bool ->
      ?fully_buffered_index:bool ->
      ht_base -> string -> unit
  (** Modifies runtime parameters:
      - [hindex_caching]: whether page-caching of the idx file is
        allowed (true after create/get_filesys)
      - [data_caching]: whether page-caching of the data file is
        allowed (true after create/get_filesys)
      - [sync_every]: the number of seconds after which files are
        synced to disk; a negative value turns syncing off
        (600 after create/get_filesys)
      - [fully_buffered_index]: the index buffer is made large enough
        to hold the entire index (false after create/get_filesys)

      NOTE(review): [big_readahead] is accepted here but is not
      described in this interface; check the implementation for its
      meaning and default.
   *)

val willneed_filesys :
      ht_base -> string -> unit
  (** Loads as much of the file system into the page cache as is
      reasonable
   *)

val willneed_all_filesys :
      ht_base -> unit
  (** Loads as much as is reasonable into the page cache *)

val check_all_filesys : ht_base -> unit
  (** Runs a file system check, provided the global lock can be obtained
      exclusively
   *)

val filesys_params : ht_base -> string -> params
  (** Returns the parameters of the file system *)

val dispose_all : ht_base -> unit
  (** Disposes every file system of the base *)

val checkpoint_and_dispose_all : ?soft:bool -> ht_base -> unit
  (** Checkpoints and disposes every file system of the base that this
      program has used.

      [soft]: if true, a checkpoint is only set when the previous
      checkpoint is too old.
   *)

type dedup_mode = [ `Off | `Twopass | `Indexcheck ]
  (** Strategy for identifying duplicates in a file system that has
      [HAVEDUPS]. See [get_iterator].
   *)

val get_iterator :
      ?at_filepos:string ->
      ?dedup_mode:dedup_mode ->
      ht_base -> string -> ht_file_descr file_system_iterator
  (** Returns an iterator for the file system. The iterator walks over
      the kvseq file and emulates a file-by-file iteration on top of
      it. Files are visited in inode order.

      [at_filepos]: if given, the iteration begins at this file
      position instead of at the beginning of the fsys.

      [dedup_mode]: if the fsys has the [HAVEDUPS] feature, this
      decides whether and how duplicate entries are detected:
      - [`Off]: no duplicate detection. This is the default.
      - [`Twopass]: the entries are traversed twice. The first pass
        records which entries exist where, so that the second pass can
        skip the duplicates. This mode never accesses the index, and
        can hence be used to rebuild the index.
      - [`Indexcheck]: every entry is looked up in the index. An entry
        found there is returned; any other entry is skipped.

      Note that it is not supported to dispose and re-open the file
      system while the iterator is in use.
   *)

val get_idx_iterator :
      ht_base -> string -> ht_file_descr file_system_iterator
  (** Returns an iterator for the file system. The iterator walks over
      the index file and emulates a file-by-file iteration on top of
      it. Files are visited in inode order.

      Note that it is not supported to dispose and re-open the file
      system while the iterator is in use.
   *)

val get_stats :
      ht_base -> string -> stats
  (** Reads the statistics *)

val reindex :
      ?fault_tolerant:bool ->
      ?repair:bool ->
      ?itype:iterator_type ->
      ?fully_buffered_index:bool ->
      ht_base -> string -> params -> bool
  (** Builds a new .idx file using the given params. (Note: the inode
      size cannot be changed this way.) The result says whether the new
      index has replaced the previous one.

      If [fault_tolerant], errors are ignored, and the new index
      replaces the current one even when errors occurred.

      If [repair], invalid inodes are deleted in the data file. They
      are never added to the new index, whether [repair] is set or not.

      [itype] selects the iterator type and defaults to [`Data]. With a
      [`Data] iterator an empty index can be filled with the files
      found in the data file. A [`Data] iterator is moreover a good
      proof of whether the file system is in a consistent state. The
      [`Index] iterator visits only those files that are in the old
      index. Generally, it is more robust.
   *)

val compact :
      ?fault_tolerant:bool ->
      ?itype:iterator_type ->
      ?fully_buffered_index:bool ->
      ht_base -> string -> params -> bool
  (** Builds new .data and .idx files using the given params. The
      result says whether the new files have replaced the previous
      ones.

      If [fault_tolerant], errors are ignored, and the new files
      replace the previous ones even when errors occurred.

      [itype]: see [reindex].
   *)

(*
val impl_name : ht_base -> string -> string
 *)
  (* Return the [impl_name] of the fsys: "Seqdb_fsys_ht" for every fsys
     that does not use plugins, otherwise the [impl_name] of the plugin.
   *)

(* Remarks:

   - The [reserve] method is unimplemented and currently a no-op

   - File systems lock the underlying files automatically (by means of
     additional .lock files). Shared locks are sufficient for read
     accesses, whereas write accesses need exclusive locks. Invoking
     [dispose] releases the locks.
 *)

(**/**)

(**********************************************************************)
(* Plugin API (internal)                                              *)
(**********************************************************************)

(* Do not access this API from outside the files library!!! *)

type fsys_config =
  { hindex_caching : bool;
    data_caching : bool;
    sync_every : int;
    big_readahead : bool;
    fully_buffered_index : bool;
    mutable_mtime : bool;
      (* Whether the mtime is mutable. With an immutable mtime, the
         mtime can be set exactly once, namely when the file is
         created. Writing to the file later does not update the mtime,
         and [set_file_mtime] raises [Failure "immutable mtime"].
       *)
    inode_relocatable : bool;
      (* Whether the inode may be relocated. When relocations are
         forbidden, the [filepos] method returns the same value for a
         file every time. Trying to execute an operation that would
         need a relocation raises [Failure "inodes not relocatable"].
       *)
    onsync : unit -> unit;
      (* Called after the data file has been synced *)
    read_only_mode : bool;
      (* In ro mode the fsys is unlocked once the superblock has been
         read, so another process may write to the file in parallel.
         This is useful for Seqdb_fsys_ao, where all writes are
         appends, and reads and writes thus do not interfere with each
         other.
       *)
  }

class type ht_file_system_t =
object
  inherit [ht_file_descr] file_system

  method check : unit -> bool

  method get_config : fsys_config

  method configure : fsys_config -> unit

  method willneed : unit -> unit

  method params : params

  method stats : stats

  method is_open : bool

  method iterator :
           string option -> dedup_mode ->
           ht_file_descr file_system_iterator
    (* Returns a fresh data file iterator. The first argument is the
       file position (8 bytes) at which to start; None means to start
       at the beginning.
     *)

  method idx_iterator : unit -> ht_file_descr file_system_iterator

  method reindex :
           bool -> bool -> bool -> iterator_type -> params -> bool

  method create_same : string -> bool -> ht_file_system_t
    (* Creates a new fsys of the same kind as this one under a new
       name, which is passed as the string. The bool says whether an
       existing fsys is to be truncated. The new fsys is initially
       locked exclusively, and it is not put into any base.
     *)

  method rename_files : string -> unit
    (* Renames the files of this fsys. Locking is ignored. *)
end


class type plugin =
object
  method impl_name : string
    (* By convention, the name of the OCaml module that implements the
       plugin
     *)

  method compatibility : ht_base -> string -> int
    (* The grade of compatibility. 0 means that this plugin is
       inappropriate. The higher the number, the better.
     *)

  method get_derived :
           ht_base -> string -> ht_file_system_t -> ht_file_system_t
    (* [get_derived base name fsys]: the existing file system [name] of
       [base] is opened, and the opened ht file system is passed in as
       [fsys]. The result of this method is cached in [base], and it is
       the object whose operations will be invoked.
     *)
end


val register_plugin : plugin -> unit

val override : ht_base -> string -> ht_file_system_t -> unit

val create_ht_filesys : ht_base -> string -> params -> ht_file_system_t

val plugin_reconfigure :
      ht_base -> string -> plugin -> (fsys_config -> fsys_config) -> unit