(* $Id: seqdb_fsys_types.mli 16180 2008-01-18 20:57:28Z gerd $ *)

(** Filesystem type definitions *)

type file_name = string
  (** Names. Note that filesystems usually have a length limitation *)

type file_type = char
  (** The type may be used arbitrarily. By convention, we use:
      - 'f': an uncompressed file
      - 'c': a gzip-compressed file
      - 'a': a gar archive (see {!Seqdb_archive})
      - 'x': unknown type
   *)

exception File_not_found of file_name
  (** No such file exists *)

exception File_type_mismatch of file_name
  (** The file exists, but does not have an acceptable type *)

exception File_exists of file_name
  (** The file exists and is in the way *)

type wr_flags =
    [ `Excl
    | `Mtime of int64
    ]
  (** See [open_file_wr_ext] below *)

(** This is the class type of a file system that uses ['file_descr] to
    denote open files (usually opaque). One can open a file for reading
    or writing, access it, and then close it after use, just as for an
    ordinary file system.

    There are, however, subtle differences to ordinary file systems.
    Although one can open the same file several times, one should not
    do so! There is no mechanism so that changes made to one file are
    visible in the other file. For example, after

    {[
      let f1 = fsys # open_file_wr name ... in
      let f2 = fsys # open_file_wr name ... in
      fsys # set_file_mtime f1 1234L;
      let t2 = fsys # file_mtime f2
    ]}

    it is not guaranteed that [t2] is already set to the new timestamp.
    Even worse, it may happen that such parallel accesses destroy parts
    of the file system.

    By calling [dispose] one can enforce that the real Unix file
    descriptors underlying the class are closed. It is ok to do so, and
    to continue to access the files that have been opened before calling
    [dispose]. The file descriptors are simply reopened. Note however,
    that [dispose] also implies that any locks are released. If another
    process accesses the file system while the locks are not held
    strange things can happen. So for ensuring consistency, one should
    avoid this if parallel processes write to the file system.

    After finishing with a file system completely, one must checkpoint
    it. This enforces that any remaining changes are written to disk,
    and that these changes are marked as valid. When using the file
    system the next time, one has the option of ignoring the trailing
    part of the data file after the valid part (in case the last access
    ended without checkpoint). (See {!Seqdb_fsys_ht.check_filesys}.)

    Note that checkpointing is a very simple mechanism that only
    protects the file system integrity basically. In particular, one
    should not expect the wonders of journaling. After a crash, it may
    still happen that the index as well as valid inodes contain pointers
    to invalid parts of the data file. Thus it is still possible that
    files are unreadable. However, it is always safe to append to
    checked file systems.
 *)
class type ['file_descr] file_system =
object
  method exclusive_access : unit -> unit
    (** If the file system supports locking, this method grants
        exclusive access until [dispose] is called.
     *)

  method open_file_rd : file_name -> file_type list -> 'file_descr
    (** Opens the file for reading. The file must have the passed
        [file_name], and one of the types in the [file_type list]. If
        this list is empty, all types are permitted.

        @raise File_not_found if the file does not exist
        @raise File_type_mismatch if the file exists but does not have
          an acceptable type
     *)

  method open_file_wr : file_name -> file_type list -> file_type option ->
                          ('file_descr * bool)
    (** Opens the file for writing. The file must have the passed
        [file_name], and one of the types in the [file_type list]. If
        this list is empty, all types are permitted.

        If the [file_type option] is [Some t], the file is created if
        not existing.

        The method returns the open file descriptor, and whether the
        file has been created. The type of newly created files is [t].

        @raise File_not_found if the [file_type option] is not set and
          the file does not exist
        @raise File_type_mismatch if the file exists but does not have
          an acceptable type
     *)

  method open_file_wr_ext : wr_flags list ->
                            file_name -> file_type list -> file_type option ->
                              ('file_descr * bool)
    (** Same as [open_file_wr], but the behavior can be modified by a
        list of flags:
        - [`Excl]: Raises [File_exists] if the file already exists
        - [`Mtime t]: Set the mtime initially to [t], and prevent that
          writing to the returned file descriptor modifies [t]. You can
          end this mode by explicitly setting mtime using
          [set_file_mtime].
     *)

  method file_size : 'file_descr -> int64
    (** Returns the size of the open file *)

  method file_type : 'file_descr -> file_type
    (** Returns the file type *)

  method file_mtime : 'file_descr -> int64
    (** Returns the timestamp of the last modification *)

  method set_file_type : 'file_descr -> file_type -> unit
    (** Sets the file type *)

  method set_file_mtime : 'file_descr -> int64 -> unit
    (** Sets the mtime timestamp *)

  method read_file : 'file_descr -> string -> int -> int64 -> int -> int
    (** [read_file fd s s_pos f_pos len]: Reads [len] bytes of the file
        [fd] at position [f_pos] and writes the bytes into [s] beginning
        at position [s_pos]. If more bytes are requested to be read than
        available at [f_pos], only the available bytes are read.

        The method returns the actual number of read bytes. This number
        is only smaller than [len] if too many bytes have been
        requested.
     *)

  method write_file : 'file_descr -> string -> int -> int64 -> int -> unit
    (** [write_file fd s s_pos f_pos len]: Writes [len] bytes to the
        file [fd] at position [f_pos]. These bytes are taken from the
        string [s] at position [s_pos]. It is an error if the string is
        shorter than [s_pos + len].
     *)

  method truncate : 'file_descr -> int64 -> unit
    (** Truncates the file size. The new size must be less than or
        equal to the old size.
     *)

  method reserve : 'file_descr -> int64 -> unit
    (** Reserves space for this file size. The file size is, however,
        not changed.
     *)

  method close_file : 'file_descr -> unit
    (** Closes the file *)

  method delete_file : 'file_descr -> unit
    (** Closes and deletes the file *)

  method delete_name : file_name -> unit
    (** Deletes the named file *)

  method delete_name_from_index : file_name -> unit
    (** Only deletes the filename from the index file, but does not
        mark the entry as deleted. This is only allowed if the
        [HAVE_DUPS] feature of the fsys is enabled, as it consequently
        may happen that the same file name exists twice in the data part
        of the fsys (but not in the index).

        For indexes with stored hashes this method is cheaper than
        [delete_file]. For other indexes there is no advantage.
     *)

  method rename_file : 'file_descr -> file_name -> unit
    (** Renames the file to the new name. It is an error if there is
        already a file with the new name.

        @raise File_exists if there is already a file with the new name
     *)

  method filepos : 'file_descr -> int64
    (** The exact file position of the inode of this file *)

  method is_last_file : 'file_descr -> bool
    (** Returns whether the file is physically the last one in the
        fsys
     *)

  method cmp_filepos : file_name -> file_name -> int
    (** Indicates the order in which the files are accessed best to
        maximize speed. Returns a value like [compare]. It is not
        required that the files exist.
     *)

  method guess_filepos : file_name -> int64 option
    (** Returns the best guess for the file position it is possible to
        make by only looking at the index. Returns [None] if it is found
        out that the file does not exist.
     *)

  method checkpoint : ?soft:bool -> unit -> unit
    (** Syncs data to disk, and marks the logical file system length as
        the valid one.

        {b Due to the way the filesystems are implemented it is required
        to checkpoint explicitly, or files may disappear, or even become
        unreadable.}

        @param soft if true, the checkpoint is only set if the last
          checkpoint is too old
     *)

  method dispose : unit -> unit
    (** Indicates that the filesystem is currently not used, and that
        all OS resources should be freed. It is allowed to use a
        disposed filesystem at a later time.

        Note that a disposal does not imply a checkpoint. In order to
        dispose a file permanently, you have to checkpoint first, and
        then to dispose.
     *)

  method superblock_variable : string -> int64
    (** Get a superblock variable *)

  method set_superblock_variable : string -> int64 -> unit
    (** Set a superblock variable *)

end

(** An iterator for a file system walks over all files. *)
class type ['file_descr] file_system_iterator =
object
  method start : unit -> 'file_descr file_system_iteration
    (** Starts a fresh iteration.

        @raise End_of_file if there is no file at all
     *)
end

(** An iteration is a running instance of an iterator *)
and ['file_descr] file_system_iteration =
object
  method current_name : file_name
    (** Returns the current file name *)

  method current_file : 'file_descr
    (** Returns the current file descriptor. This might be cheaper than
        retrieving the name and opening the file afterwards. The file
        descr is only opened read-only.
     *)

  method next : unit -> unit
    (** Switches to the next file.

        @raise End_of_file if the end is reached
     *)

  method next_recoverable : unit -> unit
    (** When [next] throws an exception, this method might help. It
        tries to skip damaged file regions, and to find the next valid
        entry.

        @raise End_of_file if nothing can be found
     *)
end