(* $Id: addtime.ml 16166 2008-01-17 22:59:55Z gerd $ *)

(* Convert any fsys to an append-only fsys with timemarks. The mtimes
   are invented, and count from 1 onwards.
 *)

open Seqdb_fsys_ht
open Printf

(* Translate the argument of the -index-type option into the
   corresponding index type tag.  Raises [Arg.Bad] for any other
   string, so that [Arg.parse] reports the problem to the user.
 *)
let itype_of_string s =
  match s with
    | "plain" -> `Plain
    | "stored_hashes" -> `Stored_hashes
    | _ -> raise(Arg.Bad("bad index type: " ^ s))


(* [copy b name_from name_to p ao_p docs_per_second] copies every file
   of the filesystem [name_from] (in base [b]) into a newly created
   append-only filesystem [name_to], which is created with the
   parameters [p] and [ao_p].

   - [itype] selects the iterator over the source: [`Data] uses
     [get_iterator], [`Index] uses [get_idx_iterator].
   - [docs_per_second] is the number of consecutive files that get the
     same invented mtime.  The mtime starts at 1 and is incremented
     after every [docs_per_second] files.  Values below 1 are treated
     like 1.
   - [fault_tolerant] only influences the final summary message.

   Errors while copying a single file are logged, the partially
   written destination file is deleted, and the copy continues with
   the next file.  Errors while advancing the iterator are logged, and
   the iterator is asked to recover via [next_recoverable].
 *)
let copy ?(fault_tolerant=false) ?(itype=`Data) b name_from name_to p ao_p
         docs_per_second =
  let sys = get_filesys b name_from in
  configure_filesys
    ~fully_buffered_index:true
    ~hindex_caching:true
    ~data_caching:true
    ~sync_every:(-1)
    b name_from;
  let sys' = Seqdb_fsys_ao.create_ao_filesys b name_to p ao_p in
  configure_filesys
    ~fully_buffered_index:true
    ~hindex_caching:true
    ~data_caching:true
    ~sync_every:(-1)
    b name_to;
  let have_errors = ref false in

  (* Now copy all files from [sys] to [sys']: *)
  let s = String.create 1048576  (* 1M *) in
  let mtime = ref 1L in       (* the invented mtime for the next file *)
  let mtime_n = ref 0 in      (* number of files that got [!mtime] so far *)
  ( try
      let iterator =
        match itype with
          | `Data -> get_iterator b name_from
          | `Index -> get_idx_iterator b name_from in
      let iter = iterator # start() in
      (* The loop is left by the End_of_file exception re-raised below
         when the iterator is advanced.
       *)
      while true do
        let name = iter # current_name in
        let to_del = ref None in
        ( try
            let hf = iter # current_file in
            (* Here we would need [reserve]... *)
            let ftype = sys # file_type hf in
            let hf', _ =
              sys' # open_file_wr_ext [ `Mtime !mtime ] name [] (Some ftype) in
            to_del := Some hf';  (* in case of an error *)
            incr mtime_n;
            (* Use >= and not =, so that a docs_per_second below 1
               results in one file per mtime.  With = the counter would
               never match, and all files would get the mtime 1.
             *)
            if !mtime_n >= docs_per_second then (
              mtime := Int64.succ !mtime;
              mtime_n := 0
            );
            (* Copy the contents in chunks of at most the buffer size *)
            let k = ref 0L in
            let size = sys # file_size hf in
            while !k < size do
              let n = sys # read_file hf s 0 !k (String.length s) in
              assert(n <> 0);
              sys' # write_file hf' s 0 !k n;
              k := Int64.add !k (Int64.of_int n)
            done
          with
            | error ->
                Log.logf `Err "addtime: exception at %s: %s"
                  name (Printexc.to_string error);
                have_errors := true;
                (* Remove the incomplete destination file, if it was
                   already opened
                 *)
                ( match !to_del with
                    | None -> ()
                    | Some hf' -> sys' # delete_file hf'
                )
        );
        ( try
            iter # next()
          with
            | End_of_file as e -> raise e
            | error ->
                Log.logf `Err "addtime: Exception after %s: %s "
                  name (Printexc.to_string error);
                have_errors := true;
                iter # next_recoverable();
                Log.logf `Err
                  "addtime: Skipping damaged file region. Next file is %s"
                  iter#current_name
        )
      done
    with
      | End_of_file -> ()
  );

  sys # dispose();
  sys' # checkpoint();
  sys' # dispose();

  (* Only claim an error-free copy when there were really no errors.
     In fault-tolerant mode errors are accepted, but still mentioned.
   *)
  if not !have_errors then
    Log.logf `Notice "addtime: copied without errors"
  else if fault_tolerant then
    Log.logf `Notice "addtime: copied, but errors have been tolerated"
  else
    Log.logf `Notice "addtime: there have been errors"
;;


(* Program entry point: parses the command line, derives the parameters
   of the new filesystem from those of the existing one (optionally
   overriding the index size and the index type), and runs [copy].

   Exactly two anonymous arguments are expected: the source as
   directory/fsys_from, and the name of the destination, which must
   not contain slashes.  Otherwise [Failure] is raised.
 *)
let main() =
  Seqdb_fsys_ao.init();
  let pgm_name = Filename.basename Sys.argv.(0) in
  let index_type = ref None in
  let index_size = ref None in
  let index_iteration = ref false in
  let tolerant = ref false in
  let timemark_period = ref 300 in
  let docs_per_second = ref 1 in
  let fsyslist = ref [] in
  Arg.parse
    [ "-index-size", Arg.Int (fun k -> index_size := Some k),
      " Set the size of the index";

      "-index-type",
      (Arg.String (fun s -> index_type := Some(itype_of_string s))),
      "(plain|stored_hashes) Change the index type";

      "-fault-tolerant", Arg.Set tolerant,
      " Try to ignore some errors";

      "-index-iteration", Arg.Set index_iteration,
      " Create the new index by iterating over the old index and checking";

      "-timemark-every", Arg.Set_int timemark_period,
      " Set a timemark every n seconds";

      "-docs-per-seconds", Arg.Set_int docs_per_second,
      " How many docs get the same mtime";
    ]
    (fun s -> fsyslist := !fsyslist @ [s])
    (sprintf "usage: %s [options] directory/fsys_from fsys_to" pgm_name);

  let fsys_from, fsys_to =
    match !fsyslist with
      | [ f; t] -> (f,t)
      | _ -> failwith "exactly 2 file args expected" in

  if String.contains fsys_to '/' then
    failwith "second file arg must not contain slashes";

  (* The directory part of the first argument is the base; the
     destination is created in the same base.
   *)
  let base = new_base (Filename.dirname fsys_from) in
  let fsys_from = Filename.basename fsys_from in
  let old_params = filesys_params base fsys_from in
  let params =
    match !index_size with
      | None -> old_params
      | Some s -> { old_params with ht_table_size = s } in
  let params =
    match !index_type with
      | None -> params
      | Some it -> { params with ht_index_type = it } in
  let itype =
    if !index_iteration then `Index else `Data in
  let ao_params =
    { Seqdb_fsys_ao.ao_time_mark_period = !timemark_period } in
  copy
    ~fault_tolerant:!tolerant ~itype
    base fsys_from fsys_to params ao_params !docs_per_second
;;


main();;