(* $Id: fct.ml 16180 2008-01-18 20:57:28Z gerd $ *) (* File Container Tool *) open Printf open Seqdb_containers module H = Autoindex(Kvseq) module P = Perm(Kvseq) let read_int64 s = Rtypes.int64_of_int8 (Rtypes.read_int8 s 0) let write_int64 n = Rtypes.int8_as_string (Rtypes.int8_of_int64 n) let show_format v = if v = Sb_consts.kvseq_format then "kvseq" else if v = Sb_consts.hindex_format then "hindex" else if v = Sb_consts.perm_format then "perm" else "unknown" let show_purpose v = Rtypes.int8_as_string (Rtypes.int8_of_int64 v) let show_repr v = if v < 0L then "illegal" else if v = Sb_consts.int8_repr then "variable length up to 255 bytes" else if v = Sb_consts.int16_repr then "variable length up to 64 Kbytes" else if v = Sb_consts.int32_repr then "variable length up to 4 Gbytes" else if v = Sb_consts.int64_repr then "variable length up to 8 Ebytes (exa bytes)" else if v >= Sb_consts.fixed_repr_min && v <= Sb_consts.fixed_repr_max then sprintf "fixed length of %Ld bytes" (Int64.sub v Sb_consts.fixed_repr_min) else if v >= Sb_consts.lim8_repr_min && v <= Sb_consts.lim8_repr_max then sprintf "limited length up to %Ld bytes" (Int64.succ (Int64.sub v Sb_consts.lim8_repr_min)) else sprintf "unknown" let show_htalgo v = if v = Sb_consts.md5_algo then "MD5" else "unknown" let simple_descr fd = ( object method file_descr = fd method dispose_hint() = () end ) let named_descr name fd = ( object method name = name method file_descr = fd method dispose_hint() = () end ) let cmd_superblock pgm_name = let list_flag = ref false in let mods = ref [] in let file = ref None in Arg.parse [ "-list", Arg.Set list_flag, " List the superblock (default)"; "-set", Arg.Tuple(let name = ref "" in [ Arg.Set_string name; Arg.String (fun s -> let n = try Int64.of_string s with _ -> raise(Arg.Bad("Not an int64: " ^ s)) in mods := !mods @ [ !name, n ] ) ]), " Set the variable to a numeric "; ] (fun s -> match !file with | None -> file := Some s | Some _ -> raise(Arg.Bad("Unexpected arg: " ^ s))) (sprintf "usage: %s superblock [options] file" pgm_name); let file = match !file with | None -> failwith "Missing file argument" | Some file -> file in if !mods <> [] then ( let fd = Unix.openfile file [ Unix.O_RDWR ] 0 in let io = simple_descr fd in let sb = Superblock.read_superblock io in List.iter (fun (name, value) -> Superblock.set_variable sb name value ) !mods; Superblock.write_superblock io sb; Unix.close fd; print_endline "Modified superblock" ); if !mods = [] || !list_flag then ( let fd = Unix.openfile file [ Unix.O_RDONLY ] 0 in let io = simple_descr fd in let sb = Superblock.read_superblock io in let vars = Superblock.variables sb in let add_info = [ Sb_consts.format_name, show_format; Sb_consts.purpose_name, show_purpose; Sb_consts.keyrepr_name, show_repr; Sb_consts.valrepr_name, show_repr; Sb_consts.htalgo_name, show_htalgo ] in printf "Superblock contents:\n"; List.iter (fun (name, value) -> let info = try let f = List.assoc name add_info in "(" ^ f value ^ ")" with Not_found -> "" in printf " %-8s: %19Ld %s\n" name value info ) vars; Unix.close fd; ) ;; let repr_of_str s = match s with | "var8" -> `Int8 | "var16" -> `Int16 | "var32" -> `Int32 | "var64" -> `Int64 | _ -> ( try `Fixed (int_of_string s) with | _ -> failwith("Bad argument: " ^ s) ) ;; let htalgo_of_str s = match s with | "MD5" -> `MD5 | _ -> failwith("Bad argument: " ^ s) ;; let fmt_of_str s = match s with | "kvseq" -> `Kvseq | "hindex" -> `Hindex | "perm" -> `Perm | _ -> failwith("Bad argument: " ^ s) ;; let cmd_create pgm_name = (* None: take the library's defaults: *) let sbsize = ref None in let htsize = ref None in let cellsz = ref 1 in let fileincr = ref None in let supports_deletions = ref true in let keyrepr = ref None in let valrepr = ref None in let alignment = ref None in let have_statistics = ref true in let hash_algo = ref None in let purpose = ref None in (* Other args: *) let format = ref None in let file = ref None in let for_file = ref None in Arg.parse [ "-sbsize", Arg.Int (fun n -> sbsize := Some n), " Create superblock of this size"; "-htsize", Arg.Int (fun n -> htsize := Some(Int64.of_int n)), " Sets the size of the hash table in #entries (only for hindex)"; "-cellsz", Arg.Set_int cellsz, " n=1 puts only kv positions into index, n=2 also puts hashes into index (only hindex)"; "-fileincr", Arg.Int (fun n -> fileincr := Some (Int64.of_int n)), " Set the file increment (only for kvseq and perm)"; "-no-delflag", Arg.Clear supports_deletions, " Omit the delete flag (only for kvseq)"; "-keyrepr", Arg.String (fun s -> keyrepr := Some(repr_of_str s)), "(var8|var16|var32|var64|) Set the repr of keys (only for kvseq)"; "-valrepr", Arg.String (fun s -> valrepr := Some(repr_of_str s)), "(var8|var16|var32|var64|) Set the repr of values (only for kvseq)"; "-alignment", Arg.Int (fun n -> alignment := Some n), " Enable alignment (only for kvseq)"; "-no-statistics", Arg.Clear have_statistics, " Disable statistics (ENTRIES, AENTRIES - only for kvseq and hindex)"; "-hash-algo", Arg.String (fun s -> hash_algo := Some(htalgo_of_str s)), "(MD5) Set the hash algorithm"; "-purpose", Arg.String (fun s -> purpose := Some s), " Set the purpose (up to 8 chars)"; "-for", Arg.String (fun s -> for_file := Some s), " Create the hindex or perm file for this kvseq file"; ] (fun s -> match !format with | None -> format := Some(fmt_of_str s) | Some _ -> ( match !file with | None -> file := Some s | Some _ -> raise(Arg.Bad ("Unexpected arg: " ^ s)) ) ) (sprintf "usage: %s create (kvseq|hindex|perm) " pgm_name); let (format, file) = match (!format, !file) with | (Some fmt), (Some file) -> (fmt, file) | _ -> failwith ("Too few arguments") in match format with | `Kvseq -> let fd = Unix.openfile file [ Unix.O_RDWR; Unix.O_CREAT; Unix.O_EXCL ] 0o666 in let io = simple_descr fd in let f = Kvseq.create ?sbsize:!sbsize ?fileincr:!fileincr ~supports_deletions:!supports_deletions ?keyrepr:!keyrepr ?valrepr:!valrepr ?alignment:!alignment ~have_statistics:!have_statistics ?suggested_hash_algo:!hash_algo ?purpose:!purpose io in Kvseq.sync f; Unix.close fd; printf "Created kvseq file %s\n" file | `Hindex -> let for_file = match !for_file with | None -> failwith "-for is required to create an hindex file" | Some f -> f in let kv_fd = Unix.openfile for_file [ Unix.O_RDONLY ] 0 in let kv_io = simple_descr kv_fd in let kv = Kvseq.access kv_io in let fd = Unix.openfile file [ Unix.O_RDWR; Unix.O_CREAT; Unix.O_EXCL ] 0o666 in let io = simple_descr fd in let hi = H.create ?sbsize:!sbsize ~have_statistics:!have_statistics ?htsize:!htsize ~cellsz:!cellsz ?hash_algo:!hash_algo ?purpose:!purpose kv io in H.sync hi; Unix.close fd; Unix.close kv_fd; printf "Created hindex file %s\n" file | `Perm -> let for_file = match !for_file with | None -> failwith "-for is required to create a perm file" | Some f -> f in let kv_fd = Unix.openfile for_file [ Unix.O_RDONLY ] 0 in let kv_io = simple_descr kv_fd in let kv = Kvseq.access kv_io in let fd = Unix.openfile file [ Unix.O_RDWR; Unix.O_CREAT; Unix.O_EXCL ] 0o666 in let io = simple_descr fd in let perm = P.create ?sbsize:!sbsize ?hash_algo:!hash_algo ?fileincr:!fileincr ?purpose:!purpose kv io in P.sync perm; Unix.close fd; Unix.close kv_fd; printf "Create perm file %s\n" file ;; let file_contents file = let st = Unix.LargeFile.stat file in let n = Int64.to_int st.Unix.LargeFile.st_size in let s = String.create n in let fd = Unix.openfile file [Unix.O_RDONLY] 0 in Netsys.really_read fd s 0 n; s ;; let cmd_add pgm_name = let key = ref None in let value = ref None in let mode = ref `Single in let kvseq_file = ref None in let other_file = ref None in Arg.parse [ "-key", Arg.String (fun s -> key := Some s), " Set the key to add directly"; "-key-file", Arg.String (fun s -> key := Some (file_contents s)), " The key is read from this file"; "-value", Arg.String (fun s -> value := Some s), " Set the value to add directly"; "-value-file", Arg.String (fun s -> value := Some(file_contents s)), " The value is read from this file"; "-add-files", Arg.Unit (fun () -> mode := `Files), " Read the files to add from stdin"; ] (fun s -> match !kvseq_file with | None -> kvseq_file := Some s | Some _ -> ( match !other_file with | None -> other_file := Some s | Some _ -> raise(Arg.Bad ("Unexpected arg: " ^ s)) ) ) (sprintf "usage: %s add [options] kvseq-file [hindex-or-perm-file]" pgm_name); let kvseq_file = match !kvseq_file with | None -> failwith "Missing argument" | Some f -> f in let kv_io = let fd = Unix.openfile kvseq_file [Unix.O_RDWR] 0 in simple_descr fd in let kv = Kvseq.access kv_io in Kvseq.configure ~auto_fadvise:true kv; let other_io_opt = match !other_file with | None -> None | Some f -> let fd = Unix.openfile f [Unix.O_RDWR] 0 in let io = named_descr f fd in Some io in let other_opt = match other_io_opt with | None -> None | Some io -> let io' = (io :> Seqdb_rdwr.file_descr) in let sb = Superblock.read_superblock io' in let fmt_val = Superblock.variable sb Sb_consts.format_name in if fmt_val = Sb_consts.hindex_format then ( let other = H.access kv io' in H.configure ~auto_fadvise:true other; Some (`Hindex other) ) else if fmt_val = Sb_consts.perm_format then ( let other = P.access kv io' in P.configure ~auto_fadvise:true other; Some (`Perm other) ) else failwith ("File has wrong format: " ^ io#name) in let add k v = let e = Kvseq.add kv { Kvseq.delflag = false; key = k; value = v } in match other_opt with | None -> () | Some (`Hindex other) -> let he_opt = try Some(H.lookup other k) with Not_found -> None in ( match he_opt with | Some he -> if Kvseq.supports_deletions kv then Kvseq.delete (H.get_contents he); H.replace he e | None -> ignore(H.add other e) ) | Some (`Perm other) -> ignore(P.add other e) in ( match !mode with | `Single -> let k = match !key with | Some k -> k | None -> failwith "No key" in let v = match !value with | Some v -> v | None -> failwith "No value" in add k v | `Files -> try while true do let k = input_line stdin in let v = file_contents k in add k v done with | End_of_file -> () ); ( match other_opt with | None -> () | Some (`Hindex other) -> H.sync other | Some (`Perm other) -> P.sync other ); ( match other_io_opt with | None -> () | Some io -> Unix.close io#file_descr ); Kvseq.sync kv; Unix.close kv_io#file_descr ;; let cmd_list pgm_name = let inc_del = ref false in let kvseq_file = ref None in let other_file = ref None in Arg.parse [ "-include-deleted", Arg.Set inc_del, " Also list deleted entries"; ] (fun s -> match !kvseq_file with | None -> kvseq_file := Some s | Some _ -> ( match !other_file with | None -> other_file := Some s | Some _ -> raise(Arg.Bad ("Unexpected arg: " ^ s)) ) ) (sprintf "usage: %s list [options] kvseq-file [perm-file]" pgm_name); let show_entry e = let delflag = Kvseq.get_delflag e in if not delflag || !inc_del then ( printf "@0x%016Lx%s:\n" (Kvseq.int64_of_pointer(Kvseq.get_pointer e)) (if delflag then " (deleted)" else ""); printf " \"%s\"\n" (String.escaped (Kvseq.get_key e)) ) in let kvseq_file = match !kvseq_file with | None -> failwith "Missing argument" | Some f -> f in let kv_io = let fd = Unix.openfile kvseq_file [Unix.O_RDWR] 0 in simple_descr fd in let kv = Kvseq.access kv_io in Kvseq.configure ~auto_fadvise:true kv; let other_io_opt = match !other_file with | None -> None | Some f -> let fd = Unix.openfile f [Unix.O_RDWR] 0 in let io = named_descr f fd in Some io in let other_opt = match other_io_opt with | None -> None | Some io -> let io' = (io :> Seqdb_rdwr.file_descr) in let sb = Superblock.read_superblock io' in let fmt_val = Superblock.variable sb Sb_consts.format_name in if fmt_val = Sb_consts.perm_format then ( let other = P.access kv io' in P.configure ~auto_fadvise:true other; Some (`Perm other) ) else failwith ("File has wrong format: " ^ io#name) in ( match other_opt with | None -> Kvseq.fadvise_iterating kv; ( try let e = ref (Kvseq.first_entry kv) in while true do show_entry !e; e := Kvseq.next_entry !e done with End_of_file -> () ) | Some (`Perm other) -> (* CHECK: Maybe we should FADV_RANDOM for kv? *) let n = P.size other in let k = ref 0L in while !k < n do let pe = P.lookup other !k in let e = P.get_contents pe in show_entry e; k := Int64.succ !k done ); ( match other_opt with | None -> () | Some (`Hindex other) -> H.sync other | Some (`Perm other) -> P.sync other ); ( match other_io_opt with | None -> () | Some io -> Unix.close io#file_descr ); Kvseq.sync kv; Unix.close kv_io#file_descr ;; let cmd_get pgm_name = let which = ref `None in let kvseq_file = ref None in let other_file = ref None in Arg.parse [ "-at", Arg.String (fun s -> try which := `At (Int64.of_string s) with _ -> failwith("Bad number: " ^ s)), " Get the value at position n in the kvseq file"; "-key", Arg.String (fun s -> which := `Key s), " Get the value for this key (requires hindex file)"; "-index", Arg.String (fun s -> try which := `Index (Int64.of_string s) with _ -> failwith("Bad number: " ^ s)), " Get the value at index n in the perm file"; ] (fun s -> match !kvseq_file with | None -> kvseq_file := Some s | Some _ -> ( match !other_file with | None -> other_file := Some s | Some _ -> raise(Arg.Bad ("Unexpected arg: " ^ s)) ) ) (sprintf "usage: %s get [options] kvseq-file [hindex-or-perm-file]" pgm_name); let kvseq_file = match !kvseq_file with | None -> failwith "Missing argument" | Some f -> f in let kv_io = let fd = Unix.openfile kvseq_file [Unix.O_RDWR] 0 in simple_descr fd in let kv = Kvseq.access kv_io in Kvseq.configure ~auto_fadvise:true kv; let other_io_opt = match !other_file with | None -> None | Some f -> let fd = Unix.openfile f [Unix.O_RDWR] 0 in let io = named_descr f fd in Some io in let other_opt = match other_io_opt with | None -> None | Some io -> let io' = (io :> Seqdb_rdwr.file_descr) in let sb = Superblock.read_superblock io' in let fmt_val = Superblock.variable sb Sb_consts.format_name in if fmt_val = Sb_consts.perm_format then ( let other = P.access kv io' in P.configure ~auto_fadvise:true other; Some (`Perm other) ) else if fmt_val = Sb_consts.hindex_format then ( let other = H.access kv io' in H.configure ~auto_fadvise:true other; Some (`Hindex other) ) else failwith ("File has wrong format: " ^ io#name) in ( match !which with | `None -> () | `At k -> let s = write_int64 k in let e = Kvseq.lookup kv (Kvseq.pointer_of_string s) in print_string (Kvseq.get_value e) | `Key k -> ( match other_opt with | Some(`Hindex other) -> let e = try H.lookup other k with Not_found -> failwith "Not found" in let e' = H.get_contents e in print_string (Kvseq.get_value e') | _ -> failwith ("For -key an hindex file is required") ) | `Index k -> ( match other_opt with | Some(`Perm other) -> let e = P.lookup other k in let e' = P.get_contents e in print_string (Kvseq.get_value e') | _ -> failwith ("For -index a perm file is required") ) ); ( match other_opt with | None -> () | Some (`Hindex other) -> H.sync other | Some (`Perm other) -> P.sync other ); ( match other_io_opt with | None -> () | Some io -> Unix.close io#file_descr ); Kvseq.sync kv; Unix.close kv_io#file_descr ;; let cmd_group pgm_name = let kvseq_file = ref None in let perm_file = ref None in Arg.parse [ ] (fun s -> match !kvseq_file with | None -> kvseq_file := Some s | Some _ -> ( match !perm_file with | None -> perm_file := Some s | Some _ -> raise(Arg.Bad ("Unexpected arg: " ^ s)) ) ) (sprintf "usage: %s group [options] kvseq-file perm-file" pgm_name); let kvseq_file = match !kvseq_file with | None -> failwith "Missing argument" | Some f -> f in let perm_file = match !perm_file with | None -> failwith "Missing argument" | Some f -> f in let kv_io = let fd = Unix.openfile kvseq_file [Unix.O_RDWR] 0 in simple_descr fd in let kv = Kvseq.access kv_io in Kvseq.configure ~auto_fadvise:true kv; let perm_fd = Unix.openfile perm_file [Unix.O_RDWR] 0 in let perm_io = simple_descr perm_fd in let perm = P.access kv perm_io in P.configure ~auto_fadvise:true perm; P.group perm; P.sync perm; Unix.close perm_io#file_descr; Kvseq.sync kv; Unix.close kv_io#file_descr ;; let commands = [ "superblock", cmd_superblock, "Show/modify the superblock"; "create", cmd_create, "Create new files (kvseq/hindex/perm)"; "add", cmd_add, "Add entries to files"; "list", cmd_list, "List entries in files"; "get", cmd_get, "Get an entry from file(s)"; "group", cmd_group, "Group a perm file by keys"; ] let usage pgm_name = eprintf "usage: %s \n" pgm_name; eprintf " is one of the following:\n"; List.iter (fun (name, _, text) -> eprintf " %s: %s\n" name text ) commands; eprintf " and depend on the command you are issuing.\n"; eprintf "use '%s -help' to get command-specific help.\n" pgm_name; flush stderr; exit 2 ;; let main() = let pgm_name = Filename.basename Sys.argv.(0) in if Array.length Sys.argv <= 1 then usage pgm_name; let cmd_name = Sys.argv.(1) in let _, cmd, _ = try List.find (fun (name, _, _) -> name = cmd_name) commands with | Not_found -> usage pgm_name in Arg.current := 1; try cmd pgm_name with | Arg.Bad msg | Failure msg -> prerr_endline (pgm_name ^ ": " ^ msg); exit 2 | Unix.Unix_error(err, _, param) -> let prefix = if param = "" then pgm_name ^ ": " else pgm_name ^ ": " ^ param ^ ": " in prerr_endline (prefix ^ Unix.error_message err); exit 2 ;; main()