Commit 846d4ab1 authored by Vlad Dumitru's avatar Vlad Dumitru
Browse files

getting closer to... something?

parent 0dd11ff9
Pipeline #6226 canceled with stages
in 10 seconds
......@@ -21,27 +21,19 @@ let store_textgrid ~db ?author ?comment ?date ~tg_path ~key () =
match Annotation.Textgrid.Read.of_string contents with
| Ok tg ->
Ok (Base.List.map ~f:(fun tier ->
let doc = Speechcake.Document.v ~key (Annotation.Tier.data tier) in
let data = Annotation.Tier.data tier in
let uuid = Uuidm.v `V4 in
Speechcake.put db ?author ?comment ?date (Uuidm.to_string uuid) doc) tg)
let info = Speechcake.Store.Info.v ?author ?comment ?date () in
Speechcake.put db ~info ~key uuid data) tg)
| Error e ->
Error [ e ]
let get_textgrid ~db ~tiers =
let get_tier (uuid, version_id) =
Speechcake.get_at_version db (Uuidm.to_string uuid) version_id
in
let tiers =
Base.List.map tiers ~f:(fun (uuid, version) ->
let open Base.Result.Monad_infix in
get_tier (uuid, version)
>>| fun doc ->
let name = Base.List.last_exn doc.key in
let version = Speechcake.Database.VS.Hash.to_string version in
let info = Speechcake.Stamp.known ~uuid ~parents:[version] name in
let tier = Speechcake.Document.stamp info doc.data in
None, tier)
|> Base.Result.combine_errors in
Base.List.map tiers ~f:(fun (uuid, version) -> uuid, `Version version)
|> Speechcake.checkout db
|> Base.Result.map ~f:(fun tiers -> Base.List.map tiers ~f:(fun t -> None, t))
in
match tiers with
| Ok tiers ->
Ok (Annotation.Textgrid.Write.to_string tiers)
......@@ -71,9 +63,7 @@ let info tg_path =
let versions ~db ~uuid =
let open Base.Result.Monad_infix in
Speechcake.bucket db uuid
>>| Speechcake.Database.VS.versions
Speechcake.versions db uuid
......@@ -85,35 +75,11 @@ let add ~db ~tg_path ~key =
let identify_textgrid ~db ~textgrid =
Base.List.map textgrid ~f:(fun tier ->
let name = Annotation.Tier.name tier in
match Speechcake.Document.unstamp tier with
| Ok Speechcake.Stamp.(Known { uuid; parents; _ }, tier) ->
if Annotation.Tier.has_conflict_markers tier then
name, Error (`Has_conflict_markers)
else begin
match parents with
| [] ->
name, Error (`Missing_parent_info)
| [parent] ->
let parent = Base.Option.value_exn (Speechcake.Database.VS.Hash.of_string parent) in
let doc = Speechcake.get_at_version db (Uuidm.to_string uuid) parent in
let forbidden_branches =
Base.Option.value_exn (Speechcake.Database.of_key db.db (Uuidm.to_string uuid))
|> Speechcake.Database.VS.tags
|> Core.Hashtbl.keys in
name, Base.Result.map doc ~f:(fun doc -> `Known (uuid, doc.key, parent, forbidden_branches))
| parents -> (* multiple parents means we're in a merging process *)
name, Ok (`Conflict_resolution (uuid, Base.List.filter_map ~f:Speechcake.Database.VS.Hash.of_string parents))
end
| Ok (Fresh _, tier) ->
name, Ok (`Fresh (Speechcake.identify db tier))
| Error e ->
name, Error (`Stamp_decoding_error e))
Speechcake.identify_textgrid db textgrid
let put ~db ~tiers ~tier_name ~key =
let put ~db ~tiers ~tier_name ~key ~info ~branch =
Log.info (fun m -> m "put %s -> %s" tier_name (Base.String.concat ~sep:"/" key)) ;
let tier =
Base.List.find tiers
......@@ -121,21 +87,21 @@ let put ~db ~tiers ~tier_name ~key =
match tier with
| Some tier ->
let uuid = Uuidm.v `V4 in
let doc = Speechcake.Document.v ~key (Annotation.Tier.data tier) in
Ok (uuid, Speechcake.put db ~parents:[] (Uuidm.to_string uuid) doc)
let data = Annotation.Tier.data tier in
Base.Result.map (Speechcake.put db ~info ~branch ~parents:[] uuid data)
~f:(fun hash -> uuid, hash)
| None ->
Error (`Unknown_tier tier_name)
let most_recent_version bkt versions =
Base.List.map versions ~f:(fun v -> Base.Result.map (Speechcake.Database.VS.version bkt v) ~f:(fun d -> v, d))
(*let most_recent_version bkt versions =
Base.List.map versions ~f:(fun v -> Speechcake.Bucket.get bkt (`Version v))
|> Base.List.filter_map ~f:Base.Result.ok
|> Base.List.dedup_and_sort ~compare:(fun (_, (_, x)) (_, (_, y)) ->
let open Speechcake.Store.Info in
String.compare (date x) (date y))
|> Base.List.dedup_and_sort ~compare:(fun (_, x, _) (_, y, _) ->
String.compare (Info.date (Commit.info x)) (date y.info))
|> Base.List.rev
|> Base.List.hd_exn
|> fun (hash, (doc, meta)) ->
hash, doc, meta
hash, doc, meta*)
let update ~db ~tiers ~tier_name ~uuid ~branch =
Log.info (fun m -> m "update %s -> %s" tier_name (Uuidm.to_string uuid)) ;
......
......@@ -20,15 +20,6 @@ let encode_tier_listing : (string * string list * (string, String.comparator_wit
])
|> fun tier_listing -> `List tier_listing
let encode_version_info metadata =
let open Speechcake.Database.VS.Info in
`Assoc
[ "author", `String (author metadata)
; "comment", `String (comment metadata)
; "date", `String (date metadata)
; "parents", `List (Base.List.map (parents metadata) ~f:(fun p -> `String (string_of_hash p)))
]
let encode_version (hash, info) =
let branch_head = None in
match branch_head with
......
......@@ -258,6 +258,13 @@ module Data = struct
| _, _, _ ->
Error `Merging_points_and_intervals
let has_conflict_markers t =
let texts = match t with
| Intervals is -> List.map ~f:Interval.text is
| Points ps -> List.map ~f:Point.text ps
in
List.exists texts ~f:(String.is_prefix ~prefix:"###")
let encoder t =
let open Decoders_yojson.Basic.Encode in
match t with
......@@ -372,11 +379,7 @@ let decoder =
of_data ~name ~bounds data
let has_conflict_markers tier =
let texts = match data tier with
| Data.Intervals is -> List.map ~f:Interval.text is
| Data.Points ps -> List.map ~f:Point.text ps
in
List.exists texts ~f:(String.is_prefix ~prefix:"###")
Data.has_conflict_markers tier.data
let fill_gaps tier =
{ tier with data = Data.fill_gaps tier.data }
......
......@@ -45,7 +45,7 @@ module Data : sig
type merge_error =
[ `Intervals_overlap of Interval.t * Interval.t
| `Points_overlap of Point.t * Point.t
| `Merging_points_and_intervals
| `Merging_points_and_intervals
]
val merge
......@@ -56,6 +56,8 @@ module Data : sig
: old:t -> t -> t
-> (t * t, [> `Merging_points_and_intervals]) Result.t
val has_conflict_markers : t -> bool
val encoder : t Decoders_yojson.Basic.Encode.encoder
val decoder : t Decoders_yojson.Basic.Decode.decoder
end
......
let src = Logs.Src.create "storage" ~doc:"logs storage events"
module Log = (val Logs.src_log src : Logs.LOG)
module Make (C : Store.Contents.S) = struct
module VS = Store.Version_store.Make (C) (Store.Hash.BLAKE2B)
type t = (string, VS.t) Core.Hashtbl.t
let pp : t Fmt.t =
fun ppf db ->
Core.Hashtbl.to_alist db
|> Fmt.(list (pair ~sep:(any " -> ") string VS.pp) ppf)
let init () =
Core.Hashtbl.create (module Base.String)
let list : t -> string list =
Core.Hashtbl.keys
let of_key : t -> string -> VS.t option =
fun db k ->
Core.Hashtbl.find db k
let get db k v =
let open Base.Result.Monad_infix in
of_key db k
|> Base.Result.of_option ~error:(`Document_not_found k)
>>= fun s -> VS.version s v
>>| fun (data, meta) -> meta, data
let dump db path =
Core.Hashtbl.to_alist db
|> Base.List.iter ~f:(fun (uuid, bkt) ->
let data = VS.Encode.dump bkt |> Yojson.Basic.to_string in
Core.Out_channel.write_all (path ^ "/" ^ Uuidm.to_string uuid) ~data)
let load path =
Log.info Fmt.(fun m -> m "load: from path %a" (styled `Cyan string) path) ;
Core.Sys.ls_dir path
|> Base.List.map ~f:(fun filename ->
let path = path ^ "/" ^ filename in
Log.info Fmt.(fun m -> m "load: file %a" (styled `Cyan string) path) ;
let contents = Core.In_channel.read_all path |> Yojson.Basic.from_string
in
match VS.Decode.load contents with
| Ok doc -> Ok (filename, doc)
| Error e -> Error (`Decoding_error (filename, e)))
|> Base.Result.combine_errors
|> Base.Result.bind ~f:(fun kvs ->
match Core.Hashtbl.of_alist (module Base.String) kvs with
| `Ok ht -> Ok ht
| `Duplicate_key k -> Error [`Duplicate_key k])
end
module type S = sig
type t
type merge_error
val conflict_set : old:t -> t -> t -> (t * t, merge_error) Result.t
include Store.Contents.S
with type t := t and type merge_error := merge_error
include Fingerprintable.S
with type t := t
include Stampable.S
with type input := t
end
include Database_intf
(* exported modules and module types *)
module type CONFIG = CONFIG
module Stamp = Stamp
module Contents = Contents
module Document = Document
let src = Logs.Src.create "storage" ~doc:"logs storage events"
module Log = (val Logs.src_log src : Logs.LOG)
module Uuid = struct
module T = struct
include Uuidm
let sexp_of_t t =
Base.Sexp.Atom (Uuidm.to_string t)
(*let t_of_sexp = function
| Base.Sexp.Atom a -> Base.Option.value_exn (Uuidm.of_string a)
| Base.Sexp.List _ -> failwith "not a Uuid"*)
let hash = Core.Hashtbl.hash
end
include T
include Base.Comparable.Make (T)
end
module Make (C : Contents.S) (CFG : CONFIG) (H : Store.Hash.S) = struct
module Contents = C
type contents = Contents.t
type output = Contents.output
module Config = CFG
module Hash = H
type hash = Hash.t
module Stamp = Stamp
module Document = Document.Make (C)
module Version_store = Store.Version_store.Make (Document) (H)
type store = Version_store.t
type document = Document.t
type commit = Version_store.commit
type key = string list
type data_store_error = Version_store.Data_store.error
type merge_error = Contents.merge_error
type t =
{ stores : (Uuid.t, store) Core.Hashtbl.t
; fingerprints : (Fingerprint.t, Uuid.t) Core.Hashtbl.t
}
type error =
[ `Bucket_not_found of Uuidm.t
| `Commit_not_found of hash
| `Data_store_error of data_store_error
| `Decoding_error of Decoders_yojson.Basic.Decode.error
| `Head_not_found of string
| `Tag_not_found of string
]
type pointer =
[ `Head of string
| `Tag of string
| `Version of hash
]
let stores t = t.stores
(*let fingerprints t = t.fingerprints*)
(*let key b = Bucket.key b
let labels b = Bucket.labels b*)
let pp : t Fmt.t =
fun ppf db ->
Core.Hashtbl.to_alist db.stores
|> Fmt.(list (pair ~sep:(any " -> ") Uuid.pp Version_store.pp) ppf)
let init () =
{ stores = Core.Hashtbl.create (module Uuid)
; fingerprints = Core.Hashtbl.create (module Fingerprint)
}
let keys t =
Core.Hashtbl.keys t.stores
let of_uuid db u =
Core.Hashtbl.find (stores db) u
|> Base.Result.of_option ~error:(`Bucket_not_found u)
let versions db u =
Base.Result.map (of_uuid db u) ~f:Version_store.versions
let heads db u =
Base.Result.map (of_uuid db u) ~f:Version_store.heads
let tags db u =
Base.Result.map (of_uuid db u) ~f:Version_store.tags
let get (db : t) (u : Uuid.t) (v : pointer) : (hash * commit * document, [> error ]) Result.t =
let open Base.Result.Monad_infix in
of_uuid db u >>= fun store ->
match v with
| `Version hash ->
Version_store.version store hash
>>| fun (doc, commit) -> (hash, commit, doc)
| `Head branch ->
Version_store.head store branch
>>= fun hash -> Version_store.version store hash
>>| fun (doc, commit) -> (hash, commit, doc)
| `Tag name ->
Version_store.tag store name
>>= fun hash -> Version_store.version store hash
>>| fun (doc, commit) -> (hash, commit, doc)
let checkout (db : t) tiers =
Base.List.map tiers
~f:(fun (uuid, req) ->
Base.Result.map (get db uuid req) ~f:(fun (hash, _, doc) ->
let stamp =
Stamp.known (Base.List.last_exn (Document.key doc))
~uuid ~parents:[ H.to_string hash ] in
C.stamp (Document.data doc) stamp))
|> Base.Result.combine_errors
let put db u ~info ~parents ~branch v =
let store =
match of_uuid db u with
| Ok bucket ->
bucket
| Error (`Bucket_not_found _) ->
(*Bucket.init (Base.Option.value key ~default:[ Uuid.to_string u ])*)
Version_store.init ()
in
(*let hash = Bucket.put bucket ~info ~parents ~branch v in*)
let hash = Version_store.put store ~info ~parents ~branch v in
match hash with
| Ok hash ->
let fp =
C.fingerprint ~size:Config.fingerprint_size (Document.data v) in
Core.Hashtbl.set db.stores ~key:u ~data:store;
Core.Hashtbl.set db.fingerprints ~key:fp ~data:u;
(* TODO: write to disk *)
Ok hash
| Error (`Merge_error (old, x, y, `Content_conflict _)) ->
let open Base.Result.Monad_infix in
get db u (`Version old)
>>= fun (_, _, told) -> get db u (`Version x)
>>= fun (_, _, tx) -> get db u (`Version y)
>>= fun (_, _, ty) ->
(C.conflict_set ~old:(Document.data told)
(Document.data tx) (Document.data ty)
|> Base.Result.map_error
~f:(fun e -> `Merge_error (old, x, y, `Content_conflict e))
>>= fun conflict_set ->
Error (`Conflict_set conflict_set))
| Error (`Data_store_error _ as e)
| Error (`Commit_not_found _ as e)
| Error (`Decoding_error _ as e)
| Error (`Merge_error (_, _, _, `Key_conflict (_, _)) as e)
| Error (`Illegal_move as e) ->
Error e
let identify (t : t) data =
let fingerprint = C.fingerprint ~size:Config.fingerprint_size data in
let candidates =
Core.Hashtbl.to_alist t.fingerprints
(* calculate fingerprint match pecentages *)
|> Base.List.map ~f:(fun (fp, uuid) ->
let count = Base.Set.length (Base.Set.inter fingerprint fp) in
let percent = count * 100 / (min Config.fingerprint_size (C.length data)) in
uuid, percent)
(* filter out zero-valued scores *)
|> Base.List.filter ~f:(fun (_, score) -> score > 0)
(* sort by score in descending order *)
|> Base.List.sort ~compare:(fun (_, x) (_, y) -> Base.Int.compare x y)
|> Base.List.rev
(* only take the first 10 results *)
|> fun l -> Base.List.take l 10
(* pack up the results *)
|> Base.List.filter_map ~f:(fun (uuid, score) ->
let open Base.Result.Monad_infix in
( of_uuid t uuid >>| fun store ->
Version_store.heads store
|> Core.Hashtbl.keys
|> fun brs -> (uuid, score, brs))
|> Base.Result.ok)
in
let exact_match =
Base.List.map candidates ~f:(fun (uuid, _, _) -> uuid)
|> Base.List.find_map ~f:(fun uuid ->
let versions =
match versions t uuid with
| Ok versions -> Core.Hashtbl.to_alist versions
| Error _ -> [] in
Base.List.find_map versions ~f:(fun (v, _) ->
let doc = get t uuid (`Version v) in
match doc with
| Ok (_, commit, doc) ->
if C.equal data (Document.data doc)
then Some ((uuid, v), (commit, doc))
else None
| Error _ -> None))
in
match exact_match with
| Some ((uuid, v), (commit, doc)) ->
`Exactly ((uuid, v), (commit, doc))
| None ->
`Possibly_one_of candidates
let dump db path =
Core.Hashtbl.to_alist db
|> Base.List.iter ~f:(fun (uuid, bkt) ->
let data = Version_store.Encode.dump bkt |> Yojson.Basic.to_string in
Core.Out_channel.write_all (path ^ "/" ^ Uuidm.to_string uuid) ~data)
let load path =
Log.info Fmt.(fun m -> m "load: from path %a" (styled `Cyan string) path) ;
Core.Sys.ls_dir path
|> Base.List.map ~f:(fun filename ->
let path = path ^ "/" ^ filename in
let uuid = Base.Option.value_exn (Uuidm.of_string filename) in
Log.info Fmt.(fun m -> m "load: file %a" (styled `Cyan string) path) ;
let contents = Core.In_channel.read_all path |> Yojson.Basic.from_string
in
match Decoders_yojson.Basic.Decode.decode_value Version_store.Decode.decoder contents with
| Ok doc -> Ok (uuid, doc)
| Error e -> Error (`Decoding_error (filename, e)))
|> Base.Result.combine_errors
|> Base.Result.bind ~f:(fun kvs ->
match Core.Hashtbl.of_alist (module Uuid) kvs with
| `Ok ht -> Ok ht
| `Duplicate_key k -> Error [`Duplicate_key k])
end
include Database_intf.Intf (** @inline *)
module type CONFIG = sig
val fingerprint_size : int
val main_branch_name : string
end
module type S = sig
module Contents : Contents.S
module Config : CONFIG
module Hash : Store.Hash.S
type t
type contents = Contents.t
type output = Contents.output
module Stamp = Stamp
module Document : Document.S with module Contents := Contents
type store
type hash
type commit
type document = Document.t
type key = string list
type merge_error = Contents.merge_error
type data_store_error
type error =
[ `Bucket_not_found of Uuidm.t
| `Commit_not_found of hash
| `Data_store_error of data_store_error
| `Decoding_error of Decoders_yojson.Basic.Decode.error
| `Head_not_found of string
| `Tag_not_found of string
]
type pointer =
[ `Head of string
| `Tag of string
| `Version of hash
]
include Base.Pretty_printer.S with type t := t
(*val key : store -> key
val labels : bucket -> (string, Base.String.comparator_witness) Base.Set.t*)
val init : unit -> t
val keys : t -> Uuidm.t list
val of_uuid
: t -> Uuidm.t
-> (store, [> `Bucket_not_found of Uuidm.t ]) Result.t
val versions
: t -> Uuidm.t
-> ((hash, commit) Base.Hashtbl.t,
[> `Bucket_not_found of Uuidm.t ]) Result.t
val heads
: t -> Uuidm.t
-> ((string, hash) Base.Hashtbl.t,
[> `Bucket_not_found of Uuidm.t ]) Result.t
val tags
: t -> Uuidm.t
-> ((string, hash) Base.Hashtbl.t,
[> `Bucket_not_found of Uuidm.t ]) Result.t
val get
: t -> Uuidm.t -> pointer
-> (hash * commit * document, [> error ]) Result.t
val checkout
: t -> (Uuidm.t * pointer) list
-> (output list, [> error ] list) Result.t
(*val put
: t -> Uuidm.t
-> info:Store.Info.t -> parents:hash list
-> branch:string
-> document
-> (hash,