- Notifications
You must be signed in to change notification settings - Fork 21
Open
Description
I'm finding that adding to and searching an ObjectBox database is really fast. However, the remove operation is really slow (about 1 second per object). The database is on a local NVMe SSD. It contains about 20,000 hashes and takes up about 6 GB.
My `find_unique` lookup (a `hash_box.query` call) is fast; it is specifically the call to `hash_box.remove` that takes the time.
What am I doing wrong?
@Entity()
class ImHash:
    """Stored image hash: a unique string key plus an embedding vector."""

    id = Id
    # Hash-indexed and declared unique.
    key = String(index=Index(IndexType.HASH), unique=True)
    # Embedding vector with an HNSW index using cosine distance.
    cos_value = Float32Vector(index=HnswIndex(
        dimensions=62720,
        distance_type=VectorDistanceType.COSINE,
    ))


def hash_image(im: Image.Image) -> list[float]:
    """Return the img2vec embedding of ``im`` as a flattened vector.

    NOTE(review): the annotation says ``list[float]``, but the value returned
    is whatever ``.numpy().flatten()`` yields (presumably a NumPy array) --
    confirm and adjust the annotation if so.
    """
    vector = img2vec.get_vec(im, tensor=True)
    return vector.detach().cpu().numpy().flatten()


def hash_and_store(name_or_fp, key: str):
    """Hash the image opened from ``name_or_fp`` and store it under ``key``.

    An entry already stored under ``key`` is reused; otherwise a new
    ``ImHash`` is created.

    NOTE(review): the original paste lost its indentation. The vector
    assignment and the ``put`` are assumed to run for both new and existing
    entries -- confirm against the real file.
    """
    im = Image.open(name_or_fp)
    h = hash_image(im)
    ih = find_unique(key)
    if ih is None:
        # create
        ih = ImHash()
        ih.key = key
    ih.cos_value = h
    with store_lock:
        hash_box.put(ih)


def init(db_dir: pathlib.Path):
    """Open the store under ``db_dir`` and set the module-level handles.

    Assigns the globals ``store``, ``hash_box`` and ``img2vec``.
    """
    global store, hash_box, img2vec
    store = Store(
        directory=str(db_dir / directory_name),
        model_json_file=str(db_dir / json_model_name),
        max_db_size_in_kb=10 * 1024 * 1024,
    )
    hash_box = store.box(ImHash)
    img2vec = Img2Vec(cuda=False, model='efficientnet_b0')


def close():
    """Close the module-level store."""
    store.close()


def find_unique(key: str):
    """Return the single ``ImHash`` whose key equals ``key``, or ``None``.

    ``None`` is returned both when nothing matches and when more than one
    entry matches (a message is printed in the latter case).
    """
    with store_lock:
        query = hash_box.query(ImHash.key.equals(key)).build()
        result = query.find()
    if not result:
        return None
    if len(result) > 1:
        print('Multiple matches found')
        return None
    return result[0]


def find_similar(key: str, count: int = 8) -> list[tuple[ImHash, float]]:
    """Return the nearest neighbours of the entry stored under ``key``.

    Args:
        key: Key of the entry whose vector is used as the search target.
        count: Number of neighbours requested from the query (default 8,
            the value previously hard-coded).

    Returns:
        ``(ImHash, score)`` pairs sorted by ascending score, or an empty
        list when ``find_unique(key)`` finds no usable entry.
    """
    target = find_unique(key)
    if target is None:
        # Without this guard, target.cos_value raises AttributeError.
        return []
    with store_lock:
        query = hash_box.query(
            ImHash.cos_value.nearest_neighbor(target.cos_value, count)
        ).build()
        results = query.find_with_scores()
    results.sort(key=lambda x: x[1])
    return results


def remove(key: str):
    """Remove the entry stored under ``key``; do nothing if none is found."""
    target = find_unique(key)
    if target is not None:
        with store_lock:
            hash_box.remove(target)


def remove_many(keys: list[str]):
    """Remove the entries for ``keys`` inside one ``store.write_tx()`` block.

    Keys with no matching entry are reported with a printed message and
    skipped.
    """
    with store.write_tx():
        for k in keys:
            i = find_unique(k)
            if i is None:
                print('Hash key "%s" was already gone' % k)
            else:
                with store_lock:
                    hash_box.remove(i.id)
Metadata
Assignees
Labels
No labels