Module benchmark
Entrypoint for running all tasks in biobench.

Most of this script is self-documenting. Run python benchmark.py --help to see all the options.

Note that you will have to download all the datasets first; each dataset includes its own download script with instructions. See biobench.newt.download for an example.
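A quick way to read a dataset's download instructions without leaving Python (a minimal sketch; it only assumes biobench is importable, since the module path comes from the docs above, and that the instructions live in the module docstring):

    # Print the module docstring, which describes how to download that dataset.
    import biobench.newt.download as newt_download

    help(newt_download)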
Design

biobench is designed to make it easy to add both new models and new tasks that work with the existing ones. To add a new model, see the documentation for biobench.registry, which includes a tutorial for adding a new model.
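Tasks plug in through a simple contract that main() (documented below) relies on: each task is a module biobench.<task_name> that exposes a benchmark(cfg) function returning a reporting.Report. A minimal sketch of that shape, with the body left as a placeholder since the real loading and scoring logic is task-specific:

    from biobench import reporting

    def benchmark(cfg) -> reporting.Report:
        """Run this task for one experiment config and return a Report.

        main() submits this callable with executor.submit(module.benchmark, cfg),
        so it takes the config as its only argument.
        """
        ...  # load the task's data, evaluate cfg.model, and package results into a Report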
Functions
def main(cfgs: list[str] = ['configs/neurips.toml'], dry_run: bool = True)
@beartype.beartype
def main(
    cfgs: list[str] = [os.path.join("configs", "neurips.toml")],
    dry_run: bool = True,
):
    """
    Launch all jobs, using either a local GPU or a Slurm cluster. Then report results and save to disk.

    Args:
        cfgs: List of paths to TOML config files.
        dry_run: If --no-dry-run, actually run experiment.
    """
    # Load all configs from the provided paths and concatenate them
    cfgs = [cfg for path in cfgs for cfg in config.load(path)]
    if not cfgs:
        logger.warning("No configurations loaded.")
        return

    first = cfgs[0]
    # Verify all configs have consistent execution settings
    for cfg in cfgs[1:]:
        if cfg.slurm_acct != first.slurm_acct:
            raise ValueError("All configs must have the same slurm_acct")
        if cfg.log_to != first.log_to:
            raise ValueError("All configs must have the same log_to directory")
        if cfg.ssl != first.ssl:
            raise ValueError("All configs must have the same ssl setting")

    # 1. Setup executor.
    if first.slurm_acct:
        executor = submitit.SlurmExecutor(folder=first.log_to)
        executor.update_parameters(
            time=30,
            gpus_per_node=1,
            cpus_per_task=8,
            stderr_to_stdout=True,
            partition="debug",
            account=first.slurm_acct,
        )
        # See biobench.third_party_models.get_ssl() for a discussion of this variable.
        if not first.ssl:
            executor.update_parameters(setup=["export BIOBENCH_DISABLE_SSL=1"])
    else:
        executor = submitit.DebugExecutor(folder=first.log_to)
        # See biobench.third_party_models.get_ssl() for a discussion of this variable.
        if not first.ssl:
            os.environ["BIOBENCH_DISABLE_SSL"] = "1"

    db = reporting.get_db(first)

    # 2. Run benchmarks.
    jobs = []
    n_skipped = 0
    for cfg in helpers.progress(cfgs, desc="submitting jobs"):
        for task_name, data_root in cfg.data.to_dict().items():
            # Check that you can get the task_name
            try:
                module = importlib.import_module(f"biobench.{task_name}")
            except ModuleNotFoundError:
                logger.warning("Could not find task '%s'.", task_name)
                continue

            if not data_root:
                continue

            if reporting.already_ran(db, cfg, task_name):
                n_skipped += 1
                continue
            elif dry_run:
                jobs.append(cfg)
            else:
                job = executor.submit(module.benchmark, cfg)
                jobs.append(job)

    if dry_run:
        # Summarize the jobs by model and training examples
        model_counts = collections.defaultdict(int)
        for job_cfg in jobs:
            key = (job_cfg.model.ckpt, job_cfg.n_train)
            model_counts[key] += 1

        # Check if there are any jobs to run
        if not model_counts:
            logger.info("All jobs have already been completed. Nothing to run.")
            return

        # Print summary table
        logger.info("Job Summary:")
        logger.info("%-50s | %-10s | %-5s", "Model", "Train Size", "Count")
        logger.info("-" * 71)
        for (model, n_train), count in sorted(model_counts.items()):
            logger.info("%-50s | %10d | %5d", model, n_train, count)
        logger.info("-" * 71)
        logger.info(
            "Total jobs to run: %d (skipped %d already completed)",
            len(jobs),
            n_skipped,
        )
        return

    logger.info("Submitted %d jobs (skipped %d).", len(jobs), n_skipped)

    # 3. Write results to sqlite.
    for i, future in enumerate(submitit.helpers.as_completed(jobs)):
        err = future.exception()
        if err:
            logger.warning("Error running job: %s: %s", err, err.__cause__)
            continue

        report: reporting.Report = future.result()
        report.write()
        logger.info("Finished %d/%d jobs.", i + 1, len(jobs))

    logger.info("Finished.")
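The helpers that main() composes are usable on their own. A short sketch of the same calls (mirroring the source above) that previews what would run for a given config file without submitting anything; the config path is only an example:

    from biobench import config, reporting

    cfgs = list(config.load("configs/neurips.toml"))  # one TOML file can expand to many configs
    db = reporting.get_db(cfgs[0])                    # results database used to skip finished work

    for cfg in cfgs:
        for task_name, data_root in cfg.data.to_dict().items():
            done = reporting.already_ran(db, cfg, task_name)
            print(cfg.model.ckpt, cfg.n_train, task_name, "done" if done else "pending")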
Launch all jobs, using either a local GPU or a Slurm cluster. Then report results and save to disk.
Args
cfgs
- List of paths to TOML config files.
dry_run
- If --no-dry-run is passed, actually run the experiments; by default, main only prints a summary of the jobs it would submit.
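Calling main() directly from Python mirrors the command line (dry_run=True is the default, matching the behaviour described above):

    # Preview the job summary without submitting anything.
    main(cfgs=["configs/neurips.toml"], dry_run=True)

    # Submit the jobs for real; equivalent to passing --no-dry-run on the command line.
    main(cfgs=["configs/neurips.toml"], dry_run=False)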