Skip to content

Commit 6ad44d1

Browse files
committed
names remapping: avoid clashes by using different dirs
1 parent f16bd32 commit 6ad44d1

File tree

4 files changed

+64
-31
lines changed

4 files changed

+64
-31
lines changed

src/runcrate/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def cli():
5858
)
5959
@click.option(
6060
"--remap-names",
61-
help="remap file/dir names to the original ones (MAY LEAD TO CLASHES!)",
61+
help="remap file/dir names to the original ones",
6262
is_flag=True
6363
)
6464
def convert(root, output, license, workflow_name, readme, remap_names):

src/runcrate/convert.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
"null": None,
5555
}
5656

57-
SCATTER_JOB_PATTERN = re.compile(r"^(.+)_\d+$")
57+
SCATTER_JOB_PATTERN = re.compile(r"^(.+)_(\d+)$")
5858

5959
CWLPROV_NONE = "https://w3id.org/cwl/prov#None"
6060

@@ -196,6 +196,7 @@ def __init__(self, root, workflow_name=None, license=None, readme=None,
196196
# map source files to destination files
197197
self.file_map = {}
198198
self.remap_names = remap_names
199+
self.data_root = "data"
199200

200201
@staticmethod
201202
def _get_step_maps(cwl_defs):
@@ -213,11 +214,13 @@ def _get_step_maps(cwl_defs):
213214
def _resolve_plan(self, activity):
214215
job_qname = activity.plan()
215216
plan = activity.provenance.entity(job_qname)
217+
scatter_id = None
216218
if not plan:
217219
m = SCATTER_JOB_PATTERN.match(str(job_qname))
218220
if m:
219221
plan = activity.provenance.entity(m.groups()[0])
220-
return plan
222+
scatter_id = m.groups()[1]
223+
return plan, scatter_id
221224

222225
def _get_hash(self, prov_param):
223226
k = prov_param.id.localpart
@@ -436,9 +439,11 @@ def add_action(self, crate, activity, parent_instrument=None):
436439
"@type": "CreateAction",
437440
"name": activity.label,
438441
}))
439-
plan = self._resolve_plan(activity)
442+
plan, scatter_id = self._resolve_plan(activity)
440443
plan_tag = plan.id.localpart
444+
dest_base = Path(self.data_root)
441445
if plan_tag == "main":
446+
dest_base = dest_base / "main"
442447
assert str(activity.type) == "wfprov:WorkflowRun"
443448
instrument = workflow
444449
self.roc_engine_run["result"] = action
@@ -453,6 +458,7 @@ def to_wf_p(k):
453458
if parts[0] == "main":
454459
parts[0] = parent_instrument_fragment
455460
plan_tag = "/".join(parts)
461+
dest_base = dest_base / (f"{plan_tag}_{scatter_id}" if scatter_id else f"{plan_tag}")
456462
tool_name = self.step_maps[parent_instrument_fragment][plan_tag]["tool"]
457463
instrument = crate.dereference(f"{workflow.id}#{tool_name}")
458464
control_action = self.control_actions.get(plan_tag)
@@ -476,12 +482,14 @@ def to_wf_p(k):
476482
action["instrument"] = instrument
477483
action["startTime"] = activity.start().time.isoformat()
478484
action["endTime"] = activity.end().time.isoformat()
479-
action["object"] = self.add_action_params(crate, activity, to_wf_p, "usage")
480-
action["result"] = self.add_action_params(crate, activity, to_wf_p, "generation")
485+
action["object"] = self.add_action_params(crate, activity, to_wf_p, "usage",
486+
dest_base / "in" if self.remap_names else "")
487+
action["result"] = self.add_action_params(crate, activity, to_wf_p, "generation",
488+
dest_base / "out" if self.remap_names else "")
481489
for job in activity.steps():
482490
self.add_action(crate, job, parent_instrument=instrument)
483491

484-
def add_action_params(self, crate, activity, to_wf_p, ptype="usage"):
492+
def add_action_params(self, crate, activity, to_wf_p, ptype="usage", dest_base=""):
485493
action_params = []
486494
all_roles = set()
487495
for rel in getattr(activity, ptype)():
@@ -501,7 +509,7 @@ def add_action_params(self, crate, activity, to_wf_p, ptype="usage"):
501509
wf_p = crate.dereference(to_wf_p(k))
502510
k = get_fragment(k)
503511
v = rel.entity()
504-
value = self.convert_param(v, crate)
512+
value = self.convert_param(v, crate, dest_base=dest_base)
505513
if value is None:
506514
continue # param is optional with no default and was not set
507515
if {"ro:Folder", "wf4ever:File"} & set(str(_) for _ in v.types()):
@@ -538,7 +546,7 @@ def _set_alternate_name(prov_param, action_p, parent=None):
538546
if "alternateName" in parent:
539547
action_p["alternateName"] = (Path(parent["alternateName"]) / basename).as_posix()
540548

541-
def convert_param(self, prov_param, crate, convert_secondary=True, parent=None):
549+
def convert_param(self, prov_param, crate, convert_secondary=True, parent=None, dest_base=""):
542550
type_names = frozenset(str(_) for _ in prov_param.types())
543551
secondary_files = [_.generated_entity() for _ in prov_param.derivations()
544552
if str(_.type) == "cwlprov:SecondaryFile"]
@@ -562,7 +570,7 @@ def convert_param(self, prov_param, crate, convert_secondary=True, parent=None):
562570
basename = getattr(prov_param, "basename", hash_)
563571
else:
564572
basename = hash_
565-
dest = Path(parent.id if parent else "") / basename
573+
dest = Path(parent.id if parent else dest_base) / basename
566574
action_p = crate.dereference(dest.as_posix())
567575
if not action_p:
568576
source = self.root / Path("data") / hash_[:2] / hash_
@@ -583,7 +591,7 @@ def convert_param(self, prov_param, crate, convert_secondary=True, parent=None):
583591
basename = getattr(prov_param, "basename", hash_)
584592
else:
585593
basename = hash_
586-
dest = Path(parent.id if parent else "") / basename
594+
dest = Path(parent.id if parent else dest_base) / basename
587595
action_p = crate.dereference(dest.as_posix())
588596
if not action_p:
589597
action_p = crate.add_directory(dest_path=dest)

tests/test_cli.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,8 @@ def test_cli_convert_remap_names(data_dir, tmpdir):
101101
args = ["convert", str(root), "-o", str(crate_dir), "--remap-names"]
102102
assert runner.invoke(cli, args).exit_code == 0
103103
crate = ROCrate(crate_dir)
104-
assert crate.get("grepucase_in/")
105-
assert (crate_dir / "grepucase_in").is_dir()
104+
assert crate.get("data/main/in/grepucase_in/")
105+
assert (crate_dir / "data" / "main" / "in" / "grepucase_in").is_dir()
106106

107107

108108
def test_cli_report_provenance_minimal(data_dir, caplog):

tests/test_cwlprov_crate_builder.py

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1093,48 +1093,73 @@ def test_remap_names(data_dir, tmpdir):
10931093
assert len(wf_objects) == 2
10941094
assert len(wf_results) == 1
10951095
wf_objects_map = {_.id: _ for _ in wf_objects}
1096-
wf_input_dir = wf_objects_map.get("grepucase_in/")
1096+
wf_input_dir = wf_objects_map.get("data/main/in/grepucase_in/")
10971097
assert wf_input_dir
10981098
wf_output_dir = wf_results[0]
1099-
assert wf_output_dir.id == "ucase_out/"
1099+
assert wf_output_dir.id == "data/main/out/ucase_out/"
11001100
assert set(_.id for _ in wf_input_dir["hasPart"]) == {
1101-
"grepucase_in/bar", "grepucase_in/foo"
1101+
"data/main/in/grepucase_in/bar", "data/main/in/grepucase_in/foo"
11021102
}
11031103
assert set(_.id for _ in wf_output_dir["hasPart"]) == {
1104-
"ucase_out/bar.out/", "ucase_out/foo.out/"
1104+
"data/main/out/ucase_out/bar.out/", "data/main/out/ucase_out/foo.out/"
11051105
}
11061106
for d in wf_output_dir["hasPart"]:
1107-
if d.id == "ucase_out/bar.out/":
1108-
assert d["hasPart"][0].id == "ucase_out/bar.out/bar.out.out"
1107+
if d.id == "data/main/out/ucase_out/bar.out/":
1108+
assert d["hasPart"][0].id == "data/main/out/ucase_out/bar.out/bar.out.out"
11091109
else:
1110-
assert d["hasPart"][0].id == "ucase_out/foo.out/foo.out.out"
1110+
assert d["hasPart"][0].id == "data/main/out/ucase_out/foo.out/foo.out.out"
11111111
greptool_action = action_map["packed.cwl#greptool.cwl"]
11121112
greptool_objects = greptool_action["object"]
11131113
greptool_results = greptool_action["result"]
11141114
assert len(greptool_objects) == 2
11151115
assert len(greptool_results) == 1
11161116
greptool_objects_map = {_.id: _ for _ in greptool_objects}
1117-
greptool_input_dir = greptool_objects_map.get("grepucase_in/")
1118-
assert greptool_input_dir is wf_input_dir
1117+
greptool_input_dir = greptool_objects_map.get("data/main/grep/in/grepucase_in/")
1118+
assert greptool_input_dir
1119+
assert set(_.id for _ in greptool_input_dir["hasPart"]) == {
1120+
"data/main/grep/in/grepucase_in/bar", "data/main/grep/in/grepucase_in/foo"
1121+
}
11191122
greptool_output_dir = greptool_results[0]
1120-
assert greptool_output_dir.id == "grep_out/"
1123+
assert greptool_output_dir.id == "data/main/grep/out/grep_out/"
1124+
assert set(_.id for _ in greptool_output_dir["hasPart"]) == {
1125+
"data/main/grep/out/grep_out/bar.out", "data/main/grep/out/grep_out/foo.out"
1126+
}
11211127
ucasetool_action = action_map["packed.cwl#ucasetool.cwl"]
11221128
ucasetool_objects = ucasetool_action["object"]
11231129
ucasetool_results = ucasetool_action["result"]
11241130
assert len(ucasetool_objects) == 1
11251131
assert len(ucasetool_results) == 1
11261132
ucasetool_input_dir = ucasetool_objects[0]
1127-
assert ucasetool_input_dir is greptool_output_dir
1133+
assert ucasetool_input_dir.id == "data/main/ucase/in/grep_out/"
1134+
assert set(_.id for _ in ucasetool_input_dir["hasPart"]) == {
1135+
"data/main/ucase/in/grep_out/bar.out", "data/main/ucase/in/grep_out/foo.out"
1136+
}
11281137
ucasetool_output_dir = ucasetool_results[0]
1129-
assert ucasetool_output_dir is wf_output_dir
1138+
assert ucasetool_output_dir.id == "data/main/ucase/out/ucase_out/"
1139+
assert set(_.id for _ in ucasetool_output_dir["hasPart"]) == {
1140+
"data/main/ucase/out/ucase_out/bar.out/", "data/main/ucase/out/ucase_out/foo.out/"
1141+
}
1142+
1143+
for d in ucasetool_output_dir["hasPart"]:
1144+
if d.id == "data/main/ucase/out/ucase_out/bar.out/":
1145+
assert d["hasPart"][0].id == "data/main/ucase/out/ucase_out/bar.out/bar.out.out"
1146+
else:
1147+
assert d["hasPart"][0].id == "data/main/ucase/out/ucase_out/foo.out/foo.out.out"
1148+
11301149
for e in crate.data_entities:
11311150
assert "alternateName" not in e
11321151
for p in (
1133-
"grepucase_in/bar",
1134-
"grepucase_in/foo",
1135-
"grep_out/bar.out",
1136-
"grep_out/foo.out",
1137-
"ucase_out/bar.out/bar.out.out",
1138-
"ucase_out/foo.out/foo.out.out",
1152+
"data/main/in/grepucase_in/bar",
1153+
"data/main/in/grepucase_in/foo",
1154+
"data/main/out/ucase_out/bar.out/bar.out.out",
1155+
"data/main/out/ucase_out/foo.out/foo.out.out",
1156+
"data/main/grep/in/grepucase_in/bar",
1157+
"data/main/grep/in/grepucase_in/foo",
1158+
"data/main/grep/out/grep_out/bar.out",
1159+
"data/main/grep/out/grep_out/foo.out",
1160+
"data/main/ucase/in/grep_out/bar.out",
1161+
"data/main/ucase/in/grep_out/foo.out",
1162+
"data/main/ucase/out/ucase_out/bar.out/bar.out.out",
1163+
"data/main/ucase/out/ucase_out/foo.out/foo.out.out",
11391164
):
11401165
assert (output / p).is_file()

0 commit comments

Comments
 (0)