Skip to content

Commit 5ba67bc

Browse files
jorgeepditommaso
andauthored
Add check subcommand in lineage (#6074) [ci fast]
Signed-off-by: jorgee <[email protected]> Signed-off-by: Paolo Di Tommaso <[email protected]> Co-authored-by: Paolo Di Tommaso <[email protected]>
1 parent f8dc506 commit 5ba67bc

File tree

9 files changed

+193
-12
lines changed

9 files changed

+193
-12
lines changed

docs/reference/cli.md

+3
Original file line numberDiff line numberDiff line change
@@ -714,6 +714,9 @@ See the {ref}`data-lineage-page` guide to learn how to get started with data lin
714714

715715
**Subcommands**
716716

717+
`check <lid>`
718+
: Validate the checksum of output lineage record.
719+
717720
`diff <lid-1> <lid-2>`
718721
: Display a git-style diff between two lineage records.
719722

modules/nextflow/src/main/groovy/nextflow/cli/CmdLineage.groovy

+28
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ class CmdLineage extends CmdBase implements UsageAware {
4545
void render(ConfigMap config, List<String> args)
4646
void diff(ConfigMap config, List<String> args)
4747
void find(ConfigMap config, List<String> args)
48+
void check(ConfigMap config, List<String> args)
4849
}
4950

5051
interface SubCmd {
@@ -66,6 +67,7 @@ class CmdLineage extends CmdBase implements UsageAware {
6667
commands << new CmdRender()
6768
commands << new CmdDiff()
6869
commands << new CmdFind()
70+
commands << new CmdCheck()
6971
}
7072

7173
@Parameter(hidden = true)
@@ -283,4 +285,30 @@ class CmdLineage extends CmdBase implements UsageAware {
283285

284286
}
285287

288+
class CmdCheck implements SubCmd {
289+
290+
@Override
291+
String getName() { 'check' }
292+
293+
@Override
294+
String getDescription() {
295+
return 'Checks the integrity of an lineage file path'
296+
}
297+
298+
void apply(List<String> args) {
299+
if (args.size() != 1) {
300+
println("ERROR: Incorrect number of parameters")
301+
return
302+
}
303+
operation.check(config, args)
304+
}
305+
306+
@Override
307+
void usage() {
308+
println description
309+
println "Usage: nextflow $NAME $name <lid-file>"
310+
}
311+
312+
}
313+
286314
}

modules/nf-lineage/src/main/nextflow/lineage/cli/LinCommandImpl.groovy

+16
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,13 @@ import groovy.transform.CompileStatic
2525
import nextflow.Session
2626
import nextflow.cli.CmdLineage
2727
import nextflow.config.ConfigMap
28+
import nextflow.exception.AbortOperationException
2829
import nextflow.lineage.LinHistoryRecord
2930
import nextflow.lineage.LinPropertyValidator
3031
import nextflow.lineage.LinStore
3132
import nextflow.lineage.LinStoreFactory
3233
import nextflow.lineage.LinUtils
34+
import nextflow.lineage.fs.LinPathFactory
3335
import nextflow.lineage.serde.LinEncoder
3436
import nextflow.ui.TableBuilder
3537
import org.eclipse.jgit.diff.DiffAlgorithm
@@ -188,6 +190,20 @@ class LinCommandImpl implements CmdLineage.LinCommand {
188190
}
189191
}
190192

193+
@Override
194+
void check(ConfigMap config, List<String> args) {
195+
final store = LinStoreFactory.getOrCreate(new Session(config))
196+
if (!store) {
197+
println ERR_NOT_LOADED
198+
return
199+
}
200+
final valid = LinPathFactory.create(args[0]).validate()
201+
if( !valid )
202+
throw new AbortOperationException(valid.error)
203+
else
204+
println("Checksum validation succeed")
205+
}
206+
191207
private Map<String, List<String>> parseFindArgs(List<String> args){
192208
Map<String, List<String>> params = [:].withDefault { [] }
193209

modules/nf-lineage/src/main/nextflow/lineage/config/LineageConfig.groovy

+6-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
package nextflow.lineage.config
1818

1919
import groovy.transform.CompileStatic
20+
import groovy.transform.ToString
21+
import groovy.util.logging.Slf4j
2022
import nextflow.Global
2123
import nextflow.Session
2224

@@ -25,6 +27,8 @@ import nextflow.Session
2527
*
2628
* @author Paolo Di Tommaso <[email protected]>
2729
*/
30+
@Slf4j
31+
@ToString
2832
@CompileStatic
2933
class LineageConfig {
3034

@@ -38,7 +42,8 @@ class LineageConfig {
3842
}
3943

4044
static Map<String,Object> asMap() {
41-
session?.config?.navigate('lineage') as Map ?: new HashMap<String,Object>()
45+
final result = session?.config?.navigate('lineage') as Map
46+
return result != null ? result : new HashMap<String,Object>()
4247
}
4348

4449
static LineageConfig create(Session session) {

modules/nf-lineage/src/main/nextflow/lineage/config/LineageStoreOpts.groovy

+3
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,14 @@
1717
package nextflow.lineage.config
1818

1919
import groovy.transform.CompileStatic
20+
import groovy.transform.ToString
21+
2022
/**
2123
* Model data store options
2224
*
2325
* @author Paolo Di Tommaso <[email protected]>
2426
*/
27+
@ToString
2528
@CompileStatic
2629
class LineageStoreOpts {
2730

modules/nf-lineage/src/main/nextflow/lineage/fs/LinFileSystemProvider.groovy

+5-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ import java.nio.file.spi.FileSystemProvider
3838

3939
import groovy.transform.CompileStatic
4040
import nextflow.lineage.config.LineageConfig
41-
41+
import nextflow.util.TestOnly
4242

4343
/**
4444
* File System Provider for LID Paths
@@ -391,4 +391,8 @@ class LinFileSystemProvider extends FileSystemProvider {
391391
throw new UnsupportedOperationException("Set file attributes not supported by ${getScheme().toUpperCase()} file system provider")
392392
}
393393

394+
@TestOnly
395+
void reset() {
396+
fileSystem=null
397+
}
394398
}

modules/nf-lineage/src/main/nextflow/lineage/fs/LinPath.groovy

+33
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import java.nio.file.WatchService
2929
import java.time.OffsetDateTime
3030

3131
import groovy.transform.CompileStatic
32+
import groovy.transform.EqualsAndHashCode
3233
import groovy.transform.Memoized
3334
import groovy.util.logging.Slf4j
3435
import nextflow.file.FileHelper
@@ -587,6 +588,38 @@ class LinPath implements Path, LogicalDataPath {
587588
String toString() {
588589
return "$filePath${query ? '?' + query : ''}${fragment ? '#' + fragment : ''}".toString()
589590
}
591+
/**
592+
* Validates the integrity of the LinPath. If there is a problem with the validation an exception is thrown.
593+
* To validate just try to get the find target target path. It checks if lid exists, it is a FileOutput,
594+
* the target path exists and the checksum is the same as the stored in the metadata.
595+
*/
596+
FileCheck validate() throws Exception{
597+
final obj = fileSystem.store.load(filePath)
598+
if( !obj )
599+
return new FileCheck("File cannot be found")
600+
if( obj instanceof FileOutput ) {
601+
final res = validateDataOutput(obj as FileOutput)
602+
return new FileCheck(res, obj)
603+
}
604+
return new FileCheck("Unexpected lineage object type: ${obj.getClass().getName()}")
605+
}
606+
607+
@EqualsAndHashCode
608+
static class FileCheck {
609+
final String error
610+
final FileOutput file
590611

612+
FileCheck(String error, FileOutput out=null) {
613+
this.error = error
614+
this.file = out
615+
}
616+
617+
/**
618+
* Implements groovy truth
619+
*/
620+
boolean asBoolean() {
621+
return error==null && file!=null
622+
}
623+
}
591624
}
592625

modules/nf-lineage/src/test/nextflow/lineage/cli/LinCommandImplTest.groovy

+64-10
Original file line numberDiff line numberDiff line change
@@ -16,42 +16,58 @@
1616

1717
package nextflow.lineage.cli
1818

19+
import java.nio.file.Files
20+
import java.nio.file.Path
21+
import java.time.Instant
22+
import java.time.OffsetDateTime
23+
import java.time.ZoneOffset
24+
1925
import nextflow.SysEnv
2026
import nextflow.config.ConfigMap
2127
import nextflow.dag.MermaidHtmlRenderer
28+
import nextflow.exception.AbortOperationException
29+
import nextflow.file.FileHelper
30+
import nextflow.lineage.DefaultLinHistoryLog
2231
import nextflow.lineage.LinHistoryRecord
2332
import nextflow.lineage.LinStoreFactory
24-
import nextflow.lineage.DefaultLinHistoryLog
33+
import nextflow.lineage.fs.LinFileSystemProvider
2534
import nextflow.lineage.model.v1beta1.Checksum
26-
import nextflow.lineage.model.v1beta1.FileOutput
2735
import nextflow.lineage.model.v1beta1.DataPath
36+
import nextflow.lineage.model.v1beta1.FileOutput
2837
import nextflow.lineage.model.v1beta1.Parameter
2938
import nextflow.lineage.model.v1beta1.TaskRun
3039
import nextflow.lineage.model.v1beta1.Workflow
3140
import nextflow.lineage.model.v1beta1.WorkflowRun
3241
import nextflow.lineage.serde.LinEncoder
3342
import nextflow.plugin.Plugins
43+
import nextflow.util.CacheHelper
3444
import org.junit.Rule
45+
import spock.lang.Shared
3546
import spock.lang.Specification
36-
import spock.lang.TempDir
3747
import test.OutputCapture
38-
import java.nio.file.Files
39-
import java.nio.file.Path
40-
import java.time.Instant
41-
import java.time.OffsetDateTime
42-
import java.time.ZoneOffset
4348

4449
class LinCommandImplTest extends Specification{
4550

46-
@TempDir
51+
@Shared
4752
Path tmpDir
4853

54+
@Shared
4955
Path storeLocation
56+
57+
@Shared
5058
ConfigMap configMap
5159

60+
def reset() {
61+
def provider = FileHelper.getProviderFor('lid') as LinFileSystemProvider
62+
provider?.reset()
63+
LinStoreFactory.reset()
64+
}
65+
5266
def setup() {
67+
reset()
5368
// clear the environment to avoid the local env pollute the test env
5469
SysEnv.push([:])
70+
tmpDir = Files.createTempDirectory('tmp')
5571
storeLocation = tmpDir.resolve("store")
5672
configMap = new ConfigMap([lineage: [enabled: true, store: [location: storeLocation.toString(), logLocation: storeLocation.resolve(".log").toString()]]])
5773
}
@@ -60,11 +76,13 @@ class LinCommandImplTest extends Specification{
6076
Plugins.stop()
6177
LinStoreFactory.reset()
6278
SysEnv.pop()
79+
tmpDir?.deleteDir()
6380
}
6481

6582
def setupSpec() {
66-
LinStoreFactory.reset()
83+
reset()
6784
}
85+
6886
/*
6987
* Read more http://mrhaki.blogspot.com.es/2015/02/spocklight-capture-and-assert-system.html
7088
*/
@@ -488,6 +506,42 @@ class LinCommandImplTest extends Specification{
488506
stdout.join('\n') == expectedOutput1 || stdout.join('\n') == expectedOutput2
489507
}
490508

509+
def 'should print correct validate path' () {
510+
given:
511+
def outputFolder = tmpDir.resolve('output')
512+
Files.createDirectories(outputFolder)
513+
def outputFile = outputFolder.resolve('file1.txt')
514+
outputFile.text = "this is file1 == "
515+
516+
and:
517+
def encoder = new LinEncoder().withPrettyPrint(true)
518+
def hash = CacheHelper.hasher(outputFile).hash().toString()
519+
def correctData = new FileOutput(outputFile.toString(), new Checksum(hash,"nextflow", "standard"))
520+
def incorrectData = new FileOutput(outputFile.toString(), new Checksum("incorrectHash","nextflow", "standard"))
521+
def lid1 = storeLocation.resolve('12345/output/file1.txt/.data.json')
522+
Files.createDirectories(lid1.parent)
523+
lid1.text = encoder.encode(correctData)
524+
def lid2 = storeLocation.resolve('12345/output/file2.txt/.data.json')
525+
Files.createDirectories(lid2.parent)
526+
lid2.text = encoder.encode(incorrectData)
527+
when:
528+
new LinCommandImpl().check(configMap, ["lid://12345/output/file1.txt"])
529+
def stdout = capture
530+
.toString()
531+
.readLines()// remove the log part
532+
.findResults { line -> !line.contains('DEBUG') ? line : null }
533+
.findResults { line -> !line.contains('INFO') ? line : null }
534+
.findResults { line -> !line.contains('plugin') ? line : null }
535+
def expectedOutput1 = "Checksum validation succeed"
536+
then:
537+
stdout.size() == 1
538+
stdout[0] == expectedOutput1
491539

540+
when:
541+
new LinCommandImpl().check(configMap, ["lid://12345/output/file2.txt"])
542+
then:
543+
def err = thrown(AbortOperationException)
544+
err.message == "Checksum of '${outputFile}' does not match with lineage metadata"
545+
}
492546

493547
}

modules/nf-lineage/src/test/nextflow/lineage/fs/LinPathTest.groovy

+35
Original file line numberDiff line numberDiff line change
@@ -682,5 +682,40 @@ class LinPathTest extends Specification {
682682
thrown(FileNotFoundException)
683683
}
684684

685+
def 'should validate path' () {
686+
given:
687+
def outputFolder = data.resolve('output')
688+
outputFolder.mkdirs()
689+
def outputFile = outputFolder.resolve('file1.txt')
690+
outputFile.text = "this is file1"
691+
def encoder = new LinEncoder()
692+
def hash = CacheHelper.hasher(outputFile).hash().toString()
693+
def correctData = new FileOutput(outputFile.toString(), new Checksum(hash,"nextflow", "standard"))
694+
def incorrectData = new FileOutput(outputFile.toString(), new Checksum("incorrectHash","nextflow", "standard"))
695+
wdir.resolve('12345/output/file1.txt').mkdirs()
696+
wdir.resolve('12345/output/file2.txt').mkdirs()
697+
wdir.resolve('12345/output/file1.txt/.data.json').text = encoder.encode(correctData)
698+
wdir.resolve('12345/output/file2.txt/.data.json').text = encoder.encode(incorrectData)
699+
def lidFs = new LinFileSystemProvider().newFileSystem(new URI("lid:///"), [enabled: true, store: [location: wdir.toString()]])
700+
701+
when:
702+
def succeed = new LinPath(lidFs, '12345/output/file1.txt').validate()
703+
then:
704+
succeed
705+
706+
when:
707+
succeed = new LinPath(lidFs, '12345/output/file2.txt').validate()
708+
then:
709+
!succeed
710+
711+
when:
712+
succeed = new LinPath(lidFs, '12345/output/file3.txt').validate()
713+
then:
714+
!succeed
715+
716+
cleanup:
717+
outputFile.delete()
718+
}
719+
685720

686721
}

0 commit comments

Comments
 (0)