inoki-giskard committed on
Commit
b6a7e2b
·
1 Parent(s): 27538a2

Init cicd with commit 9bf277b

Browse files
cicd/.github/workflows/giskard_action.yaml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Workflow that retrains the example model and runs the scan pipeline
# whenever one of the listed example files (or this workflow) is pushed.
name: Deployment workflow

on:
  push:
    paths:
      - 'examples/github/train.py'
      - 'examples/github/titanic_test_data.csv'
      - 'examples/github/requirements.txt' # temporarily
      - '.github/workflows/giskard_action.yaml' # temporarily

jobs:
  Deployment:
    runs-on: ubuntu-latest
    steps:
      - name: Extract branch name
        shell: bash
        # Step outputs are written to the $GITHUB_OUTPUT file; the legacy
        # `set-output` workflow command is deprecated by GitHub Actions.
        run: echo "branch=${GITHUB_REF#refs/heads/}" >> "$GITHUB_OUTPUT"
        id: extract_branch

      - name: checkout repo content
        uses: actions/checkout@v4 # checkout the repository content to github runner

      - name: setup python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10' # install the python version needed

      - uses: syphar/restore-virtualenv@v1
        id: cache-virtualenv
        with:
          requirement_files: examples/github/requirements.txt # this is optional

      - uses: syphar/restore-pip-download-cache@v1
        if: steps.cache-virtualenv.outputs.cache-hit != 'true'

      # the package installation will only be executed when the
      # requirements-files have changed.
      - run: pip install -r examples/github/requirements.txt
        env:
          EMAIL_ADDRESS: ${{ secrets.EMAIL_ADDRESS }}
          EMAIL_PASSWORD: ${{ secrets.EMAIL_PASSWORD }}
          EMAIL_RECIPIENT: ${{ secrets.EMAIL_RECIPIENT }}
        if: steps.cache-virtualenv.outputs.cache-hit != 'true'

      - name: training
        run: |
          python examples/github/train.py

      - name: execute pipeline
        run: |
          python cli.py --loader github --model examples/github/artifacts/model --dataset examples/github/artifacts/dataset --output_format markdown
cicd/.models_and_datasets_to_be_skipped.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ model,dataset,status
2
+ facebook/bart-large-mnli,multi_nli,error
3
+ distilbert-base-uncased-finetuned-sst-2-english,sst2,done
4
+ cardiffnlp/twitter-roberta-base-sentiment-latest,tweet_eval,done
cicd/automation/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .post_discussion import create_discussion
2
+
3
+ __all__ = ["create_discussion"]
cicd/automation/post_discussion.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import huggingface_hub as hf_hub
2
def create_discussion(repo_id, model_name, hf_token, report, repo_type="space"):
    """Open a discussion on the Hugging Face Hub containing a scan report.

    Args:
        repo_id: Identifier of the Hub repository that receives the discussion.
        model_name: Name of the scanned model; used in the discussion title.
        hf_token: Hugging Face access token used to authenticate the request.
        report: Rendered report text, used as the discussion description.
        repo_type: Kind of Hub repository that ``repo_id`` refers to.
            Defaults to ``"space"``, the value this function always used.

    Returns:
        The object returned by ``huggingface_hub.create_discussion``.
    """
    return hf_hub.create_discussion(
        repo_id,
        title=f"Report for {model_name}",
        token=hf_token,
        description=report,
        repo_type=repo_type,
    )
cicd/cli.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+
3
+ from giskard_cicd.loaders import GithubLoader, HuggingFaceLoader
4
+ from giskard_cicd.pipeline.runner import PipelineRunner
5
+
6
+ from automation import create_discussion
7
+
8
def build_parser():
    """Build the command-line argument parser for the Giskard scanner."""
    parser = argparse.ArgumentParser(
        prog="Giskard Scanner", description="Scans a model for vulnerabilities and produces a report."
    )
    parser.add_argument(
        "--loader",
        help="Which loader to use to set up the model. Currently only `github` and `huggingface` are supported.",
        required=True,
    )
    parser.add_argument("--model", help="The model to scan.", required=True)
    parser.add_argument("--dataset", help="The validation or test dataset that will be used.")
    parser.add_argument(
        "--dataset_split", help="The split of the dataset to use. If not provided, the best split will be selected."
    )
    parser.add_argument("--dataset_config", help="The name of the dataset config subset to use.")
    parser.add_argument("--scan_config", help="Path to YAML file containing the configuration of the scan.")
    parser.add_argument("--output", help="Optional name of the output file.")
    parser.add_argument("--output_format", help="Format of the report (either HTML or markdown). Default is HTML.")
    parser.add_argument(
        "--output_portal",
        help="The output portal of the report (either huggingface or local directory). Default is local.",
    )
    parser.add_argument("--discussion_repo", help="The repo to push the report to.")
    parser.add_argument("--hf_token", help="The token to push the report to the repo.")
    return parser


def default_report_path(model, output_format):
    """Return the report file name used when ``--output`` is not given.

    The name is derived from the last ``/``-separated component of ``model``.
    The extension follows the rendered format: ``.md`` for ``"markdown"``,
    ``.html`` for anything else (HTML is the default format).
    """
    model_name = model.split("/")[-1]
    extension = "md" if output_format == "markdown" else "html"
    return f"{model_name}_report.{extension}"


def main(argv=None):
    """Run a scan from command-line arguments and write the rendered report.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read ``sys.argv``.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    # Fail before the scan runs rather than after it: posting a discussion
    # needs a target repository.
    if args.output_portal == "huggingface" and not args.discussion_repo:
        parser.error("--discussion_repo is required when --output_portal is huggingface.")

    supported_loaders = {
        "huggingface": HuggingFaceLoader(),
        "github": GithubLoader(),
    }

    runner = PipelineRunner(loaders=supported_loaders)

    runner_kwargs = {
        "loader_id": args.loader,
        "model": args.model,
        "dataset": args.dataset,
        "scan_config": args.scan_config,
    }

    # The split/config options are only forwarded for the huggingface loader.
    if args.loader == "huggingface":
        runner_kwargs.update(
            {
                "dataset_split": args.dataset_split,
                "dataset_config": args.dataset_config,
            }
        )

    report = runner.run(**runner_kwargs)

    if args.output_format == "markdown":
        rendered_report = report.to_markdown(template="github")
    else:
        rendered_report = report.to_html()

    if args.output_portal == "huggingface":
        # Push to discussion
        create_discussion(args.discussion_repo, args.model, args.hf_token, rendered_report)

    # The report is always written to a file, whether or not it was also posted.
    output_path = args.output or default_report_path(args.model, args.output_format)
    # Explicit encoding so the result does not depend on the platform default.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(rendered_report)


if __name__ == "__main__":
    main()
cicd/examples/github/cicd_config.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ artifact_path: "examples/github"
cicd/examples/github/readme.md ADDED
@@ -0,0 +1 @@
 
 
1
+ # GitHub CI/CD
cicd/examples/github/requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ giskard>=2.0.0b
2
+ git+https://github.com/Giskard-AI/cicd.git@main
3
+ json5==0.9.10
4
+ jsonpatch==1.32
5
+ jsonpointer==2.3
6
+ jsonschema==3.2.0
cicd/examples/github/titanic_test_data.csv ADDED
@@ -0,0 +1,447 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "PassengerId","Pclass","Name","Sex","Age","SibSp","Parch","Fare","Embarked","Survived"
2
+ 124,2,"Webber, Miss. Susan","female",32.5,0,0,13.0,"S","yes"
3
+ 715,2,"Greenberg, Mr. Samuel","male",52.0,0,0,13.0,"S","no"
4
+ 413,1,"Minahan, Miss. Daisy E","female",33.0,1,0,90.0,"Q","yes"
5
+ 82,3,"Sheerlinck, Mr. Jan Baptist","male",29.0,0,0,9.5,"S","yes"
6
+ 556,1,"Wright, Mr. George","male",62.0,0,0,26.55,"S","no"
7
+ 533,3,"Elias, Mr. Joseph Jr","male",17.0,1,1,7.2292,"C","no"
8
+ 850,1,"Goldenberg, Mrs. Samuel L (Edwiga Grabowska)","female","_GSK_NA_",1,0,89.1042,"C","yes"
9
+ 569,3,"Doharr, Mr. Tannous","male","_GSK_NA_",0,0,7.2292,"C","no"
10
+ 126,3,"Nicola-Yarred, Master. Elias","male",12.0,1,0,11.2417,"C","yes"
11
+ 544,2,"Beane, Mr. Edward","male",32.0,1,0,26.0,"S","yes"
12
+ 111,1,"Porter, Mr. Walter Chamberlain","male",47.0,0,0,52.0,"S","no"
13
+ 484,3,"Turkula, Mrs. (Hedwig)","female",63.0,0,0,9.5875,"S","yes"
14
+ 593,3,"Elsbury, Mr. William James","male",47.0,0,0,7.25,"S","no"
15
+ 422,3,"Charters, Mr. David","male",21.0,0,0,7.7333,"Q","no"
16
+ 847,3,"Sage, Mr. Douglas Bullen","male","_GSK_NA_",8,2,69.55,"S","no"
17
+ 328,2,"Ball, Mrs. (Ada E Hall)","female",36.0,0,0,13.0,"S","yes"
18
+ 828,2,"Mallet, Master. Andre","male",1.0,0,2,37.0042,"C","yes"
19
+ 883,3,"Dahlberg, Miss. Gerda Ulrika","female",22.0,0,0,10.5167,"S","no"
20
+ 437,3,"Ford, Miss. Doolina Margaret ""Daisy""","female",21.0,2,2,34.375,"S","no"
21
+ 88,3,"Slocovski, Mr. Selman Francis","male","_GSK_NA_",0,0,8.05,"S","no"
22
+ 705,3,"Hansen, Mr. Henrik Juul","male",26.0,1,0,7.8542,"S","no"
23
+ 391,1,"Carter, Mr. William Ernest","male",36.0,1,2,120.0,"S","yes"
24
+ 40,3,"Nicola-Yarred, Miss. Jamila","female",14.0,1,0,11.2417,"C","yes"
25
+ 672,1,"Davidson, Mr. Thornton","male",31.0,1,0,52.0,"S","no"
26
+ 620,2,"Gavey, Mr. Lawrence","male",26.0,0,0,10.5,"S","no"
27
+ 791,3,"Keane, Mr. Andrew ""Andy""","male","_GSK_NA_",0,0,7.75,"Q","no"
28
+ 63,1,"Harris, Mr. Henry Birkhardt","male",45.0,1,0,83.475,"S","no"
29
+ 800,3,"Van Impe, Mrs. Jean Baptiste (Rosalie Paula Govaert)","female",30.0,1,1,24.15,"S","no"
30
+ 317,2,"Kantor, Mrs. Sinai (Miriam Sternin)","female",24.0,1,0,26.0,"S","yes"
31
+ 617,3,"Danbom, Mr. Ernst Gilbert","male",34.0,1,1,14.4,"S","no"
32
+ 206,3,"Strom, Miss. Telma Matilda","female",2.0,0,1,10.4625,"S","no"
33
+ 274,1,"Natsch, Mr. Charles H","male",37.0,0,1,29.7,"C","no"
34
+ 567,3,"Stoytcheff, Mr. Ilia","male",19.0,0,0,7.8958,"S","no"
35
+ 632,3,"Lundahl, Mr. Johan Svensson","male",51.0,0,0,7.0542,"S","no"
36
+ 888,1,"Graham, Miss. Margaret Edith","female",19.0,0,0,30.0,"S","yes"
37
+ 480,3,"Hirvonen, Miss. Hildur E","female",2.0,0,1,12.2875,"S","yes"
38
+ 477,2,"Renouf, Mr. Peter Henry","male",34.0,1,0,21.0,"S","no"
39
+ 424,3,"Danbom, Mrs. Ernst Gilbert (Anna Sigrid Maria Brogren)","female",28.0,1,1,14.4,"S","no"
40
+ 741,1,"Hawksford, Mr. Walter James","male","_GSK_NA_",0,0,30.0,"S","yes"
41
+ 531,2,"Quick, Miss. Phyllis May","female",2.0,1,1,26.0,"S","yes"
42
+ 799,3,"Ibrahim Shawah, Mr. Yousseff","male",30.0,0,0,7.2292,"C","no"
43
+ 160,3,"Sage, Master. Thomas Henry","male","_GSK_NA_",8,2,69.55,"S","no"
44
+ 116,3,"Pekoniemi, Mr. Edvard","male",21.0,0,0,7.925,"S","no"
45
+ 290,3,"Connolly, Miss. Kate","female",22.0,0,0,7.75,"Q","yes"
46
+ 252,3,"Strom, Mrs. Wilhelm (Elna Matilda Persson)","female",29.0,1,1,10.4625,"S","no"
47
+ 306,1,"Allison, Master. Hudson Trevor","male",0.92,1,2,151.55,"S","yes"
48
+ 449,3,"Baclini, Miss. Marie Catherine","female",5.0,2,1,19.2583,"C","yes"
49
+ 483,3,"Rouse, Mr. Richard Henry","male",50.0,0,0,8.05,"S","no"
50
+ 587,2,"Jarvis, Mr. John Denzil","male",47.0,0,0,15.0,"S","no"
51
+ 25,3,"Palsson, Miss. Torborg Danira","female",8.0,3,1,21.075,"S","no"
52
+ 289,2,"Hosono, Mr. Masabumi","male",42.0,0,0,13.0,"S","yes"
53
+ 769,3,"Moran, Mr. Daniel J","male","_GSK_NA_",1,0,24.15,"Q","no"
54
+ 697,3,"Kelly, Mr. James","male",44.0,0,0,8.05,"S","no"
55
+ 172,3,"Rice, Master. Arthur","male",4.0,4,1,29.125,"Q","no"
56
+ 548,2,"Padro y Manent, Mr. Julian","male","_GSK_NA_",0,0,13.8625,"C","yes"
57
+ 586,1,"Taussig, Miss. Ruth","female",18.0,0,2,79.65,"S","yes"
58
+ 52,3,"Nosworthy, Mr. Richard Cater","male",21.0,0,0,7.8,"S","no"
59
+ 862,2,"Giles, Mr. Frederick Edward","male",21.0,1,0,11.5,"S","no"
60
+ 553,3,"O'Brien, Mr. Timothy","male","_GSK_NA_",0,0,7.8292,"Q","no"
61
+ 36,1,"Holverson, Mr. Alexander Oskar","male",42.0,1,0,52.0,"S","no"
62
+ 261,3,"Smith, Mr. Thomas","male","_GSK_NA_",0,0,7.75,"Q","no"
63
+ 366,3,"Adahl, Mr. Mauritz Nils Martin","male",30.0,0,0,7.25,"S","no"
64
+ 201,3,"Vande Walle, Mr. Nestor Cyriel","male",28.0,0,0,9.5,"S","no"
65
+ 761,3,"Garfirth, Mr. John","male","_GSK_NA_",0,0,14.5,"S","no"
66
+ 706,2,"Morley, Mr. Henry Samuel (""Mr Henry Marshall"")","male",39.0,0,0,26.0,"S","no"
67
+ 594,3,"Bourke, Miss. Mary","female","_GSK_NA_",0,2,7.75,"Q","no"
68
+ 53,1,"Harper, Mrs. Henry Sleeper (Myna Haxtun)","female",49.0,1,0,76.7292,"C","yes"
69
+ 546,1,"Nicholson, Mr. Arthur Ernest","male",64.0,0,0,26.0,"S","no"
70
+ 195,1,"Brown, Mrs. James Joseph (Margaret Tobin)","female",44.0,0,0,27.7208,"C","yes"
71
+ 530,2,"Hocking, Mr. Richard George","male",23.0,2,1,11.5,"S","no"
72
+ 702,1,"Silverthorne, Mr. Spencer Victor","male",35.0,0,0,26.2875,"S","yes"
73
+ 279,3,"Rice, Master. Eric","male",7.0,4,1,29.125,"Q","no"
74
+ 223,3,"Green, Mr. George Henry","male",51.0,0,0,8.05,"S","no"
75
+ 372,3,"Wiklund, Mr. Jakob Alfred","male",18.0,1,0,6.4958,"S","no"
76
+ 5,3,"Allen, Mr. William Henry","male",35.0,0,0,8.05,"S","no"
77
+ 519,2,"Angle, Mrs. William A (Florence ""Mary"" Agnes Hughes)","female",36.0,1,0,26.0,"S","yes"
78
+ 326,1,"Young, Miss. Marie Grice","female",36.0,0,0,135.6333,"C","yes"
79
+ 492,3,"Windelov, Mr. Einar","male",21.0,0,0,7.25,"S","no"
80
+ 344,2,"Sedgwick, Mr. Charles Frederick Waddington","male",25.0,0,0,13.0,"S","no"
81
+ 469,3,"Scanlan, Mr. James","male","_GSK_NA_",0,0,7.725,"Q","no"
82
+ 77,3,"Staneff, Mr. Ivan","male","_GSK_NA_",0,0,7.8958,"S","no"
83
+ 272,3,"Tornquist, Mr. William Henry","male",25.0,0,0,0.0,"S","yes"
84
+ 753,3,"Vande Velde, Mr. Johannes Joseph","male",33.0,0,0,9.5,"S","no"
85
+ 658,3,"Bourke, Mrs. John (Catherine)","female",32.0,1,1,15.5,"Q","no"
86
+ 388,2,"Buss, Miss. Kate","female",36.0,0,0,13.0,"S","yes"
87
+ 738,1,"Lesurer, Mr. Gustave J","male",35.0,0,0,512.3292,"C","yes"
88
+ 823,1,"Reuchlin, Jonkheer. John George","male",38.0,0,0,0.0,"S","no"
89
+ 814,3,"Andersson, Miss. Ebba Iris Alfrida","female",6.0,4,2,31.275,"S","no"
90
+ 596,3,"Van Impe, Mr. Jean Baptiste","male",36.0,1,1,24.15,"S","no"
91
+ 468,1,"Smart, Mr. John Montgomery","male",56.0,0,0,26.55,"S","no"
92
+ 95,3,"Coxon, Mr. Daniel","male",59.0,0,0,7.25,"S","no"
93
+ 148,3,"Ford, Miss. Robina Maggie ""Ruby""","female",9.0,2,2,34.375,"S","no"
94
+ 704,3,"Gallagher, Mr. Martin","male",25.0,0,0,7.7417,"Q","no"
95
+ 426,3,"Wiseman, Mr. Phillippe","male","_GSK_NA_",0,0,7.25,"S","no"
96
+ 730,3,"Ilmakangas, Miss. Pieta Sofia","female",25.0,1,0,7.925,"S","no"
97
+ 525,3,"Kassem, Mr. Fared","male","_GSK_NA_",0,0,7.2292,"C","no"
98
+ 727,2,"Renouf, Mrs. Peter Henry (Lillian Jefferys)","female",30.0,3,0,21.0,"S","yes"
99
+ 578,1,"Silvey, Mrs. William Baird (Alice Munger)","female",39.0,1,0,55.9,"S","yes"
100
+ 467,2,"Campbell, Mr. William","male","_GSK_NA_",0,0,0.0,"S","no"
101
+ 609,2,"Laroche, Mrs. Joseph (Juliette Marie Louise Lafargue)","female",22.0,1,2,41.5792,"C","yes"
102
+ 774,3,"Elias, Mr. Dibo","male","_GSK_NA_",0,0,7.225,"C","no"
103
+ 504,3,"Laitinen, Miss. Kristina Sofia","female",37.0,0,0,9.5875,"S","no"
104
+ 100,2,"Kantor, Mr. Sinai","male",34.0,1,0,26.0,"S","no"
105
+ 320,1,"Spedden, Mrs. Frederic Oakley (Margaretta Corning Stone)","female",40.0,1,1,134.5,"C","yes"
106
+ 98,1,"Greenfield, Mr. William Bertram","male",23.0,0,1,63.3583,"C","yes"
107
+ 880,1,"Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)","female",56.0,0,1,83.1583,"C","yes"
108
+ 716,3,"Soholt, Mr. Peter Andreas Lauritz Andersen","male",19.0,0,0,7.65,"S","no"
109
+ 349,3,"Coutts, Master. William Loch ""William""","male",3.0,1,1,15.9,"S","yes"
110
+ 44,2,"Laroche, Miss. Simonne Marie Anne Andree","female",3.0,1,2,41.5792,"C","yes"
111
+ 631,1,"Barkworth, Mr. Algernon Henry Wilson","male",80.0,0,0,30.0,"S","yes"
112
+ 154,3,"van Billiard, Mr. Austin Blyler","male",40.5,0,2,14.5,"S","no"
113
+ 683,3,"Olsvigen, Mr. Thor Anderson","male",20.0,0,0,9.225,"S","no"
114
+ 92,3,"Andreasson, Mr. Paul Edvin","male",20.0,0,0,7.8542,"S","no"
115
+ 574,3,"Kelly, Miss. Mary","female","_GSK_NA_",0,0,7.75,"Q","yes"
116
+ 541,1,"Crosby, Miss. Harriet R","female",36.0,0,2,71.0,"S","yes"
117
+ 886,3,"Rice, Mrs. William (Margaret Norton)","female",39.0,0,5,29.125,"Q","no"
118
+ 215,3,"Kiernan, Mr. Philip","male","_GSK_NA_",1,0,7.75,"Q","no"
119
+ 381,1,"Bidois, Miss. Rosalie","female",42.0,0,0,227.525,"C","yes"
120
+ 776,3,"Myhrman, Mr. Pehr Fabian Oliver Malkolm","male",18.0,0,0,7.75,"S","no"
121
+ 430,3,"Pickard, Mr. Berk (Berk Trembisky)","male",32.0,0,0,8.05,"S","yes"
122
+ 222,2,"Bracken, Mr. James H","male",27.0,0,0,13.0,"S","no"
123
+ 820,3,"Skoog, Master. Karl Thorsten","male",10.0,3,2,27.9,"S","no"
124
+ 51,3,"Panula, Master. Juha Niilo","male",7.0,4,1,39.6875,"S","no"
125
+ 250,2,"Carter, Rev. Ernest Courtenay","male",54.0,1,0,26.0,"S","no"
126
+ 692,3,"Karun, Miss. Manca","female",4.0,0,1,13.4167,"C","yes"
127
+ 435,1,"Silvey, Mr. William Baird","male",50.0,1,0,55.9,"S","no"
128
+ 781,3,"Ayoub, Miss. Banoura","female",13.0,0,0,7.2292,"C","yes"
129
+ 491,3,"Hagland, Mr. Konrad Mathias Reiersen","male","_GSK_NA_",1,0,19.9667,"S","no"
130
+ 554,3,"Leeni, Mr. Fahim (""Philip Zenni"")","male",22.0,0,0,7.225,"C","yes"
131
+ 656,2,"Hickman, Mr. Leonard Mark","male",24.0,2,0,73.5,"S","no"
132
+ 380,3,"Gustafsson, Mr. Karl Gideon","male",19.0,0,0,7.775,"S","no"
133
+ 509,3,"Olsen, Mr. Henry Margido","male",28.0,0,0,22.525,"S","no"
134
+ 230,3,"Lefebre, Miss. Mathilde","female","_GSK_NA_",3,1,25.4667,"S","no"
135
+ 458,1,"Kenyon, Mrs. Frederick R (Marion)","female","_GSK_NA_",1,0,51.8625,"S","yes"
136
+ 733,2,"Knight, Mr. Robert J","male","_GSK_NA_",0,0,0.0,"S","no"
137
+ 740,3,"Nankoff, Mr. Minko","male","_GSK_NA_",0,0,7.8958,"S","no"
138
+ 420,3,"Van Impe, Miss. Catharina","female",10.0,0,2,24.15,"S","no"
139
+ 175,1,"Smith, Mr. James Clinch","male",56.0,0,0,30.6958,"C","no"
140
+ 767,1,"Brewe, Dr. Arthur Jackson","male","_GSK_NA_",0,0,39.6,"C","no"
141
+ 608,1,"Daniel, Mr. Robert Williams","male",27.0,0,0,30.5,"S","yes"
142
+ 75,3,"Bing, Mr. Lee","male",32.0,0,0,56.4958,"S","yes"
143
+ 242,3,"Murphy, Miss. Katherine ""Kate""","female","_GSK_NA_",1,0,15.5,"Q","yes"
144
+ 506,1,"Penasco y Castellana, Mr. Victor de Satode","male",18.0,1,0,108.9,"C","no"
145
+ 481,3,"Goodwin, Master. Harold Victor","male",9.0,5,2,46.9,"S","no"
146
+ 461,1,"Anderson, Mr. Harry","male",48.0,0,0,26.55,"S","yes"
147
+ 185,3,"Kink-Heilmann, Miss. Luise Gretchen","female",4.0,0,2,22.025,"S","yes"
148
+ 866,2,"Bystrom, Mrs. (Karolina)","female",42.0,0,0,13.0,"S","yes"
149
+ 165,3,"Panula, Master. Eino Viljami","male",1.0,4,1,39.6875,"S","no"
150
+ 406,2,"Gale, Mr. Shadrach","male",34.0,1,0,21.0,"S","no"
151
+ 248,2,"Hamalainen, Mrs. William (Anna)","female",24.0,0,2,14.5,"S","yes"
152
+ 211,3,"Ali, Mr. Ahmed","male",24.0,0,0,7.05,"S","no"
153
+ 337,1,"Pears, Mr. Thomas Clinton","male",29.0,1,0,66.6,"S","no"
154
+ 879,3,"Laleff, Mr. Kristo","male","_GSK_NA_",0,0,7.8958,"S","no"
155
+ 15,3,"Vestrom, Miss. Hulda Amanda Adolfina","female",14.0,0,0,7.8542,"S","no"
156
+ 56,1,"Woolner, Mr. Hugh","male","_GSK_NA_",0,0,35.5,"S","yes"
157
+ 302,3,"McCoy, Mr. Bernard","male","_GSK_NA_",2,0,23.25,"Q","yes"
158
+ 97,1,"Goldschmidt, Mr. George B","male",71.0,0,0,34.6542,"C","no"
159
+ 600,1,"Duff Gordon, Sir. Cosmo Edmund (""Mr Morgan"")","male",49.0,1,0,56.9292,"C","yes"
160
+ 876,3,"Najib, Miss. Adele Kiamie ""Jane""","female",15.0,0,0,7.225,"C","yes"
161
+ 731,1,"Allen, Miss. Elisabeth Walton","female",29.0,0,0,211.3375,"S","yes"
162
+ 744,3,"McNamee, Mr. Neal","male",24.0,1,0,16.1,"S","no"
163
+ 30,3,"Todoroff, Mr. Lalio","male","_GSK_NA_",0,0,7.8958,"S","no"
164
+ 673,2,"Mitchell, Mr. Henry Michael","male",70.0,0,0,10.5,"S","no"
165
+ 841,3,"Alhomaki, Mr. Ilmari Rudolf","male",20.0,0,0,7.925,"S","no"
166
+ 140,1,"Giglio, Mr. Victor","male",24.0,0,0,79.2,"C","no"
167
+ 62,1,"Icard, Miss. Amelie","female",38.0,0,0,80.0,"_GSK_NA_","yes"
168
+ 639,3,"Panula, Mrs. Juha (Maria Emilia Ojala)","female",41.0,0,5,39.6875,"S","no"
169
+ 693,3,"Lam, Mr. Ali","male","_GSK_NA_",0,0,56.4958,"S","yes"
170
+ 176,3,"Klasen, Mr. Klas Albin","male",18.0,1,1,7.8542,"S","no"
171
+ 417,2,"Drew, Mrs. James Vivian (Lulu Thorne Christian)","female",34.0,1,1,32.5,"S","yes"
172
+ 348,3,"Davison, Mrs. Thomas Henry (Mary E Finck)","female","_GSK_NA_",1,0,16.1,"S","yes"
173
+ 542,3,"Andersson, Miss. Ingeborg Constanzia","female",9.0,4,2,31.275,"S","no"
174
+ 433,2,"Louch, Mrs. Charles Alexander (Alice Adelaide Slow)","female",42.0,1,0,26.0,"S","yes"
175
+ 760,1,"Rothes, the Countess. of (Lucy Noel Martha Dyer-Edwards)","female",33.0,0,0,86.5,"S","yes"
176
+ 725,1,"Chambers, Mr. Norman Campbell","male",27.0,1,0,53.1,"S","yes"
177
+ 451,2,"West, Mr. Edwy Arthur","male",36.0,1,2,27.75,"S","no"
178
+ 91,3,"Christmann, Mr. Emil","male",29.0,0,0,8.05,"S","no"
179
+ 76,3,"Moen, Mr. Sigurd Hansen","male",25.0,0,0,7.65,"S","no"
180
+ 47,3,"Lennon, Mr. Denis","male","_GSK_NA_",1,0,15.5,"Q","no"
181
+ 65,1,"Stewart, Mr. Albert A","male","_GSK_NA_",0,0,27.7208,"C","no"
182
+ 258,1,"Cherry, Miss. Gladys","female",30.0,0,0,86.5,"S","yes"
183
+ 214,2,"Givard, Mr. Hans Kristensen","male",30.0,0,0,13.0,"S","no"
184
+ 245,3,"Attalah, Mr. Sleiman","male",30.0,0,0,7.225,"C","no"
185
+ 599,3,"Boulos, Mr. Hanna","male","_GSK_NA_",0,0,7.225,"C","no"
186
+ 400,2,"Trout, Mrs. William H (Jessie L)","female",28.0,0,0,12.65,"S","yes"
187
+ 772,3,"Jensen, Mr. Niels Peder","male",48.0,0,0,7.8542,"S","no"
188
+ 37,3,"Mamee, Mr. Hanna","male","_GSK_NA_",0,0,7.2292,"C","yes"
189
+ 114,3,"Jussila, Miss. Katriina","female",20.0,1,0,9.825,"S","no"
190
+ 853,3,"Boulos, Miss. Nourelain","female",9.0,1,1,15.2458,"C","no"
191
+ 676,3,"Edvardsson, Mr. Gustaf Hjalmar","male",18.0,0,0,7.775,"S","no"
192
+ 287,3,"de Mulder, Mr. Theodore","male",30.0,0,0,9.5,"S","yes"
193
+ 583,2,"Downton, Mr. William James","male",54.0,0,0,26.0,"S","no"
194
+ 71,2,"Jenkin, Mr. Stephen Curnow","male",32.0,0,0,10.5,"S","no"
195
+ 120,3,"Andersson, Miss. Ellis Anna Maria","female",2.0,4,2,31.275,"S","no"
196
+ 144,3,"Burke, Mr. Jeremiah","male",19.0,0,0,6.75,"Q","no"
197
+ 493,1,"Molson, Mr. Harry Markland","male",55.0,0,0,30.5,"S","no"
198
+ 870,3,"Johnson, Master. Harold Theodor","male",4.0,1,1,11.1333,"S","yes"
199
+ 869,3,"van Melkebeke, Mr. Philemon","male","_GSK_NA_",0,0,9.5,"S","no"
200
+ 13,3,"Saundercock, Mr. William Henry","male",20.0,0,0,8.05,"S","no"
201
+ 685,2,"Brown, Mr. Thomas William Solomon","male",60.0,1,1,39.0,"S","no"
202
+ 643,3,"Skoog, Miss. Margit Elizabeth","female",2.0,3,2,27.9,"S","no"
203
+ 87,3,"Ford, Mr. William Neal","male",16.0,1,3,34.375,"S","no"
204
+ 296,1,"Lewy, Mr. Ervin G","male","_GSK_NA_",0,0,27.7208,"C","no"
205
+ 694,3,"Saad, Mr. Khalil","male",25.0,0,0,7.225,"C","no"
206
+ 410,3,"Lefebre, Miss. Ida","female","_GSK_NA_",3,1,25.4667,"S","no"
207
+ 645,3,"Baclini, Miss. Eugenie","female",0.75,2,1,19.2583,"C","yes"
208
+ 803,1,"Carter, Master. William Thornton II","male",11.0,1,2,120.0,"S","yes"
209
+ 450,1,"Peuchen, Major. Arthur Godfrey","male",52.0,0,0,30.5,"S","yes"
210
+ 550,2,"Davies, Master. John Morgan Jr","male",8.0,1,1,36.75,"S","yes"
211
+ 352,1,"Williams-Lambert, Mr. Fletcher Fellows","male","_GSK_NA_",0,0,35.0,"S","no"
212
+ 580,3,"Jussila, Mr. Eiriik","male",32.0,0,0,7.925,"S","yes"
213
+ 319,1,"Wick, Miss. Mary Natalie","female",31.0,0,2,164.8667,"S","yes"
214
+ 831,3,"Yasbeck, Mrs. Antoni (Selini Alexander)","female",15.0,1,0,14.4542,"C","yes"
215
+ 777,3,"Tobin, Mr. Roger","male","_GSK_NA_",0,0,7.75,"Q","no"
216
+ 341,2,"Navratil, Master. Edmond Roger","male",2.0,1,1,26.0,"S","yes"
217
+ 871,3,"Balkic, Mr. Cerin","male",26.0,0,0,7.8958,"S","no"
218
+ 271,1,"Cairns, Mr. Alexander","male","_GSK_NA_",0,0,31.0,"S","no"
219
+ 755,2,"Herman, Mrs. Samuel (Jane Laver)","female",48.0,1,2,65.0,"S","yes"
220
+ 110,3,"Moran, Miss. Bertha","female","_GSK_NA_",1,0,24.15,"Q","yes"
221
+ 829,3,"McCormack, Mr. Thomas Joseph","male","_GSK_NA_",0,0,7.75,"Q","yes"
222
+ 448,1,"Seward, Mr. Frederic Kimber","male",34.0,0,0,26.55,"S","yes"
223
+ 33,3,"Glynn, Miss. Mary Agatha","female","_GSK_NA_",0,0,7.75,"Q","yes"
224
+ 465,3,"Maisner, Mr. Simon","male","_GSK_NA_",0,0,8.05,"S","no"
225
+ 427,2,"Clarke, Mrs. Charles V (Ada Maria Winfield)","female",28.0,1,0,26.0,"S","yes"
226
+ 204,3,"Youseff, Mr. Gerious","male",45.5,0,0,7.225,"C","no"
227
+ 431,1,"Bjornstrom-Steffansson, Mr. Mauritz Hakan","male",28.0,0,0,26.55,"S","yes"
228
+ 732,3,"Hassan, Mr. Houssein G N","male",11.0,0,0,18.7875,"C","no"
229
+ 787,3,"Sjoblom, Miss. Anna Sofia","female",18.0,0,0,7.4958,"S","yes"
230
+ 508,1,"Bradley, Mr. George (""George Arthur Brayton"")","male","_GSK_NA_",0,0,26.55,"S","yes"
231
+ 802,2,"Collyer, Mrs. Harvey (Charlotte Annie Tate)","female",31.0,1,1,26.25,"S","yes"
232
+ 310,1,"Francatelli, Miss. Laura Mabel","female",30.0,0,0,56.9292,"C","yes"
233
+ 107,3,"Salkjelsvik, Miss. Anna Kristine","female",21.0,0,0,7.65,"S","yes"
234
+ 299,1,"Saalfeld, Mr. Adolphe","male","_GSK_NA_",0,0,30.5,"S","yes"
235
+ 459,2,"Toomey, Miss. Ellen","female",50.0,0,0,10.5,"S","yes"
236
+ 641,3,"Jensen, Mr. Hans Peder","male",20.0,0,0,7.8542,"S","no"
237
+ 668,3,"Rommetvedt, Mr. Knud Paust","male","_GSK_NA_",0,0,7.775,"S","no"
238
+ 523,3,"Lahoud, Mr. Sarkis","male","_GSK_NA_",0,0,7.225,"C","no"
239
+ 710,3,"Moubarek, Master. Halim Gonios (""William George"")","male","_GSK_NA_",1,1,15.2458,"C","yes"
240
+ 249,1,"Beckwith, Mr. Richard Leonard","male",37.0,1,1,52.5542,"S","yes"
241
+ 677,3,"Sawyer, Mr. Frederick Charles","male",24.5,0,0,8.05,"S","no"
242
+ 595,2,"Chapman, Mr. John Henry","male",37.0,1,0,26.0,"S","no"
243
+ 667,2,"Butler, Mr. Reginald Fenton","male",25.0,0,0,13.0,"S","no"
244
+ 537,1,"Butt, Major. Archibald Willingham","male",45.0,0,0,26.55,"S","no"
245
+ 666,2,"Hickman, Mr. Lewis","male",32.0,2,0,73.5,"S","no"
246
+ 581,2,"Christy, Miss. Julie Rachel","female",25.0,1,1,30.0,"S","yes"
247
+ 630,3,"O'Connell, Mr. Patrick D","male","_GSK_NA_",0,0,7.7333,"Q","no"
248
+ 648,1,"Simonius-Blumer, Col. Oberst Alfons","male",56.0,0,0,35.5,"C","yes"
249
+ 878,3,"Petroff, Mr. Nedelio","male",19.0,0,0,7.8958,"S","no"
250
+ 269,1,"Graham, Mrs. William Thompson (Edith Junkins)","female",58.0,0,1,153.4625,"S","yes"
251
+ 234,3,"Asplund, Miss. Lillian Gertrud","female",5.0,4,2,31.3875,"S","yes"
252
+ 644,3,"Foo, Mr. Choong","male","_GSK_NA_",0,0,56.4958,"S","yes"
253
+ 118,2,"Turpin, Mr. William John Robert","male",29.0,1,0,21.0,"S","no"
254
+ 333,1,"Graham, Mr. George Edward","male",38.0,0,1,153.4625,"S","no"
255
+ 454,1,"Goldenberg, Mr. Samuel L","male",49.0,1,0,89.1042,"C","yes"
256
+ 139,3,"Osen, Mr. Olaf Elon","male",16.0,0,0,9.2167,"S","no"
257
+ 606,3,"Lindell, Mr. Edvard Bengtsson","male",36.0,1,0,15.55,"S","no"
258
+ 535,3,"Cacic, Miss. Marija","female",30.0,0,0,8.6625,"S","no"
259
+ 221,3,"Sunderland, Mr. Victor Francis","male",16.0,0,0,8.05,"S","yes"
260
+ 444,2,"Reynaldo, Ms. Encarnacion","female",28.0,0,0,13.0,"S","yes"
261
+ 330,1,"Hippach, Miss. Jean Gertrude","female",16.0,0,1,57.9792,"C","yes"
262
+ 805,3,"Hedman, Mr. Oskar Arvid","male",27.0,0,0,6.975,"S","yes"
263
+ 55,1,"Ostby, Mr. Engelhart Cornelius","male",65.0,0,1,61.9792,"C","no"
264
+ 528,1,"Farthing, Mr. John","male","_GSK_NA_",0,0,221.7792,"S","no"
265
+ 359,3,"McGovern, Miss. Mary","female","_GSK_NA_",0,0,7.8792,"Q","yes"
266
+ 354,3,"Arnold-Franchi, Mr. Josef","male",25.0,1,0,17.8,"S","no"
267
+ 678,3,"Turja, Miss. Anna Sofia","female",18.0,0,0,9.8417,"S","yes"
268
+ 273,2,"Mellinger, Mrs. (Elizabeth Anne Maidment)","female",41.0,0,1,19.5,"S","yes"
269
+ 429,3,"Flynn, Mr. James","male","_GSK_NA_",0,0,7.75,"Q","no"
270
+ 536,2,"Hart, Miss. Eva Miriam","female",7.0,0,2,26.25,"S","yes"
271
+ 838,3,"Sirota, Mr. Maurice","male","_GSK_NA_",0,0,8.05,"S","no"
272
+ 179,2,"Hale, Mr. Reginald","male",30.0,0,0,13.0,"S","no"
273
+ 339,3,"Dahl, Mr. Karl Edwart","male",45.0,0,0,8.05,"S","yes"
274
+ 724,2,"Hodges, Mr. Henry Price","male",50.0,0,0,13.0,"S","no"
275
+ 524,1,"Hippach, Mrs. Louis Albert (Ida Sophia Fischer)","female",44.0,0,1,57.9792,"C","yes"
276
+ 734,2,"Berriman, Mr. William John","male",23.0,0,0,13.0,"S","no"
277
+ 164,3,"Calic, Mr. Jovo","male",17.0,0,0,8.6625,"S","no"
278
+ 304,2,"Keane, Miss. Nora A","female","_GSK_NA_",0,0,12.35,"Q","yes"
279
+ 356,3,"Vanden Steen, Mr. Leo Peter","male",28.0,0,0,9.5,"S","no"
280
+ 436,1,"Carter, Miss. Lucile Polk","female",14.0,1,2,120.0,"S","yes"
281
+ 622,1,"Kimball, Mr. Edwin Nelson Jr","male",42.0,1,0,52.5542,"S","yes"
282
+ 551,1,"Thayer, Mr. John Borland Jr","male",17.0,0,2,110.8833,"C","yes"
283
+ 109,3,"Rekic, Mr. Tido","male",38.0,0,0,7.8958,"S","no"
284
+ 265,3,"Henry, Miss. Delia","female","_GSK_NA_",0,0,7.75,"Q","no"
285
+ 628,1,"Longley, Miss. Gretchen Fiske","female",21.0,0,0,77.9583,"S","yes"
286
+ 394,1,"Newell, Miss. Marjorie","female",23.0,1,0,113.275,"C","yes"
287
+ 748,2,"Sinkkonen, Miss. Anna","female",30.0,0,0,13.0,"S","yes"
288
+ 698,3,"Mullens, Miss. Katherine ""Katie""","female","_GSK_NA_",0,0,7.7333,"Q","yes"
289
+ 66,3,"Moubarek, Master. Gerios","male","_GSK_NA_",1,1,15.2458,"C","yes"
290
+ 681,3,"Peters, Miss. Katie","female","_GSK_NA_",0,0,8.1375,"Q","no"
291
+ 663,1,"Colley, Mr. Edward Pomeroy","male",47.0,0,0,25.5875,"S","no"
292
+ 158,3,"Corn, Mr. Harry","male",30.0,0,0,8.05,"S","no"
293
+ 298,1,"Allison, Miss. Helen Loraine","female",2.0,1,2,151.55,"S","no"
294
+ 674,2,"Wilhelms, Mr. Charles","male",31.0,0,0,13.0,"S","yes"
295
+ 808,3,"Pettersson, Miss. Ellen Natalia","female",18.0,0,0,7.775,"S","no"
296
+ 545,1,"Douglas, Mr. Walter Donald","male",50.0,1,0,106.425,"C","no"
297
+ 338,1,"Burns, Miss. Elizabeth Margaret","female",41.0,0,0,134.5,"C","yes"
298
+ 833,3,"Saad, Mr. Amin","male","_GSK_NA_",0,0,7.2292,"C","no"
299
+ 94,3,"Dean, Mr. Bertram Frank","male",26.0,1,2,20.575,"S","no"
300
+ 133,3,"Robins, Mrs. Alexander A (Grace Charity Laury)","female",47.0,1,0,14.5,"S","no"
301
+ 383,3,"Tikkanen, Mr. Juho","male",32.0,0,0,7.925,"S","no"
302
+ 720,3,"Johnson, Mr. Malkolm Joackim","male",33.0,0,0,7.775,"S","no"
303
+ 739,3,"Ivanoff, Mr. Kanio","male","_GSK_NA_",0,0,7.8958,"S","no"
304
+ 343,2,"Collander, Mr. Erik Gustaf","male",28.0,0,0,13.0,"S","no"
305
+ 647,3,"Cor, Mr. Liudevit","male",19.0,0,0,7.8958,"S","no"
306
+ 286,3,"Stankovic, Mr. Ivan","male",33.0,0,0,8.6625,"C","no"
307
+ 743,1,"Ryerson, Miss. Susan Parker ""Suzette""","female",21.0,2,2,262.375,"C","yes"
308
+ 371,1,"Harder, Mr. George Achilles","male",25.0,1,0,55.4417,"C","yes"
309
+ 457,1,"Millet, Mr. Francis Davis","male",65.0,0,0,26.55,"S","no"
310
+ 882,3,"Markun, Mr. Johann","male",33.0,0,0,7.8958,"S","no"
311
+ 884,2,"Banfield, Mr. Frederick James","male",28.0,0,0,10.5,"S","no"
312
+ 560,3,"de Messemaeker, Mrs. Guillaume Joseph (Emma)","female",36.0,1,0,17.4,"S","yes"
313
+ 168,3,"Skoog, Mrs. William (Anna Bernhardina Karlsson)","female",45.0,1,4,27.9,"S","no"
314
+ 636,2,"Davis, Miss. Mary","female",28.0,0,0,13.0,"S","yes"
315
+ 885,3,"Sutehall, Mr. Henry Jr","male",25.0,0,0,7.05,"S","no"
316
+ 131,3,"Drazenoic, Mr. Jozef","male",33.0,0,0,7.8958,"C","no"
317
+ 505,1,"Maioni, Miss. Roberta","female",16.0,0,0,86.5,"S","yes"
318
+ 332,1,"Partner, Mr. Austen","male",45.5,0,0,28.5,"S","no"
319
+ 132,3,"Coelho, Mr. Domingos Fernandeo","male",20.0,0,0,7.05,"S","no"
320
+ 500,3,"Svensson, Mr. Olof","male",24.0,0,0,7.7958,"S","no"
321
+ 135,2,"Sobey, Mr. Samuel James Hayden","male",25.0,0,0,13.0,"S","no"
322
+ 192,2,"Carbines, Mr. William","male",19.0,0,0,13.0,"S","no"
323
+ 61,3,"Sirayanian, Mr. Orsen","male",22.0,0,0,7.2292,"C","no"
324
+ 819,3,"Holm, Mr. John Fredrik Alexander","male",43.0,0,0,6.45,"S","no"
325
+ 428,2,"Phillips, Miss. Kate Florence (""Mrs Kate Louise Phillips Marshall"")","female",19.0,0,0,26.0,"S","yes"
326
+ 161,3,"Cribb, Mr. John Hatfield","male",44.0,0,1,16.1,"S","no"
327
+ 117,3,"Connors, Mr. Patrick","male",70.5,0,0,7.75,"Q","no"
328
+ 839,3,"Chip, Mr. Chang","male",32.0,0,0,56.4958,"S","yes"
329
+ 861,3,"Hansen, Mr. Claus Peter","male",41.0,2,0,14.1083,"S","no"
330
+ 688,3,"Dakic, Mr. Branko","male",19.0,0,0,10.1708,"S","no"
331
+ 283,3,"de Pelsmaeker, Mr. Alfons","male",16.0,0,0,9.5,"S","no"
332
+ 402,3,"Adams, Mr. John","male",26.0,0,0,8.05,"S","no"
333
+ 843,1,"Serepeca, Miss. Augusta","female",30.0,0,0,31.0,"C","yes"
334
+ 48,3,"O'Driscoll, Miss. Bridget","female","_GSK_NA_",0,0,7.75,"Q","yes"
335
+ 770,3,"Gronnestad, Mr. Daniel Danielsen","male",32.0,0,0,8.3625,"S","no"
336
+ 405,3,"Oreskovic, Miss. Marija","female",20.0,0,0,8.6625,"S","no"
337
+ 874,3,"Vander Cruyssen, Mr. Victor","male",47.0,0,0,9.0,"S","no"
338
+ 196,1,"Lurette, Miss. Elise","female",58.0,0,0,146.5208,"C","yes"
339
+ 167,1,"Chibnall, Mrs. (Edith Martha Bowerman)","female","_GSK_NA_",0,1,55.0,"S","yes"
340
+ 517,2,"Lemore, Mrs. (Amelia Milley)","female",34.0,0,0,10.5,"S","yes"
341
+ 526,3,"Farrell, Mr. James","male",40.5,0,0,7.75,"Q","no"
342
+ 473,2,"West, Mrs. Edwy Arthur (Ada Mary Worth)","female",33.0,1,2,27.75,"S","yes"
343
+ 113,3,"Barton, Mr. David John","male",22.0,0,0,8.05,"S","no"
344
+ 701,1,"Astor, Mrs. John Jacob (Madeleine Talmadge Force)","female",18.0,1,0,227.525,"C","yes"
345
+ 369,3,"Jermyn, Miss. Annie","female","_GSK_NA_",0,0,7.75,"Q","yes"
346
+ 779,3,"Kilgannon, Mr. Thomas J","male","_GSK_NA_",0,0,7.7375,"Q","no"
347
+ 475,3,"Strandberg, Miss. Ida Sofia","female",22.0,0,0,9.8375,"S","no"
348
+ 184,2,"Becker, Master. Richard F","male",1.0,2,1,39.0,"S","yes"
349
+ 707,2,"Kelly, Mrs. Florence ""Fannie""","female",45.0,0,0,13.5,"S","yes"
350
+ 136,2,"Richard, Mr. Emile","male",23.0,0,0,15.0458,"C","no"
351
+ 865,2,"Gill, Mr. John William","male",24.0,0,0,13.0,"S","no"
352
+ 364,3,"Asim, Mr. Adola","male",35.0,0,0,7.05,"S","no"
353
+ 149,2,"Navratil, Mr. Michel (""Louis M Hoffman"")","male",36.5,0,2,26.0,"S","no"
354
+ 789,3,"Dean, Master. Bertram Vere","male",1.0,1,2,20.575,"S","yes"
355
+ 745,3,"Stranden, Mr. Juho","male",31.0,0,0,7.925,"S","yes"
356
+ 293,2,"Levy, Mr. Rene Jacques","male",36.0,0,0,12.875,"C","no"
357
+ 726,3,"Oreskovic, Mr. Luka","male",20.0,0,0,8.6625,"S","no"
358
+ 679,3,"Goodwin, Mrs. Frederick (Augusta Tyler)","female",43.0,1,6,46.9,"S","no"
359
+ 476,1,"Clifford, Mr. George Quincy","male","_GSK_NA_",0,0,52.0,"S","no"
360
+ 157,3,"Gilnagh, Miss. Katherine ""Katie""","female",16.0,0,0,7.7333,"Q","yes"
361
+ 875,2,"Abelson, Mrs. Samuel (Hannah Wizosky)","female",28.0,1,0,24.0,"C","yes"
362
+ 193,3,"Andersen-Jensen, Miss. Carla Christine Nielsine","female",19.0,1,0,7.8542,"S","yes"
363
+ 357,1,"Bowerman, Miss. Elsie Edith","female",22.0,0,1,55.0,"S","yes"
364
+ 610,1,"Shutes, Miss. Elizabeth W","female",40.0,0,0,153.4625,"S","yes"
365
+ 568,3,"Palsson, Mrs. Nils (Alma Cornelia Berglund)","female",29.0,0,4,21.075,"S","no"
366
+ 634,1,"Parr, Mr. William Henry Marsh","male","_GSK_NA_",0,0,0.0,"S","no"
367
+ 18,2,"Williams, Mr. Charles Eugene","male","_GSK_NA_",0,0,13.0,"S","yes"
368
+ 751,2,"Wells, Miss. Joan","female",4.0,1,1,23.0,"S","yes"
369
+ 128,3,"Madsen, Mr. Fridtjof Arne","male",24.0,0,0,7.1417,"S","yes"
370
+ 38,3,"Cann, Mr. Ernest Charles","male",21.0,0,0,8.05,"S","no"
371
+ 564,3,"Simmons, Mr. John","male","_GSK_NA_",0,0,8.05,"S","no"
372
+ 224,3,"Nenkoff, Mr. Christo","male","_GSK_NA_",0,0,7.8958,"S","no"
373
+ 266,2,"Reeves, Mr. David","male",36.0,0,0,10.5,"S","no"
374
+ 397,3,"Olsson, Miss. Elina","female",31.0,0,0,7.8542,"S","no"
375
+ 754,3,"Jonkoff, Mr. Lalio","male",23.0,0,0,7.8958,"S","no"
376
+ 412,3,"Hart, Mr. Henry","male","_GSK_NA_",0,0,6.8583,"Q","no"
377
+ 890,1,"Behr, Mr. Karl Howell","male",26.0,0,0,30.0,"C","yes"
378
+ 709,1,"Cleaver, Miss. Alice","female",22.0,0,0,151.55,"S","yes"
379
+ 818,2,"Mallet, Mr. Albert","male",31.0,1,1,37.0042,"C","no"
380
+ 336,3,"Denkoff, Mr. Mitto","male","_GSK_NA_",0,0,7.8958,"S","no"
381
+ 809,2,"Meyer, Mr. August","male",39.0,0,0,13.0,"S","no"
382
+ 373,3,"Beavan, Mr. William Thomas","male",19.0,0,0,8.05,"S","no"
383
+ 311,1,"Hays, Miss. Margaret Bechstein","female",24.0,0,0,83.1583,"C","yes"
384
+ 181,3,"Sage, Miss. Constance Gladys","female","_GSK_NA_",8,2,69.55,"S","no"
385
+ 392,3,"Jansson, Mr. Carl Olof","male",21.0,0,0,7.7958,"S","yes"
386
+ 496,3,"Yousseff, Mr. Gerious","male","_GSK_NA_",0,0,14.4583,"C","no"
387
+ 81,3,"Waelens, Mr. Achille","male",22.0,0,0,9.0,"S","no"
388
+ 125,1,"White, Mr. Percival Wayland","male",54.0,0,1,77.2875,"S","no"
389
+ 301,3,"Kelly, Miss. Anna Katherine ""Annie Kate""","female","_GSK_NA_",0,0,7.75,"Q","yes"
390
+ 816,1,"Fry, Mr. Richard","male","_GSK_NA_",0,0,0.0,"S","no"
391
+ 794,1,"Hoyt, Mr. William Fisher","male","_GSK_NA_",0,0,30.6958,"C","no"
392
+ 867,2,"Duran y More, Miss. Asuncion","female",27.0,1,0,13.8583,"C","yes"
393
+ 759,3,"Theobald, Mr. Thomas Leonard","male",34.0,0,0,8.05,"S","no"
394
+ 793,3,"Sage, Miss. Stella Anna","female","_GSK_NA_",8,2,69.55,"S","no"
395
+ 764,1,"Carter, Mrs. William Ernest (Lucile Polk)","female",36.0,1,2,120.0,"S","yes"
396
+ 687,3,"Panula, Mr. Jaako Arnold","male",14.0,4,1,39.6875,"S","no"
397
+ 246,1,"Minahan, Dr. William Edward","male",44.0,2,0,90.0,"Q","no"
398
+ 309,2,"Abelson, Mr. Samuel","male",30.0,1,0,24.0,"C","no"
399
+ 708,1,"Calderhead, Mr. Edward Pennington","male",42.0,0,0,26.2875,"S","yes"
400
+ 848,3,"Markoff, Mr. Marin","male",35.0,0,0,7.8958,"C","no"
401
+ 825,3,"Panula, Master. Urho Abraham","male",2.0,4,1,39.6875,"S","no"
402
+ 690,1,"Madill, Miss. Georgette Alexandra","female",15.0,0,1,211.3375,"S","yes"
403
+ 385,3,"Plotcharsky, Mr. Vasil","male","_GSK_NA_",0,0,7.8958,"S","no"
404
+ 758,2,"Bailey, Mr. Percy Andrew","male",18.0,0,0,11.5,"S","no"
405
+ 233,2,"Sjostedt, Mr. Ernst Adolf","male",59.0,0,0,13.5,"S","no"
406
+ 651,3,"Mitkoff, Mr. Mito","male","_GSK_NA_",0,0,7.8958,"S","no"
407
+ 616,2,"Herman, Miss. Alice","female",24.0,1,2,65.0,"S","yes"
408
+ 19,3,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)","female",31.0,1,0,18.0,"S","no"
409
+ 183,3,"Asplund, Master. Clarence Gustaf Hugo","male",9.0,4,2,31.3875,"S","no"
410
+ 597,2,"Leitch, Miss. Jessie Wills","female","_GSK_NA_",0,0,33.0,"S","yes"
411
+ 463,1,"Gee, Mr. Arthur H","male",47.0,0,0,38.5,"S","no"
412
+ 67,2,"Nye, Mrs. (Elizabeth Ramell)","female",29.0,0,0,10.5,"S","yes"
413
+ 788,3,"Rice, Master. George Hugh","male",8.0,4,1,29.125,"Q","no"
414
+ 518,3,"Ryan, Mr. Patrick","male","_GSK_NA_",0,0,24.15,"Q","no"
415
+ 104,3,"Johansson, Mr. Gustaf Joel","male",33.0,0,0,8.6542,"S","no"
416
+ 729,2,"Bryhl, Mr. Kurt Arnold Gottfrid","male",25.0,1,0,26.0,"S","no"
417
+ 8,3,"Palsson, Master. Gosta Leonard","male",2.0,3,1,21.075,"S","no"
418
+ 812,3,"Lester, Mr. James","male",39.0,0,0,24.15,"S","no"
419
+ 502,3,"Canavan, Miss. Mary","female",21.0,0,0,7.75,"Q","no"
420
+ 614,3,"Horgan, Mr. John","male","_GSK_NA_",0,0,7.75,"Q","no"
421
+ 34,2,"Wheadon, Mr. Edward H","male",66.0,0,0,10.5,"S","no"
422
+ 294,3,"Haas, Miss. Aloisia","female",24.0,0,0,8.85,"S","no"
423
+ 323,2,"Slayter, Miss. Hilda Mary","female",30.0,0,0,12.35,"Q","yes"
424
+ 652,2,"Doling, Miss. Elsie","female",18.0,0,1,23.0,"S","yes"
425
+ 827,3,"Lam, Mr. Len","male","_GSK_NA_",0,0,56.4958,"S","no"
426
+ 331,3,"McCoy, Miss. Agnes","female","_GSK_NA_",2,0,23.25,"Q","yes"
427
+ 439,1,"Fortune, Mr. Mark","male",64.0,1,4,263.0,"S","no"
428
+ 798,3,"Osman, Mrs. Mara","female",31.0,0,0,8.6833,"S","yes"
429
+ 623,3,"Nakid, Mr. Sahid","male",20.0,1,1,15.7417,"C","yes"
430
+ 276,1,"Andrews, Miss. Kornelia Theodosia","female",63.0,1,0,77.9583,"S","yes"
431
+ 78,3,"Moutal, Mr. Rahamin Haim","male","_GSK_NA_",0,0,8.05,"S","no"
432
+ 742,1,"Cavendish, Mr. Tyrell William","male",36.0,1,0,78.85,"S","no"
433
+ 370,1,"Aubart, Mme. Leontine Pauline","female",24.0,0,0,69.3,"C","yes"
434
+ 425,3,"Rosblom, Mr. Viktor Richard","male",18.0,1,1,20.2125,"S","no"
435
+ 189,3,"Bourke, Mr. John","male",40.0,1,1,15.5,"Q","no"
436
+ 143,3,"Hakkarainen, Mrs. Pekka Pietari (Elin Matilda Dolck)","female",24.0,1,0,15.85,"S","yes"
437
+ 627,2,"Kirkland, Rev. Charles Leonard","male",57.0,0,0,12.35,"Q","no"
438
+ 703,3,"Barbara, Miss. Saiide","female",18.0,0,1,14.4542,"C","no"
439
+ 638,2,"Collyer, Mr. Harvey","male",31.0,1,1,26.25,"S","no"
440
+ 549,3,"Goldsmith, Mr. Frank John","male",33.0,1,1,20.525,"S","no"
441
+ 43,3,"Kraeff, Mr. Theodor","male","_GSK_NA_",0,0,7.8958,"C","no"
442
+ 68,3,"Crease, Mr. Ernest James","male",19.0,0,0,8.1583,"S","no"
443
+ 756,2,"Hamalainen, Master. Viljo","male",0.67,1,1,14.5,"S","yes"
444
+ 443,3,"Petterson, Mr. Johan Emil","male",25.0,1,0,7.775,"S","no"
445
+ 472,3,"Cacic, Mr. Luka","male",38.0,0,0,8.6625,"S","no"
446
+ 696,2,"Chapman, Mr. Charles Henry","male",52.0,0,0,13.5,"S","no"
447
+ 665,3,"Lindqvist, Mr. Eino William","male",20.0,1,0,7.925,"S","yes"
cicd/examples/github/train.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Train the demo Titanic model and dump it, with its dataset, for the CI/CD pipeline."""
from pathlib import Path

import giskard
import yaml

from giskard_cicd.utils import dump_model_and_dataset_for_cicd

# The CI/CD configuration file sits next to this script. Build the path from
# the script's directory instead of splitting the path string on "train.py",
# which breaks as soon as a parent directory name contains that text.
path_to_config = Path(__file__).parent / "cicd_config.yaml"
with open(path_to_config) as yaml_f:
    # NOTE(review): yaml.Loader can instantiate arbitrary Python objects. That is
    # acceptable only while this file is trusted repository content; consider
    # yaml.safe_load if the config needs nothing beyond plain mappings.
    cicd_config = yaml.load(yaml_f, Loader=yaml.Loader)

# Replace this with your own data & model creation.
df = giskard.demo.titanic_df()
data_preprocessor, clf = giskard.demo.titanic_pipeline()

# Wrap your Pandas DataFrame with Giskard.Dataset (test set, a golden dataset, etc.). Check the dedicated doc page: https://docs.giskard.ai/en/latest/guides/wrap_dataset/index.html
giskard_dataset = giskard.Dataset(
    df=df,  # A pandas.DataFrame that contains the raw data (before all the pre-processing steps) and the actual ground truth variable (target).
    target="Survived",  # Ground truth variable
    name="Titanic dataset",  # Optional
    cat_columns=['Pclass', 'Sex', "SibSp", "Parch", "Embarked"]  # Optional, but is a MUST if available. Inferred automatically if not.
)


# Wrap your model with Giskard.Model. Check the dedicated doc page: https://docs.giskard.ai/en/latest/guides/wrap_model/index.html
# you can use any tabular, text or LLM models (PyTorch, HuggingFace, LangChain, etc.),
# for classification, regression & text generation.
def prediction_function(df):
    """Return class probabilities for the raw rows in ``df``."""
    # The pre-processor can be a pipeline of one-hot encoding, imputer, scaler, etc.
    preprocessed_df = data_preprocessor(df)
    return clf.predict_proba(preprocessed_df)


giskard_model = giskard.Model(
    model=prediction_function,  # A prediction function that encapsulates all the data pre-processing steps and that could be executed with the dataset used by the scan.
    model_type="classification",  # Either regression, classification or text_generation.
    name="Titanic model",  # Optional
    classification_labels=clf.classes_,  # Their order MUST be identical to the prediction_function's output order
    feature_names=['PassengerId', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked'],  # Default: all columns of your dataset
    # classification_threshold=0.5,  # Default: 0.5
)

# Validate the wrapped objects and write them under
# <artifact_path>/artifacts/{model,dataset}.
dump_model_and_dataset_for_cicd(cicd_config["artifact_path"], giskard_model, giskard_dataset)
cicd/giskard_cicd/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .utils import dump_model_and_dataset_for_cicd
2
+
3
+ __all__ = ["dump_model_and_dataset_for_cicd"]
cicd/giskard_cicd/loaders/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from giskard_cicd.loaders.github_loader import GithubLoader
2
+ from giskard_cicd.loaders.huggingface_loader import HuggingFaceLoader
3
+ from giskard_cicd.loaders.base_loader import BaseLoader
4
+
5
+ __all__ = ["GithubLoader", "HuggingFaceLoader", "BaseLoader"]
cicd/giskard_cicd/loaders/base_loader.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Load models and datasets from Github."""
2
+
3
+ import logging
4
+ from abc import ABC, abstractmethod
5
+
6
+ from giskard.models.base import BaseModel
7
+ from giskard.core.model_validation import validate_model
8
+ from giskard import Dataset
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class LoaderError(RuntimeError):
    """Base error: the model and/or the dataset could not be loaded."""


class DatasetError(LoaderError):
    """Loading failed because of the dataset."""


class ModelError(LoaderError):
    """Loading failed because of the model."""
23
+
24
+
25
class BaseLoader(ABC):
    """Abstract interface shared by all model/dataset loaders."""

    @abstractmethod
    def load_giskard_model_dataset(self, *args, **kwargs) -> "tuple[BaseModel, Dataset]":
        """Load and return the wrapped ``(model, dataset)`` pair.

        Concrete loaders define their own parameters (for example ``model``
        and ``dataset`` identifiers); this abstract signature accepts anything
        so that `validate` can forward its arguments unchanged.
        """
        ...

    def validate(self, *args, **kwargs):
        """Load the model and dataset, then run Giskard's model validation.

        All arguments are forwarded to `load_giskard_model_dataset`. Calling
        it without arguments, as was done previously, fails for every loader
        whose load method has required parameters.
        """
        gsk_model, gsk_dataset = self.load_giskard_model_dataset(*args, **kwargs)
        validate_model(gsk_model, gsk_dataset)
cicd/giskard_cicd/loaders/github_loader.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yaml
2
+ from giskard import Dataset, Model
3
+ from giskard.models.base import BaseModel
4
+ from giskard.ml_worker.utils.file_utils import get_file_name
5
+
6
+ from .base_loader import BaseLoader
7
+ from pathlib import Path
8
+ from giskard.core.core import DatasetMeta
9
+
10
+
11
class GithubLoader(BaseLoader):
    """Load a saved Giskard model and dataset from local directories."""

    # TODO: change the way dataset is loaded, factor out some of the logic contained in Dataset.download()
    def load_giskard_model_dataset(self, model, dataset) -> (BaseModel, Dataset):
        """Rebuild the wrapped model and dataset from their saved artifacts.

        Args:
            model: Location handed to ``Model.load``.
            dataset: Directory holding ``giskard-dataset-meta.yaml`` and the
                compressed data file.

        Returns:
            The ``(model, dataset)`` pair.
        """
        dataset_dir = Path(dataset)

        # NOTE(review): yaml.Loader can build arbitrary Python objects; the
        # metadata file is presumably produced by Giskard itself -- confirm it
        # is never taken from an untrusted source.
        with open(dataset_dir / "giskard-dataset-meta.yaml") as meta_file:
            raw_meta = yaml.load(meta_file, Loader=yaml.Loader)

        meta_fields = (
            "name",
            "target",
            "column_types",
            "column_dtypes",
            "number_of_rows",
            "category_features",
        )
        meta = DatasetMeta(**{field: raw_meta[field] for field in meta_fields})

        data_file = dataset_dir / get_file_name("data", "csv.zst", False)
        frame = Dataset.load(data_file)
        frame = Dataset.cast_column_to_dtypes(frame, meta.column_dtypes)

        # The model is loaded before the dataset wrapper is built, as before.
        gsk_model = Model.load(model)
        gsk_dataset = Dataset(
            df=frame,
            name=meta.name,
            target=meta.target,
            column_types=meta.column_types,
        )
        return gsk_model, gsk_dataset
cicd/giskard_cicd/loaders/huggingface_loader.py ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Load models and datasets from the HuggingFace hub."""
2
+
3
+ import logging
4
+ import time
5
+
6
+ import datasets
7
+ import giskard as gsk
8
+ import huggingface_hub
9
+ import torch
10
+ from giskard import Dataset
11
+ from giskard.models.base import BaseModel
12
+ from giskard.models.huggingface import HuggingFaceModel
13
+ from transformers.pipelines import TextClassificationPipeline
14
+ import pandas as pd
15
+ from .base_loader import BaseLoader, DatasetError
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ class HuggingFaceLoader(BaseLoader):
21
+
22
+ def __init__(self, device=None):
23
+ self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
24
+
25
+ def _find_dataset_id_from_model(self, model_id):
26
+ """Find the dataset ID from the model metadata."""
27
+ model_card = huggingface_hub.model_info(model_id).cardData
28
+
29
+ if "datasets" not in model_card:
30
+ msg = f"Could not find dataset for model `{model_id}`."
31
+ raise DatasetError(msg)
32
+
33
+ # Take the first one
34
+ dataset_id = model_card["datasets"][0]
35
+ return dataset_id
36
+
37
+ def load_giskard_model_dataset(self, model, dataset=None, dataset_config=None, dataset_split=None):
38
+ # If no dataset was provided, we try to get it from the model metadata.
39
+ if dataset is None:
40
+ logger.debug("No dataset provided. Trying to get it from the model metadata.")
41
+ dataset = self._find_dataset_id_from_model(model)
42
+ logger.debug(f"Found dataset `{dataset}`.")
43
+
44
+ # Loading the model is easy. What is complicated is to get the dataset.
45
+ # So we start by trying to get the dataset, because if we fail, we don't
46
+ # want to waste time downloading the model.
47
+ hf_dataset = self.load_dataset(dataset, dataset_config, dataset_split, model)
48
+
49
+ # Load the model.
50
+ hf_model = self.load_model(model)
51
+
52
+ # Check that the dataset has the good feature names for the task.
53
+ feature_mapping = self._get_feature_mapping(hf_model, hf_dataset)
54
+
55
+ df = self._flatten_hf_dataset(hf_dataset, dataset_split)
56
+ df = pd.DataFrame(df).rename(columns={v: k for k, v in feature_mapping.items()})
57
+
58
+ # remove rows with multiple labels
59
+ # this is a hacky way to do it
60
+ # we do not support multi-label classification for now
61
+ if "label" in df and isinstance(df.label[0], list):
62
+ df = df[df.apply(lambda row: len(row['label']) == 1, axis=1)]
63
+ else:
64
+ print(df)
65
+ # @TODO: currently for classification models only.
66
+ id2label = hf_model.model.config.id2label
67
+
68
+ if "label" in df and isinstance(df.label[0], list):
69
+ # need to include all labels
70
+ # rewrite this lambda function to include all labels
71
+ df.label = df.label.apply(lambda x: id2label[x[0]])
72
+ else:
73
+ # TODO: when the label for test is not provided, what do we do?
74
+ df["label"] = df.label.apply(lambda x: id2label[x] if x >= 0 else "-1")
75
+ # map the list of label ids to the list of labels
76
+ # df["label"] = df.label.apply(lambda x: [id2label[i] for i in x])
77
+ gsk_dataset = gsk.Dataset(df, target="label", column_types={"text": "text"}, validation=False)
78
+
79
+ gsk_model = HuggingFaceModel(
80
+ hf_model,
81
+ model_type="classification",
82
+ data_preprocessing_function=lambda df: df.text.tolist(),
83
+ classification_labels=[id2label[i] for i in range(len(id2label))],
84
+ batch_size=None,
85
+ device=self.device,
86
+ )
87
+
88
+ # Optimize batch size
89
+ if self.device.startswith("cuda"):
90
+ gsk_model.batch_size = self._find_optimal_batch_size(gsk_model, gsk_dataset)
91
+
92
+ return gsk_model, gsk_dataset
93
+
94
+ def load_dataset(self, dataset_id, dataset_config=None, dataset_split=None, model_id=None):
95
+ print(f"Loading dataset {dataset_id} with config {dataset_config} and split {dataset_split}")
96
+ """Load a dataset from the HuggingFace Hub."""
97
+ logger.debug(f"Trying to load dataset `{dataset_id}` (config = `{dataset_config}`, split = `{dataset_split}`).")
98
+ try:
99
+ # we do not set the split here
100
+ # because we want to be able to select the best split later with preprocessing
101
+ hf_dataset = datasets.load_dataset(dataset_id, name=dataset_config)
102
+ if dataset_split is None:
103
+ dataset_split = self._select_best_dataset_split(list(hf_dataset.keys()))
104
+ logger.debug(f"No split provided, automatically selected split = `{dataset_split}`).")
105
+ hf_dataset = hf_dataset[dataset_split]
106
+
107
+ return hf_dataset
108
+ except ValueError as err:
109
+ msg = f"Could not load dataset `{dataset_id}` with config `{dataset_config}`."
110
+ raise DatasetError(msg) from err
111
+
112
+ def load_model(self, model_id):
113
+ from transformers import pipeline
114
+
115
+ task = huggingface_hub.model_info(model_id).pipeline_tag
116
+
117
+ return pipeline(task=task, model=model_id, device=self.device)
118
+
119
+ def _get_dataset_features(self, hf_dataset):
120
+ '''
121
+ Recursively get the features of the dataset
122
+ '''
123
+ dataset_features = {}
124
+ try:
125
+ dataset_features = hf_dataset.features
126
+ return dataset_features
127
+ except AttributeError:
128
+ print("hf_dataset.features not found")
129
+ if isinstance(hf_dataset, datasets.DatasetDict):
130
+ keys = list(hf_dataset.keys())
131
+ return self._get_dataset_features(hf_dataset[keys[0]])
132
+
133
+ def _flatten_hf_dataset(self, hf_dataset, data_split=None):
134
+ '''
135
+ Flatten the dataset to a pandas dataframe
136
+ '''
137
+ flat_dataset = pd.DataFrame()
138
+ if isinstance(hf_dataset, datasets.DatasetDict):
139
+ keys = list(hf_dataset.keys())
140
+
141
+ for k in keys:
142
+ if k.startswith("train"):
143
+ continue
144
+ elif k.startswith(data_split):
145
+ # TODO: only support one split for now
146
+ # Maybe we can merge all the datasets into one
147
+ flat_dataset = hf_dataset[k]
148
+ break
149
+ else:
150
+ flat_dataset = hf_dataset[k]
151
+
152
+ # If there are only train datasets
153
+ if isinstance(flat_dataset, pd.DataFrame) and flat_dataset.empty:
154
+ flat_dataset = hf_dataset[keys[0]]
155
+
156
+ return flat_dataset
157
+
158
+ def _get_feature_mapping(self, hf_model, hf_dataset):
159
+ if isinstance(hf_model, TextClassificationPipeline):
160
+ task_features = {"text": "string", "label": "class_label"}
161
+ else:
162
+ print(type(hf_model))
163
+ msg = "Unsupported model type."
164
+ raise NotImplementedError(msg)
165
+
166
+ dataset_features = self._get_dataset_features(hf_dataset)
167
+ print(dataset_features)
168
+ # map features
169
+ feature_mapping = {}
170
+ for f in set(dataset_features):
171
+ if f in task_features:
172
+ feature_mapping[f] = f
173
+ else:
174
+ for t in task_features:
175
+ if f.startswith(t):
176
+ feature_mapping[t] = f
177
+
178
+ if not set(task_features) - set(feature_mapping):
179
+ return feature_mapping
180
+ else:
181
+ # If not, we try to find a suitable mapping by matching types.
182
+ return self._amend_missing_features(task_features, dataset_features, feature_mapping)
183
+
184
+ def _amend_missing_features(self, task_features, dataset_features, feature_mapping):
185
+ '''
186
+ Question: what is this code doing?
187
+ '''
188
+ available_features = set(dataset_features) - set(feature_mapping)
189
+ missing_features = set(task_features) - set(feature_mapping)
190
+
191
+ for feature in missing_features:
192
+ expected_type = task_features[feature]
193
+ if expected_type == "class_label":
194
+ candidates = [f for f in available_features if isinstance(dataset_features[f], datasets.ClassLabel)]
195
+ else:
196
+ candidates = [f for f in available_features if dataset_features[f].dtype == expected_type]
197
+
198
+ # If we have more than one match, it`s not possible to know which one is the good one.
199
+ if len(candidates) != 1:
200
+ msg = f"Could not find a suitable mapping for feature for `{feature}`."
201
+ raise RuntimeError(msg)
202
+
203
+ feature_mapping[feature] = candidates[0]
204
+ available_features.remove(candidates[0])
205
+ return feature_mapping
206
+
207
+ def _select_best_dataset_split(self, split_names):
208
+ """Get the best split for testing.
209
+
210
+ Selects the split `test` if available, otherwise `validation`, and as a last resort `train`.
211
+ If there is only one split, we return that split.
212
+ """
213
+ # If only one split is available, we just use that one.
214
+ if len(split_names) == 1:
215
+ return split_names[0]
216
+
217
+ # Otherwise iterate based on the preferred prefixes.
218
+ for prefix in ["test", "valid", "train"]:
219
+ try:
220
+ return next(x for x in split_names if x.startswith(prefix))
221
+ except StopIteration:
222
+ pass
223
+
224
+ return None
225
+
226
+ def _find_optimal_batch_size(self, model: BaseModel, dataset: Dataset):
227
+ """Find the optimal batch size for the model and dataset."""
228
+ initial_batch_size = model.batch_size
229
+ try:
230
+ model.batch_size = 1
231
+ inference_time = float("inf")
232
+ while True:
233
+ num_runs = min(30, len(dataset) // model.batch_size)
234
+ num_samples = num_runs * model.batch_size
235
+ if num_runs == 0:
236
+ return model.batch_size // 2
237
+
238
+ ds_slice = dataset.slice(lambda df: df.sample(num_samples), row_level=False)
239
+
240
+ t_start = time.perf_counter_ns()
241
+ try:
242
+ with gsk.models.cache.no_cache():
243
+ model.predict(ds_slice)
244
+ except RuntimeError:
245
+ return model.batch_size // 2
246
+ elapsed = time.perf_counter_ns() - t_start
247
+
248
+ time_per_sample = elapsed / (num_samples)
249
+ if time_per_sample > inference_time:
250
+ return model.batch_size // 2
251
+ inference_time = time_per_sample
252
+ model.batch_size *= 2
253
+ finally:
254
+ model.batch_size = initial_batch_size
cicd/giskard_cicd/pipeline/runner.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yaml
2
+ import giskard as gsk
3
+ import time
4
+
5
+
6
class PipelineReport:
    """Wrap a scan result and expose its rendering methods."""

    def __init__(self, scan_result):
        """Keep a reference to the scan result to render."""
        self.scan_result = scan_result

    def to_html(self):
        """Return the scan result rendered by its ``to_html`` method."""
        return self.scan_result.to_html()

    def to_markdown(self, template="github"):
        """Return the scan result rendered as markdown.

        Args:
            template: Template name forwarded to the scan result. It used to
                be ignored in favour of a hard-coded ``"github"``, which is
                now only the default.
        """
        return self.scan_result.to_markdown(template=template)
15
+
16
+
17
class PipelineRunner:
    """Load a model and dataset through a loader, scan them and build a report."""

    def __init__(self, loaders):
        """Store the available loaders.

        Args:
            loaders: Mapping from loader ID to loader instance.
        """
        self.loaders = loaders

    def run(self, loader_id, **kwargs):
        """Run the scan pipeline and return a `PipelineReport`.

        Args:
            loader_id: Key of the loader to use in ``self.loaders``.
            **kwargs: ``scan_config`` (optional path to a YAML scan
                configuration) is consumed here; everything else is forwarded
                to the loader's ``load_giskard_model_dataset``.

        Raises:
            KeyError: If ``loader_id`` is not a known loader.
        """
        # Get the loader
        loader = self.loaders[loader_id]

        # Get scan configuration
        scan_config_path = kwargs.pop("scan_config", None)
        params, detectors = None, None
        if scan_config_path is not None:
            with open(scan_config_path) as yaml_f:
                # NOTE(review): yaml.Loader can build arbitrary Python objects;
                # prefer yaml.safe_load if this file may come from an
                # untrusted source. An empty file loads as None.
                scan_config = yaml.load(yaml_f, Loader=yaml.Loader) or {}

            # Both sections are optional. dict(None) / list(None) raised a
            # TypeError when one of them was absent.
            configuration = scan_config.get("configuration")
            if configuration is not None:
                params = dict(configuration)
            detector_names = scan_config.get("detectors")
            if detector_names is not None:
                detectors = list(detector_names)

        start = time.time()
        # Load the model and dataset
        gsk_model, gsk_dataset = loader.load_giskard_model_dataset(**kwargs)
        print(f"Loading took {time.time() - start:.2f}s")

        start = time.time()
        # Run the scanner
        scan_result = gsk.scan(gsk_model, gsk_dataset, params=params, only=detectors)
        print(f"Scanning took {time.time() - start:.2f}s")

        # Report
        return PipelineReport(scan_result)
cicd/giskard_cicd/utils.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pathlib
2
+
3
+
4
def dump_model_and_dataset_for_cicd(artifact_path, giskard_model, giskard_dataset):
    """Validate a Giskard model and dataset, then save them for the CI/CD run.

    The model is checked for save/load round-tripping and validated against
    the dataset. Both are then written under ``<artifact_path>/artifacts``,
    in the ``dataset`` and ``model`` sub-directories.

    Args:
        artifact_path: Base directory, as a string or path-like object.
            String concatenation used to reject ``Path`` values, and turned
            an empty string into the filesystem root.
        giskard_model: Wrapped model to validate and save.
        giskard_dataset: Wrapped dataset to validate against and save.

    Raises:
        Exception: If serialization or validation of the model fails; the
            original error is chained as the cause.
    """
    from giskard.core.model_validation import validate_model, validate_model_loading_and_saving

    try:
        reloaded_model = validate_model_loading_and_saving(giskard_model)
    except Exception as e:
        raise Exception("An issue occured during the serialization/deserialization of your model. Please submit the traceback as a GitHub issue in the following "
                        "repository for further assistance: https://github.com/Giskard-AI/giskard.") from e
    try:
        validate_model(reloaded_model, giskard_dataset)
    except Exception as e:
        raise Exception("An issue occured during the validation of your model. Please submit the traceback as a GitHub issue in the following "
                        "repository for further assistance: https://github.com/Giskard-AI/giskard.") from e

    artifacts_dir = pathlib.Path(artifact_path) / "artifacts"
    dataset_dir = artifacts_dir / "dataset"
    model_dir = artifacts_dir / "model"

    # parents=True also creates the base and `artifacts` directories.
    dataset_dir.mkdir(parents=True, exist_ok=True)
    model_dir.mkdir(parents=True, exist_ok=True)

    # TODO: change the Dataset.save() method to be like Model.save(), i.e. without the id requirement
    giskard_dataset.save(dataset_dir, 0)
    giskard_model.save(model_dir)
    print("Your model and dataset are successfully dumped for CI/CD.")
cicd/pyproject.toml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools"]
3
+ build-backend = "setuptools.build_meta"
4
+ [project]
5
+ name = "giskard_cicd"
6
+ readme = "README.md"
7
+ dependencies = [
8
+ "datasets",
9
+ "giskard >= 2.0.0b",
10
+ "huggingface_hub",
11
+ "torch",
12
+ "transformers",
13
+ ]
14
+ requires-python = ">=3.9"
cicd/readme.md ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Giskard CI/CD runner (WIP)
2
+
3
+ ## Overview
4
+
5
+ The idea is to have a common CI/CD core that can interface with different input sources (loaders) and output destinations (reporters).
6
+
7
+ The **core** is responsible for running the tests and generating a report.
8
+
9
+ The **loaders** are responsible for loading the model and dataset, wrapped as Giskard objects, from a given source (for example the HuggingFace hub, a Github repository, etc.).
10
+
11
+ The **reporters** are responsible for sending the report to the appropriate destination (e.g. a comment to a Github PR, a HuggingFace discussion, etc.).
12
+
13
+
14
+ ### Tasks
15
+
16
+ A task could be a data object containing all the information needed to run a CI/CD pipeline. For example:
17
+
18
+ ```json
19
+ {
20
+ "loader_id": "huggingface",
21
+ "model": "distilbert-base-uncased",
22
+ "dataset": "sst2",
23
+ "loader_args": {
24
+ "dataset_split": "validation",
25
+ "dataset_split": "validation"
26
+ "reporter_id": "huggingface_discussion",
27
+ "reporter_args": {
28
+ "discussion_id": 1234,
29
+ "discussion_id": 1234
30
+ }
31
+ ```
32
+
33
+ or
34
+
35
+
36
+ ```json
37
+ {
38
+ "loader_id": "github",
39
+ "model": "my.package::load_model",
40
+ "dataset": "my.package::load_test_dataset",
41
+ "loader_args": {
42
+ "repository": "My-Organization/my_project",
43
+ "branch": "dev-test2",
44
+ "branch": "dev-test2"
45
+ "reporter_id": "github_pr",
46
+ "reporter_args": {
47
+ "repository": "My-Organization/my_project",
48
+ "pr_id": 1234
49
+ }
50
+ }
51
+ ```
52
+
53
+ These tasks may be generated by a watcher (e.g. a Github action, a HuggingFace webhook, etc.) and put in a queue. The CI/CD runner will then pick them up and run the pipeline.
54
+
55
+ Otherwise, a single task can be created to run a single-shot Github action, without queueing.
56
+
57
+
58
+ ### CI/CD Core
59
+
60
+ In pseudocode, the CI/CD core could look like this:
61
+
62
+ ```python
63
+ task = get_task_from_queue_or_environment()
64
+
65
+ loader = get_loader(task.loader_id)
66
+ gsk_model, gsk_dataset = loader.load_model_dataset(
67
+ task.model,
68
+ task.dataset,
69
+ **task.loader_args,
70
+ )
71
+
72
+ runner = PipelineRunner()
73
+ report = runner.run(gsk_model, gsk_dataset)
74
+
75
+ reporter = get_reporter(task.reporter_id)
76
+ reporter.push_report(report, **task.reporter_args)
77
+ ```
78
+
79
+ ## Prototype
80
+
81
+ Current implementation has two loaders:
82
+ - The `github` loader which can be run from the command line (after running `python train.py` in `examples/github`):
83
+
84
+ ```bash
85
+ $ python cli.py --loader github --model examples/github/artifacts/model --dataset examples/github/artifacts/dataset
86
+ ```
87
+
88
+ - The `huggingface` loader which can be run from the command line:
89
+
90
+ ```bash
91
+ $ python cli.py --loader huggingface --model distilbert-base-uncased-finetuned-sst-2-english --dataset_split validation --output demo_report.html
92
+ ```
93
+
94
+ - Automatically post to discussion area for a given repo
95
+ ```bash
96
+ $ python cli.py --loader huggingface --model distilbert-base-uncased-finetuned-sst-2-english --dataset_split validation --output_format markdown --output_portal huggingface --discussion_repo [REPO_ID] --hf_token [HF_TOKEN]
97
+ ```
98
+
99
+ This will launch a pipeline that will load the model and dataset from the HuggingFace hub, run the scan and generate a report in HTML format (for now).
cicd/retriever.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import huggingface_hub
3
+
4
+
5
def model_has_dataset(model):
    """Tell whether at least one tag of ``model`` starts with ``dataset:``."""
    return any(tag.startswith("dataset:") for tag in model.tags)
10
+
11
+
12
if __name__ == "__main__":
    import pandas as pd

    parser = argparse.ArgumentParser(
        prog="Giskard Retriever", description="Retrieves HF models that are bound to datasets."
    )
    parser.add_argument(
        "--model_type",
        help="Hugging Face model types. default: text-classification",
        required=False,
    )
    # `choices` rejects unknown formats up front; they used to be accepted and
    # then silently produced no output file.
    parser.add_argument(
        "--output_format",
        choices=["parquet", "csv", "json"],
        help="Format of the information retrieved. Default: parquet. Options: parquet, csv, json.",
    )

    args = parser.parse_args()

    MODEL_TYPE = args.model_type if args.model_type is not None else "text-classification"

    # Hub models of the requested type, most liked first, restricted to those
    # carrying at least one `dataset:` tag.
    models_with_dataset = filter(
        model_has_dataset, huggingface_hub.list_models(filter=MODEL_TYPE, sort="likes", direction=-1)
    )

    df = pd.DataFrame(
        [
            {
                "modelId": m.modelId,
                "modelType": MODEL_TYPE,
                "author": m.author,
                "downloads": m.downloads,
                "likes": m.likes,
                # Drop the 8-character "dataset:" prefix from each tag.
                "datasets": [t[8:] for t in m.tags if t.startswith("dataset:")],
            }
            for m in models_with_dataset
        ]
    )

    output_format = args.output_format

    if output_format is None or output_format == "parquet":
        df.to_parquet(f"models_{MODEL_TYPE}.parquet", index=False)
    elif output_format == "csv":
        df.to_csv(f"models_{MODEL_TYPE}.csv", columns=df.columns, index=False)
    elif output_format == "json":
        # `index=False` is rejected with the default "columns" orient, so the
        # JSON export always raised ValueError. "records" writes one object
        # per model and leaves the index out.
        df.to_json(f"models_{MODEL_TYPE}.json", orient="records")
cicd/scan_config_template.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ detectors:
2
+ - ethical_bias
3
+
4
+ configuration:
5
+ ethical_bias:
6
+ threshold:
7
+ 0.01
cicd/scan_retrieved.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import pandas as pd
3
+ from ast import literal_eval
4
+ from string import Template
5
+ import os
6
+
7
+
8
def model_has_dataset(model):
    """Return True when *model* carries at least one ``dataset:`` tag.

    Args:
        model: Any object exposing a ``tags`` attribute holding an iterable of
            tag strings (or ``None`` when the model has no tags at all).

    Returns:
        bool: True if some tag starts with ``"dataset:"``, False otherwise,
        including when ``tags`` is ``None`` or empty.
    """
    # `tags` may be missing (None) for some models; treat that as "no tags"
    # instead of failing with a TypeError while iterating.
    return any(tag.startswith("dataset:") for tag in (model.tags or ()))
13
+
14
+
15
if __name__ == "__main__":
    # Only this entry point spawns the scanner, so the import stays local.
    import subprocess

    parser = argparse.ArgumentParser(
        prog="Giskard Batch Scanner", description="Scan Retrieved HF models."
    )
    parser.add_argument(
        "--data_path",
        help="Path to retrieved models in csv format (need to run retriever.py first).",
        required=True,
    )
    parser.add_argument(
        "--first_Nmodels",
        help="Number of models to be scanned from the sorted list of models available.",
        type=int,
        required=True,
    )
    parser.add_argument(
        "--output_path",
        help="Path of dir to save all the reports",
        required=True,
    )

    args = parser.parse_args()

    df = pd.read_csv(args.data_path)

    # Optional bookkeeping file listing (model, dataset) pairs already handled.
    df_to_be_skipped = None
    to_be_skipped_file_path = ".models_and_datasets_to_be_skipped.csv"
    if os.path.exists(to_be_skipped_file_path):
        df_to_be_skipped = pd.read_csv(to_be_skipped_file_path)

    result_path_template = Template("${output_path}/${model_name}__default_scan_with__${dataset_name}.${suffix}")

    os.makedirs(args.output_path, exist_ok=True)

    dataset_split_exceptions = {"facebook/bart-large-mnli": "validation_matched"}

    dataset_config_exceptions = {"tweet_eval": "sentiment"}

    # Never index past the end of the retrieved list when N exceeds its size.
    for i in range(min(args.first_Nmodels, len(df))):
        row = df.iloc[i]
        model = row.modelId
        # The csv stores the dataset list as its Python repr; scan the first one.
        dataset = literal_eval(row.datasets)[0]

        message = f"{model} with {dataset}"

        # The None check must come first: without a skip file there is
        # nothing to subscript.
        if df_to_be_skipped is not None and (
            (df_to_be_skipped["model"] == model) & (df_to_be_skipped["dataset"] == dataset)
        ).any():
            print(f"[{i}] ==== ⏩ skipping {message} ====")
            continue

        print(f"[{i}] ==== 🔍 scanning {message} ====")

        # "/" is not allowed in a file name, so it is replaced in both ids.
        model_name = model.replace("/", "--")
        dataset_name = dataset.replace("/", "--")

        result_path = result_path_template.substitute(
            model_name=model_name,
            dataset_name=dataset_name,
            output_path=args.output_path,
            suffix="html",
        )
        if os.path.exists(result_path):
            answer = input(f"{result_path} already exists, Overwrite[o] or Skip[s]? ")

            while answer not in ["o", "s"]:
                answer = input("Invalid answer, please choose between 'o' and 's'")

            if answer == "s":
                continue
            os.remove(result_path)

        # Argument list (no shell): model/dataset ids are passed verbatim and
        # cannot be interpreted by a shell.
        # NOTE(review): when a dataset has no config exception the literal
        # string "None" is passed, exactly as the previous string-built command
        # did -- confirm that cli.py expects this.
        command = [
            "python", "cli.py",
            "--loader", "huggingface",
            "--model", model,
            "--dataset", dataset,
            "--dataset_split", dataset_split_exceptions.get(model, "validation"),
            "--dataset_config", str(dataset_config_exceptions.get(dataset, None)),
            "--output", result_path,
        ]

        try:
            # check=True turns a non-zero exit status into CalledProcessError,
            # so failed scans actually reach the error branch below.
            subprocess.run(command, check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            status = "error"
            error_path = result_path_template.substitute(
                model_name=model_name,
                dataset_name=dataset_name,
                output_path=args.output_path,
                suffix="error",
            )
            with open(error_path, "w", encoding="utf-8") as error_log:
                # A text file only accepts str, not the exception object itself.
                error_log.write(str(e))
            print(
                f"Something went wrong while {message}, error is logged at {error_path}. "
                "continuing with the next model...")
        else:
            status = "done"

        # Persist the outcome after every model so an interrupted batch can
        # resume; pd.concat ignores the initial None when no skip file existed.
        new_row = pd.DataFrame({"model": model, "dataset": dataset, "status": status}, index=[0])
        df_to_be_skipped = pd.concat([df_to_be_skipped, new_row], ignore_index=True)
        df_to_be_skipped.to_csv(to_be_skipped_file_path, index=False)
cicd/setup.cfg ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [metadata]
2
+ name = giskard_cicd
3
+ version = 0.1.0
4
+
5
+ [options]
6
+ packages = find:
7
+ install_requires =
8
+ giskard >= 2.0.0b
9
+ transformers
10
+ huggingface_hub
11
+ datasets
12
+ torch
13
+