Skip to content

Commit 3c5bdc0

Browse files
authored
Merge pull request ClickHouse#33751 from ClickHouse/fix-docker-dependencies
Fix docker dependencies
2 parents 0e37e8b + 7a1f471 commit 3c5bdc0

File tree

6 files changed

+456
-79
lines changed

6 files changed

+456
-79
lines changed

.github/workflows/master.yml

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,18 @@ on: # yamllint disable-line rule:truthy
99
branches:
1010
- 'master'
1111
jobs:
12+
PythonUnitTests:
13+
runs-on: [self-hosted, style-checker]
14+
steps:
15+
- name: Clear repository
16+
run: |
17+
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
18+
- name: Check out repository code
19+
uses: actions/checkout@v2
20+
- name: Python unit tests
21+
run: |
22+
cd "$GITHUB_WORKSPACE/tests/ci"
23+
python3 -m unittest discover -s . -p '*_test.py'
1224
DockerHubPushAarch64:
1325
runs-on: [self-hosted, func-tester-aarch64]
1426
steps:
@@ -44,7 +56,7 @@ jobs:
4456
name: changed_images_amd64
4557
path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
4658
DockerHubPush:
47-
needs: [DockerHubPushAmd64, DockerHubPushAarch64]
59+
needs: [DockerHubPushAmd64, DockerHubPushAarch64, PythonUnitTests]
4860
runs-on: [self-hosted, style-checker]
4961
steps:
5062
- name: Clear repository

.github/workflows/pull_request.yml

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,19 @@ jobs:
3131
run: |
3232
cd "$GITHUB_WORKSPACE/tests/ci"
3333
python3 run_check.py
34+
PythonUnitTests:
35+
needs: CheckLabels
36+
runs-on: [self-hosted, style-checker]
37+
steps:
38+
- name: Clear repository
39+
run: |
40+
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
41+
- name: Check out repository code
42+
uses: actions/checkout@v2
43+
- name: Python unit tests
44+
run: |
45+
cd "$GITHUB_WORKSPACE/tests/ci"
46+
python3 -m unittest discover -s . -p '*_test.py'
3447
DockerHubPushAarch64:
3548
needs: CheckLabels
3649
runs-on: [self-hosted, func-tester-aarch64]
@@ -68,7 +81,7 @@ jobs:
6881
name: changed_images_amd64
6982
path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
7083
DockerHubPush:
71-
needs: [DockerHubPushAmd64, DockerHubPushAarch64]
84+
needs: [DockerHubPushAmd64, DockerHubPushAarch64, PythonUnitTests]
7285
runs-on: [self-hosted, style-checker]
7386
steps:
7487
- name: Clear repository

tests/ci/docker_images_check.py

Lines changed: 117 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import shutil
77
import subprocess
88
import time
9-
from typing import List, Tuple
9+
from typing import List, Optional, Set, Tuple, Union
1010

1111
from github import Github
1212

@@ -24,21 +24,49 @@
2424
TEMP_PATH = os.path.join(RUNNER_TEMP, "docker_images_check")
2525

2626

27+
class DockerImage:
28+
def __init__(
29+
self,
30+
path: str,
31+
repo: str,
32+
parent: Optional["DockerImage"] = None,
33+
gh_repo_path: str = GITHUB_WORKSPACE,
34+
):
35+
self.path = path
36+
self.full_path = os.path.join(gh_repo_path, path)
37+
self.repo = repo
38+
self.parent = parent
39+
self.built = False
40+
41+
def __eq__(self, other):
42+
"""Is used to check if DockerImage is in a set or not"""
43+
return self.path == other.path
44+
45+
def __hash__(self):
46+
return hash(self.path)
47+
48+
def __str__(self):
49+
return self.repo
50+
51+
def __repr__(self):
52+
return f"DockerImage(path={self.path},path={self.path},parent={self.parent})"
53+
54+
2755
def get_changed_docker_images(
2856
pr_info: PRInfo, repo_path: str, image_file_path: str
29-
) -> List[Tuple[str, str]]:
57+
) -> Set[DockerImage]:
3058
images_dict = {}
3159
path_to_images_file = os.path.join(repo_path, image_file_path)
3260
if os.path.exists(path_to_images_file):
33-
with open(path_to_images_file, "r") as dict_file:
61+
with open(path_to_images_file, "rb") as dict_file:
3462
images_dict = json.load(dict_file)
3563
else:
3664
logging.info(
3765
"Image file %s doesnt exists in repo %s", image_file_path, repo_path
3866
)
3967

4068
if not images_dict:
41-
return []
69+
return set()
4270

4371
files_changed = pr_info.changed_files
4472

@@ -54,49 +82,40 @@ def get_changed_docker_images(
5482
for dockerfile_dir, image_description in images_dict.items():
5583
for f in files_changed:
5684
if f.startswith(dockerfile_dir):
85+
name = image_description["name"]
5786
logging.info(
5887
"Found changed file '%s' which affects "
5988
"docker image '%s' with path '%s'",
6089
f,
61-
image_description["name"],
90+
name,
6291
dockerfile_dir,
6392
)
64-
changed_images.append(dockerfile_dir)
93+
changed_images.append(DockerImage(dockerfile_dir, name))
6594
break
6695

6796
# The order is important: dependents should go later than bases, so that
6897
# they are built with updated base versions.
6998
index = 0
7099
while index < len(changed_images):
71100
image = changed_images[index]
72-
for dependent in images_dict[image]["dependent"]:
101+
for dependent in images_dict[image.path]["dependent"]:
73102
logging.info(
74103
"Marking docker image '%s' as changed because it "
75104
"depends on changed docker image '%s'",
76105
dependent,
77106
image,
78107
)
79-
changed_images.append(dependent)
108+
changed_images.append(DockerImage(dependent, image.repo, image))
80109
index += 1
81110
if index > 5 * len(images_dict):
82111
# Sanity check to prevent infinite loop.
83112
raise RuntimeError(
84113
f"Too many changed docker images, this is a bug. {changed_images}"
85114
)
86115

87-
# If a dependent image was already in the list because its own files
88-
# changed, but then it was added as a dependent of a changed base, we
89-
# must remove the earlier entry so that it doesn't go earlier than its
90-
# base. This way, the dependent will be rebuilt later than the base, and
91-
# will correctly use the updated version of the base.
92-
seen = set()
93-
no_dups_reversed = []
94-
for x in reversed(changed_images):
95-
if x not in seen:
96-
seen.add(x)
97-
no_dups_reversed.append(x)
98-
99-
result = [(x, images_dict[x]["name"]) for x in reversed(no_dups_reversed)]
116+
# With reversed changed_images set will use images with parents first, and
117+
# images without parents then
118+
result = set(reversed(changed_images))
100119
logging.info(
101120
"Changed docker images for PR %s @ %s: '%s'",
102121
pr_info.number,
@@ -106,66 +125,109 @@ def get_changed_docker_images(
106125
return result
107126

108127

128+
def gen_versions(
129+
pr_info: PRInfo, suffix: Optional[str]
130+
) -> Tuple[List[str], Union[str, List[str]]]:
131+
pr_commit_version = str(pr_info.number) + "-" + pr_info.sha
132+
# The order is important, PR number is used as cache during the build
133+
versions = [str(pr_info.number), pr_commit_version]
134+
result_version = pr_commit_version
135+
if pr_info.number == 0:
136+
# First get the latest for cache
137+
versions.insert(0, "latest")
138+
139+
if suffix:
140+
# We should build architecture specific images separately and merge a
141+
# manifest lately in a different script
142+
versions = [f"{v}-{suffix}" for v in versions]
143+
# changed_images_{suffix}.json should contain all changed images
144+
result_version = versions
145+
146+
return versions, result_version
147+
148+
109149
def build_and_push_one_image(
110-
path_to_dockerfile_folder: str, image_name: str, version_string: str, push: bool
150+
image: DockerImage,
151+
version_string: str,
152+
push: bool,
153+
child: bool,
111154
) -> Tuple[bool, str]:
112-
path = path_to_dockerfile_folder
113155
logging.info(
114156
"Building docker image %s with version %s from path %s",
115-
image_name,
157+
image.repo,
116158
version_string,
117-
path,
159+
image.full_path,
118160
)
119161
build_log = os.path.join(
120-
TEMP_PATH,
121-
"build_and_push_log_{}_{}".format(
122-
str(image_name).replace("/", "_"), version_string
123-
),
162+
TEMP_PATH, f"build_and_push_log_{image.repo.replace('/', '_')}_{version_string}"
124163
)
125164
push_arg = ""
126165
if push:
127166
push_arg = "--push "
128167

129-
with open(build_log, "w") as bl:
168+
from_tag_arg = ""
169+
if child:
170+
from_tag_arg = f"--build-arg FROM_TAG={version_string} "
171+
172+
with open(build_log, "wb") as bl:
130173
cmd = (
131174
"docker buildx build --builder default "
132-
f"--build-arg FROM_TAG={version_string} "
175+
f"{from_tag_arg}"
133176
f"--build-arg BUILDKIT_INLINE_CACHE=1 "
134-
f"--tag {image_name}:{version_string} "
135-
f"--cache-from type=registry,ref={image_name}:{version_string} "
177+
f"--tag {image.repo}:{version_string} "
178+
f"--cache-from type=registry,ref={image.repo}:{version_string} "
136179
f"{push_arg}"
137-
f"--progress plain {path}"
180+
f"--progress plain {image.full_path}"
138181
)
139182
logging.info("Docker command to run: %s", cmd)
140-
retcode = subprocess.Popen(cmd, shell=True, stderr=bl, stdout=bl).wait()
183+
with subprocess.Popen(cmd, shell=True, stderr=bl, stdout=bl) as proc:
184+
retcode = proc.wait()
185+
141186
if retcode != 0:
142187
return False, build_log
143188

144-
logging.info("Processing of %s successfully finished", image_name)
189+
logging.info("Processing of %s successfully finished", image.repo)
145190
return True, build_log
146191

147192

148193
def process_single_image(
149-
versions: List[str], path_to_dockerfile_folder: str, image_name: str, push: bool
194+
image: DockerImage,
195+
versions: List[str],
196+
push: bool,
197+
child: bool,
150198
) -> List[Tuple[str, str, str]]:
151199
logging.info("Image will be pushed with versions %s", ", ".join(versions))
152200
result = []
153201
for ver in versions:
154202
for i in range(5):
155-
success, build_log = build_and_push_one_image(
156-
path_to_dockerfile_folder, image_name, ver, push
157-
)
203+
success, build_log = build_and_push_one_image(image, ver, push, child)
158204
if success:
159-
result.append((image_name + ":" + ver, build_log, "OK"))
205+
result.append((image.repo + ":" + ver, build_log, "OK"))
160206
break
161207
logging.info(
162208
"Got error will retry %s time and sleep for %s seconds", i, i * 5
163209
)
164210
time.sleep(i * 5)
165211
else:
166-
result.append((image_name + ":" + ver, build_log, "FAIL"))
212+
result.append((image.repo + ":" + ver, build_log, "FAIL"))
167213

168214
logging.info("Processing finished")
215+
image.built = True
216+
return result
217+
218+
219+
def process_image_with_parents(
220+
image: DockerImage, versions: List[str], push: bool, child: bool = False
221+
) -> List[Tuple[str, str, str]]:
222+
result = [] # type: List[Tuple[str,str,str]]
223+
if image.built:
224+
return result
225+
226+
if image.parent is not None:
227+
result += process_image_with_parents(image.parent, versions, push, False)
228+
child = True
229+
230+
result += process_single_image(image, versions, push, child)
169231
return result
170232

171233

@@ -182,7 +244,7 @@ def process_test_results(
182244
build_url = s3_client.upload_test_report_to_s3(
183245
build_log, s3_path_prefix + "/" + os.path.basename(build_log)
184246
)
185-
url_part += '<a href="{}">build_log</a>'.format(build_url)
247+
url_part += f'<a href="{build_url}">build_log</a>'
186248
if url_part:
187249
test_name = image + " (" + url_part + ")"
188250
else:
@@ -255,8 +317,6 @@ def main():
255317
shell=True,
256318
)
257319

258-
repo_path = GITHUB_WORKSPACE
259-
260320
if os.path.exists(TEMP_PATH):
261321
shutil.rmtree(TEMP_PATH)
262322
os.makedirs(TEMP_PATH)
@@ -267,43 +327,30 @@ def main():
267327
else:
268328
pr_info = PRInfo(need_changed_files=True)
269329

270-
changed_images = get_changed_docker_images(pr_info, repo_path, "docker/images.json")
271-
logging.info(
272-
"Has changed images %s", ", ".join([str(image[0]) for image in changed_images])
330+
changed_images = get_changed_docker_images(
331+
pr_info, GITHUB_WORKSPACE, "docker/images.json"
273332
)
274-
pr_commit_version = str(pr_info.number) + "-" + pr_info.sha
275-
# The order is important, PR number is used as cache during the build
276-
versions = [str(pr_info.number), pr_commit_version]
277-
result_version = pr_commit_version
278-
if pr_info.number == 0:
279-
# First get the latest for cache
280-
versions.insert(0, "latest")
333+
logging.info("Has changed images %s", ", ".join([im.path for im in changed_images]))
281334

282-
if args.suffix:
283-
# We should build architecture specific images separately and merge a
284-
# manifest lately in a different script
285-
versions = [f"{v}-{args.suffix}" for v in versions]
286-
# changed_images_{suffix}.json should contain all changed images
287-
result_version = versions
335+
image_versions, result_version = gen_versions(pr_info, args.suffix)
288336

289337
result_images = {}
290338
images_processing_result = []
291-
for rel_path, image_name in changed_images:
292-
full_path = os.path.join(repo_path, rel_path)
293-
images_processing_result += process_single_image(
294-
versions, full_path, image_name, push
339+
for image in changed_images:
340+
images_processing_result += process_image_with_parents(
341+
image, image_versions, push
295342
)
296-
result_images[image_name] = result_version
343+
result_images[image.repo] = result_version
297344

298345
if changed_images:
299-
description = "Updated " + ",".join([im[1] for im in changed_images])
346+
description = "Updated " + ",".join([im.repo for im in changed_images])
300347
else:
301348
description = "Nothing to update"
302349

303350
if len(description) >= 140:
304351
description = description[:136] + "..."
305352

306-
with open(changed_json, "w") as images_file:
353+
with open(changed_json, "w", encoding="utf-8") as images_file:
307354
json.dump(result_images, images_file)
308355

309356
s3_helper = S3Helper("https://s3.amazonaws.com")
@@ -317,8 +364,8 @@ def main():
317364

318365
url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME)
319366

320-
print("::notice ::Report url: {}".format(url))
321-
print('::set-output name=url_output::"{}"'.format(url))
367+
print(f"::notice ::Report url: {url}")
368+
print(f'::set-output name=url_output::"{url}"')
322369

323370
if args.no_reports:
324371
return

0 commit comments

Comments
 (0)