From e37f4d7917df20195676032a2d0ced18437ca6d0 Mon Sep 17 00:00:00 2001 From: Kevin Fenzi Date: Thu, 2 Jun 2022 12:49:38 -0700 Subject: [PATCH] grokmirror 2.0 changes grokmirror 2.0+ merges the old fsck and repos configs into one grokmirror config. Signed-off-by: Kevin Fenzi --- roles/grokmirror_mirror/tasks/main.yml | 9 +- roles/grokmirror_mirror/templates/fsck.conf | 66 ---- .../grokmirror_mirror/templates/grokfsck.cron | 2 +- .../templates/grokmirror.conf | 328 ++++++++++++++++++ .../templates/grokmirror.cron | 2 +- roles/grokmirror_mirror/templates/repos.conf | 106 ------ 6 files changed, 332 insertions(+), 181 deletions(-) delete mode 100644 roles/grokmirror_mirror/templates/fsck.conf create mode 100644 roles/grokmirror_mirror/templates/grokmirror.conf delete mode 100644 roles/grokmirror_mirror/templates/repos.conf diff --git a/roles/grokmirror_mirror/tasks/main.yml b/roles/grokmirror_mirror/tasks/main.yml index 96bf748a10..a300651090 100644 --- a/roles/grokmirror_mirror/tasks/main.yml +++ b/roles/grokmirror_mirror/tasks/main.yml @@ -21,13 +21,8 @@ tags: - grokmirror-mirror -- name: install grokmirror repos config file from template - template: src=repos.conf dest={{grokmirror_topdir}}/repos.conf owner=root group=root mode=644 - tags: - - grokmirror-mirror - -- name: install grokmirror fsck config file from template - template: src=fsck.conf dest={{grokmirror_topdir}}/fsck.conf owner=root group=root mode=644 +- name: install grokmirror config file from template + template: src=grokmirror.conf dest={{grokmirror_topdir}}/grokmirror.conf owner=root group=root mode=644 tags: - grokmirror-mirror diff --git a/roles/grokmirror_mirror/templates/fsck.conf b/roles/grokmirror_mirror/templates/fsck.conf deleted file mode 100644 index 87a15925bd..0000000000 --- a/roles/grokmirror_mirror/templates/fsck.conf +++ /dev/null @@ -1,66 +0,0 @@ -# You can have multiple sections, just name them appropriately -[src.fedoraproject.org] -# Where is the manifest containing the list of 
repositories? -manifest = {{ grokmirror_topdir }}/src.fedoraproject.org/manifest.js.gz -# -# Where are the repositories kept? -#toplevel = /var/lib/git/mirror -toplevel = {{ grokmirror_topdir }}/src.fedoraproject.org -# -# Where do we put the logs? -#log = /var/log/mirror/kernelorg-fsck.log -log = {{ grokmirror_topdir }}/src.fedoraproject.org/src.fedoraproject.org-fsck.log -# -# Log level can be "info" or "debug" -#loglevel = info -loglevel = info -# -# Make sure there is only one instance of grok-fsck running by -# trying to exclusive-lock this file before we do anything. -lock = /var/tmp/kernelorg-fsck.lock -# -# Where to keep the status file -#statusfile = /var/lib/mirror/kernelorg-fsck.js -statusfile = /var/tmp/kernelorg-fsck.js -# -# How often should we check each repository, in days. -# Any newly added repository will have the first check within a random -# period of 0 and $frequency, and then every $frequency after that, -# to assure that not all repositories are checked on the same day. -# Don't set to less than 7 unless you only mirror a few repositories -# (or really like to thrash your disks). -#frequency = 30 -frequency = 30 -# -# Some errors are relatively benign and can be safely ignored. Add matching -# substrings to this field to ignore them. -ignore_errors = dangling commit - dangling blob - notice: HEAD points to an unborn branch - notice: No default references - contains zero-padded file modes -# -# Repack the repositories after calling git-fsck. -# This will especially save you space if you have shared repositories. -# To check if you have shared repositories, look at your manifest.js.gz to -# see if any repository definition has a "reference" key. -repack = yes -# -# Default repack flags are -A -d -l -q, but you can specify your own here -repack_flags = -A -d -l -q -# -# Once in a while you should repack your repositories more thoroughly, -# by passing a -f flag and using a larger window/depth. 
You shouldn't need -# to do this more frequently than once every 10-15 regular repacks. -# -# Trigger a full repack every N times. -full_repack_every = 10 -# -# What flags should we use when doing a full repack. -full_repack_flags = -A -d -l -q -f --window=200 --depth=50 -# -# Run git-prune to remove obsolete old objects if no other repositories are -# using the repo in their objects/info/alternates. If other repositories -# are relying on this repo via alternates, it will not be pruned to avoid -# potential corruption. -prune = yes diff --git a/roles/grokmirror_mirror/templates/grokfsck.cron b/roles/grokmirror_mirror/templates/grokfsck.cron index 87fb163cb3..39670a2e1b 100644 --- a/roles/grokmirror_mirror/templates/grokfsck.cron +++ b/roles/grokmirror_mirror/templates/grokfsck.cron @@ -1 +1 @@ -00 02 * * * grokmirror /usr/bin/grok-fsck -c {{ grokmirror_topdir }}/fsck.conf +00 02 * * * grokmirror /usr/bin/grok-fsck -c {{ grokmirror_topdir }}/grokmirror.conf diff --git a/roles/grokmirror_mirror/templates/grokmirror.conf b/roles/grokmirror_mirror/templates/grokmirror.conf new file mode 100644 index 0000000000..f2355d5025 --- /dev/null +++ b/roles/grokmirror_mirror/templates/grokmirror.conf @@ -0,0 +1,328 @@ +# Grokmirror 2.x and above have a single config file per each set +# of mirrored repos, instead of a separate repos.conf and fsck.conf +# with multiple sections. +# +# You can use ${varname} interpolation within the same section +# or ${sectname:varname} from any other section. +[core] +# +# Where are our mirrored repositories kept? +toplevel = {{ grokmirror_topdir }}/src.fedoraproject.org/ +# +# Where should we keep our manifest file? +manifest = ${toplevel}/manifest.js.gz +# +# Where should we put our log? Make sure it is logrotated, +# otherwise it will grow indefinitely. +log = ${toplevel}/log +# +# Options are "info" and "debug" for all the debug data (lots!) 
+loglevel = info +# +# Grokmirror version 2.x and above can automatically recognize related repositories +# by analyzing root commits. If it finds two or more related repositories, it can set +# up a unified "object storage" repo and fetch all refs from each related repository. +# For example, you can have two forks of linux.git: +# foo/bar/linux.git: +# refs/heads/master +# refs/heads/devbranch +# refs/tags/v5.0-rc3 +# ... +# baz/quux/linux.git: +# refs/heads/master +# refs/heads/devbranch +# refs/tags/v5.0-rc3 +# ... +# Grokmirror will set up an object storage repository and fetch all refs from +# both repositories: +# objstore/[random-guid-name].git +# refs/virtual/[sha1-of-foo/bar/linux.git:12]/heads/master +# refs/virtual/[sha1-of-foo/bar/linux.git:12]/heads/devbranch +# refs/virtual/[sha1-of-foo/bar/linux.git:12]/tags/v5.0-rc3 +# ... +# refs/virtual/[sha1-of-baz/quux/linux.git:12]/heads/master +# refs/virtual/[sha1-of-baz/quux/linux.git:12]/heads/devbranch +# refs/virtual/[sha1-of-baz/quux/linux.git:12]/tags/v5.0-rc3 +# ... +# +# This will dramatically improve storage on disk, as original repositories will be +# repacked to almost nothing. Grokmirror will repack the object storage repository +# with --delta-islands to help optimize packs for efficient clones. +objstore = ${toplevel}/objstore +# +# When copying objects into objstore repositories, we will use regular git +# porcelain commands, such as git fetch. However, this tends to be slow due to +# git erring on the side of caution when calculating haves and wants, so if you +# are running a busy mirror and want to save a lot of cycles, you will want to +# enable the setting below, which will use internal git plumbing for much more +# direct object copying between repos. 
+#objstore_uses_plumbing = yes +# +# Due to the nature of git alternates, if two repositories share all their objects +# with an "object storage" repo, any object from repoA can be retrieved from repoB +# via most web UIs if someone knows the object hash. +# E.g. this is how this trick works on Github: +# https://github.com/torvalds/linux/blob/b4061a10fc29010a610ff2b5b20160d7335e69bf/drivers/hid/hid-samsung.c#L113-L118 +# +# If you have private repositories that should absolutely not reveal any objects, +# add them here using shell-style globbing. They will still be set up for alternates +# if we find common roots with public repositories, but we won't fetch any objects +# from these repos into refs/virtual/*. +# +# Leave blank if you don't have any private repos (or don't offer a web UI). +#private = */private/* + +# Used by grok-manifest (and others for "pretty"). These options can be +# overridden using matching command-line switches to grok-manifest. +[manifest] +# Enable to save pretty-printed js (larger and slower, but easier to debug) +pretty = no +# List of repositories to ignore -- can take multiple entries with newline+tab +# and accepts shell globbing. +ignore = /testing/* + /private/* +# Enable to fetch objects into objstore repos after commit. This can be useful if +# someone tries to push the same objects to a sibling repository, but may significantly +# slow down post-commit hook operation, negating any speed gains. If set to no, the +# objects will be fetched during regular grok-fsck runs. +fetch_objstore = no +# Only include repositories that have git-daemon-export-ok. +check_export_ok = no + +# Used by grok-pull, mostly +[remote] +# The host part of the mirror you're pulling from. +site = https://src.fedoraproject.org +# +# Where the grok manifest is published. 
The following protocols +# are supported at this time: +# http:// or https:// using If-Modified-Since http header +# file:// (when manifest file is on NFS, for example) +# NB: You can no longer specify username:password as part of the URL with +# grokmirror 2.x and above. You can use a netrc file for this purpose. +manifest = ${site}/manifest.js.gz +# +# As an alternative to setting a manifest URL, you can define a manifest_command. +# It has three possible outcomes: +# exit code 0 + full remote manifest on stdout (must be valid json) +# exit code 1 + error message on stdout +# exit code 127 + nothing on stdout if remote manifest hasn't changed +# It should also accept '--force' as a single argument to force manifest retrieval +# even if it hasn't changed. +# See contrib/gitolite/* for example commands to use with gitolite. +#manifest_command = /usr/local/bin/grok-get-gl-manifest.sh +# +# If the remote is providing pre-generated preload bundles, list the path +# here. This is only useful if you're mirroring the entire repository +# collection and not just a handful of select repos. +#preload_bundle_url = https://some-cdn-site.com/preload/ + +# Used by grok-pull +[pull] +# +# Write out projects.list that can be used by gitweb or cgit. +# Leave blank if you don't want a projects.list. +projectslist = +# +# When generating projects.list, start at this subpath instead +# of at the toplevel. Useful when mirroring kernel or when generating +# multiple gitweb/cgit configurations for the same tree. +projectslist_trimtop = +# +# When generating projects.list, also create entries for symlinks. +# Otherwise we assume they are just legacy and keep them out of +# web interfaces. +projectslist_symlinks = no +# +# A simple hook to execute whenever a repository is modified. +# It passes the full path to the git repository modified as the final +# argument. You can define multiple hooks if you separate them by +# newline+whitespace. 
+post_update_hook = +# +# Should we purge repositories that are not present in the remote +# manifest? If set to "no" this can be overridden via the -p flag to +# grok-pull (useful if you have a very large collection of repos +# and don't want to walk the entire tree on each manifest run). +# See also: purgeprotect. +purge = yes +# +# There may be repositories that aren't replicated with grokmirror that +# you don't want to be purged. You can list them below using bash-style +# globbing. Separate multiple entries using newline+whitespace. +#nopurge = /gitolite-admin.git +# +# This prevents catastrophic mirror purges when our upstream gives us a +# manifest that is dramatically smaller than ours. The default is to +# refuse the purge if the remote manifest has over 5% fewer repositories +# than what we have, or in other words, if we have 100 repos and the +# remote manifest has shrunk to 95 repos or fewer, we refuse to purge, +# suspecting that something has gone wrong. You can set purgeprotect to +# a higher percentage, or override it entirely with --force-purge +# commandline flag. +purgeprotect = 5 +# +# If owner is not specified in the manifest, who should be listed +# as the default owner in tools like gitweb or cgit? +#default_owner = Grokmirror User +default_owner = Grokmirror User +# +# By default, we'll call the upstream origin "_grokmirror", but you can set your +# own name here (e.g. just call it "origin") +remotename = _grokmirror +# +# To speed up updates, grok-pull will use multiple threads. Please be +# considerate to the mirror you're pulling from and don't set this very +# high. You may also run into per-ip multiple session limits, so leave +# this number at a nice low setting. +pull_threads = 5 +# +# If git fetch fails, we will retry up to this many times before +# giving up and marking that repository as failed. +retries = 3 +# +# Use shell-globbing to list the repositories you would like to mirror. 
+# If you want to mirror everything, just say "*". Separate multiple entries +# with newline plus tab. Examples: +# +# mirror everything: +#include = * +# +# mirror just the main kernel sources: +#include = /pub/scm/linux/kernel/git/torvalds/linux.git +# /pub/scm/linux/kernel/git/stable/linux.git +# /pub/scm/linux/kernel/git/next/linux-next.git +include = * +# +# This is processed after the include. If you want to exclude some +# specific entries from an all-inclusive globbing above. E.g., to +# exclude all linux-2.4 git sources: +#exclude = */linux-2.4* +exclude = +# +# List repositories that should always reject forced pushes. +#ffonly = */torvalds/linux.git +# +# If you enable the following option and run grok-pull with -o, +# grok-pull will run continuously and will periodically recheck the +# remote manifest for new updates. See contrib for an example systemd +# service you can set up to continuously update your local mirror. The +# value is in seconds. +#refresh = 900 +# +# If you enable refresh, you can also enable the socket listener that +# allows for rapid push notifications from your primary mirror. The +# socket expects repository names matching what is in the local +# manifest, followed by a newline. E.g.: +# /pub/scm/linux/kernel/git/torvalds/linux.git\n +# +# Anything not matching a repository in the local manifest will be ignored. +# See contrib for example pubsub listener. +#socket = ${core:toplevel}/.updater.socket + +# Used by grok-fsck +[fsck] +# +# How often should we check each repository, in days. Any newly added +# repository will have the first check within a random period of 0 and +# $frequency, and then every $frequency after that, to assure that not +# all repositories are checked on the same day. Don't set to less than +# 7 unless you only mirror a few repositories (or really like to thrash +# your disks).
+frequency = 30 +# +# Where to keep the status file +statusfile = ${core:toplevel}/fsck.status.js +# +# Some errors are relatively benign and can be safely ignored. Add +# matching substrings to this field to ignore them. +ignore_errors = notice: + warning: disabling bitmap writing + ignoring extra bitmap file + missingTaggerEntry + missingSpaceBeforeDate +# +# If the fsck process finds errors that match any of these strings +# during its run, it will ask grok-pull to reclone this repository when +# it runs next. Only useful for minion mirrors, not for mirror masters. +reclone_on_errors = fatal: bad tree object + fatal: Failed to traverse parents + missing commit + missing blob + missing tree + broken link +# +# Should we repack the repositories? You almost always want this on, +# unless you are doing something really odd. +repack = yes +# +# We set proper flags for repacking depending if the repo is using +# alternates or not, and whether this is a full repack or not. We will +# also always build bitmaps (when it makes sense), to make cloning +# faster. You can add other flags (e.g. --threads and --window-memory) +# via the following parameter: +extra_repack_flags = +# +# These flags are added *in addition* to extra_repack_flags +extra_repack_flags_full = --window=250 --depth=50 +# +# If git version is new enough to support generating commit graphs, we +# will always generate them, though if your git version is older than +# 2.24.0, the graphs won't be automatically used unless core.commitgraph +# is set to true. You can turn off graph generation by setting the +# commitgraph option to "no". Graph generation will be skipped for +# child repos that use alternates. +commitgraph = yes +# +# Run git-prune to remove obsolete loose objects. Grokmirror will make +# sure this is a safe operation when it comes to objstore repos, so you +# should leave this enabled. 
+prune = yes +# +# Grokmirror is extremely careful about not pruning the repositories +# that are used by others via git alternates. However, it cannot prevent +# some other git process (not grokmirror-managed) from inadvertently +# running "git prune/gc". For example, this may happen if an admin +# mistypes a command in the wrong directory. Setting precious=yes will +# add extensions.preciousObjects=true to the git configuration file in +# such repositories, which will help prevent repository corruption +# between grok-fsck runs. +# +# When set to "yes", grokmirror will temporarily turn this feature off +# when running scheduled repacks in order to be able to delete redundant +# packs and loose objects that have already been packed. This is usually +# a safe operation when done by grok-fsck itself. However, if you set +# this to "always", grokmirror will leave this enabled even during +# grok-fsck runs, for maximum paranoia. Be warned, that this will result +# in ever-growing git repositories, so it only makes sense in very rare +# situations, such as for backup purposes. +precious = yes +# +# If you have a lot of forks using the same objstore repo, you may end +# up with thousands of refs being negotiated during each remote update. +# This tends to result in higher load and bigger negotiation transfers. +# Setting the "baselines" option allows you to designate a set of repos +# that are likely to have most of the relevant objects and ignore the +# rest of the objstore refs. This is done using the +# core.alternateRefsPrefixes feature (see git-config). +baselines = */kernel/git/next/linux-next.git +# +# Objstore repos are repacked with delta island support (see man +# git-config), but if you have one repo that is a lot more likely to be +# cloned than all the other ones, you can designate it as "islandCore", +# which will give it priority when creating packs. 
+islandcores = */kernel/git/torvalds/linux.git +# +# Generate preload bundles for objstore repos and put them into this +# location. Unless you are running a major mirroring hub site, you +# do not want this enabled. See corresponding preload_bundle_url +# entry in the [remote] section. +#preload_bundle_outdir = /some/http/accessible/path +# +# If there are any critical errors, the report will be sent to root. You +# can change the settings below to configure report delivery to suit +# your needs: +#report_to = root +#report_from = root +#report_subject = git fsck errors on my beautiful replica +#report_mailhost = localhost diff --git a/roles/grokmirror_mirror/templates/grokmirror.cron b/roles/grokmirror_mirror/templates/grokmirror.cron index 0e74c321b6..e0e230f1fe 100644 --- a/roles/grokmirror_mirror/templates/grokmirror.cron +++ b/roles/grokmirror_mirror/templates/grokmirror.cron @@ -1 +1 @@ -*/30 * * * * grokmirror /usr/bin/grok-pull -p -c {{ grokmirror_topdir }}/repos.conf +*/30 * * * * grokmirror /usr/bin/grok-pull -p -c {{ grokmirror_topdir }}/grokmirror.conf diff --git a/roles/grokmirror_mirror/templates/repos.conf b/roles/grokmirror_mirror/templates/repos.conf deleted file mode 100644 index 03698775a3..0000000000 --- a/roles/grokmirror_mirror/templates/repos.conf +++ /dev/null @@ -1,106 +0,0 @@ -# You can pull from multiple grok mirrors, just create -# a separate section for each mirror. The name can be anything. -[src.fedoraproject.org] -# The host part of the mirror you're pulling from. -#site = git://git.kernel.org -site = https://src.fedoraproject.org -# -# Where the grok manifest is published. The following protocols -# are supported at this time: -# http:// or https:// using If-Modified-Since http header -# file:// (when manifest file is on NFS, for example) -#manifest = http://git.kernel.org/manifest.js.gz -manifest = https://src.fedoraproject.org/grokmirror/manifest.js.gz -# -# Where are we going to put the mirror on our disk? 
-#toplevel = /var/lib/git/mirror -toplevel = {{ grokmirror_topdir }}/src.fedoraproject.org/ -# -# Where do we store our own manifest? Usually in the toplevel. -#mymanifest = /var/lib/git/mirror/manifest.js.gz -mymanifest = {{ grokmirror_topdir }}/src.fedoraproject.org/manifest.js.gz -# -# Write out projects.list that can be used by gitweb or cgit. -# Leave blank if you don't want a projects.list. -#projectslist = /var/lib/git/mirror/projects.list -projectslist = -# -# When generating projects.list, start at this subpath instead -# of at the toplevel. Useful when mirroring kernel or when generating -# multiple gitweb/cgit configurations for the same tree. -#projectslist_trimtop = /pub/scm/ -projectslist_trimtop = /pub/scm/ -# -# The default behaviour is to use cross-repository references (if present) -# to set git alternates between projects. This helps both significantly -# reduce the bandwidth during cloning and dramatically reduce the mirror size -# on disk. If for some reason you always want to create non-shared repositories, -# set ignore_repo_references to "yes" -# NOTE: this will NOT remove alternates for repositories already mirrored. You -# will need to run "git repack -a" and then manually remove the -# objects/info/alternates file for each repository. -#ignore_repo_references = no -# -# When generating projects.list, also create entries for symlinks. -# Otherwise we assume they are just legacy and keep them out of -# web interfaces. -#projectslist_symlinks = yes -projectslist_symlinks = no -# -# A simple hook to execute whenever a repository is modified. -# It passes the full path to the git repository modified as the only -# argument. -#post_update_hook = /usr/local/bin/make-git-fairies-appear -post_update_hook = -# -# This prevents catastrophic mirror purges when our upstream master gives us -# a manifest that is dramatically smaller than ours. 
The default is to refuse -# the purge if the remote manifest has over 10% fewer repositories than what we -# have, or in other words, if we have 100 repos and the remote manifest has -# shrunk to 95 repos or less, we refuse to purge, suspecting that something has -# gone wrong. You can set purgeprotect to a higher percentage, or override -# it entirely with --force-purge commandline flag. -#purgeprotect = 5 -purgeprotect = 5 -# -# If owner is not specified in the manifest, who should be listed -# as the default owner in tools like gitweb or cgit? -#default_owner = Grokmirror User -default_owner = Grokmirror User -# -# Where do we put the logs? -#log = /var/log/mirror/kernelorg.log -log = {{ grokmirror_topdir }}/src.fedoraproject.org/src.fedoraproject.org.log -# -# Log level can be "info" or "debug" -#loglevel = info -loglevel = info -# -# To speed up updates, grok-pull will use multiple threads. Please be -# considerate to the mirror you're pulling from and don't set this very -# high. You may also run into per-ip multiple session limits, so leave this -# number at a nice low setting. -#pull_threads = 5 -pull_threads = 5 -# -# Use shell-globbing to list the repositories you would like to mirror. -# If you want to mirror everything, just say "*". Separate multiple entries -# with newline plus tab. Examples: -# -# mirror everything: -#include = * -# -# mirror just the main kernel sources: -#include = /pub/scm/linux/kernel/git/torvalds/linux.git -# /pub/scm/linux/kernel/git/stable/linux-stable.git -# /pub/scm/linux/kernel/git/next/linux-next.git -# -# mirror just git: -#include = /pub/scm/git/* -include = * -# -# This is processed after the include. If you want to exclude some specific -# entries from an all-inclusive globbing above. E.g., to exclude all linux-2.4 -# git sources: -#exclude = */linux-2.4* -exclude =