// workspaces/arborist/lib/add-remote-git.js
//
// Provenance: this file is the payload of patch 2/2
// (7addae37c89471ac57645fd6c56bb080ff57c9fb, Thu, 1 Feb 2024, "feat:
// add-remote-git - attempt to bring back submodules") of a two-patch series
// by siemhesda <143130929+siemhesda@users.noreply.github.com>.
// Patch 1/2 of the same series (32e0b6cc5b672c2e547ec7a14ab1d8071d706b1d,
// Thu, 18 Jan 2024, "docs: remove submodules part from the install docs")
// deletes the following paragraph from
// docs/lib/content/commands/npm-install.md:
//
//     If the repository makes use of submodules, those submodules will be
//     cloned as well.
//
// Overview of the callback chain implemented below:
//
//   addRemoteGit
//     -> tryClone (directly, or through tryGitProto -> tryHTTPS -> trySSH)
//       -> mirrorRemote | validateExistingRemote -> updateRemote | resetRemote
//         -> setPermissions -> resolveHead -> cloneResolved
//           -> checkoutTreeish -> updateSubmodules -> addLocal

const assert = require('assert')
const fs = require('graceful-fs')
const path = require('path')
const url = require('url')

const chownr = require('chownr')
const dezalgo = require('dezalgo')
const hostedFromURL = require('hosted-git-info').fromUrl
const inflight = require('inflight')
const log = require('npmlog')
const mkdir = require('mkdirp')
const normalizeGitUrl = require('normalize-git-url')
const npa = require('npm-package-arg')
const realizePackageSpecifier = require('realize-package-specifier')
const uniqueFilename = require('unique-filename')

const addLocal = require('./add-local.js')
const correctMkdir = require('../utils/correct-mkdir.js')
const git = require('../utils/git.js')
const npm = require('../npm.js')
const rm = require('../utils/gently-rm.js')
const tempFilename = require('../utils/temp-filename.js')

// Mirrors of remote repositories live under <cache>/_git-remotes; the
// (empty) template directory handed to `git clone --template` lives inside it.
const remotes = path.resolve(npm.config.get('cache'), '_git-remotes')
const templates = path.join(remotes, '_templates')

// GIT* environment variables that are passed through to git (see gitEnv).
const VALID_VARIABLES = [
  'GIT_ASKPASS',
  'GIT_PROXY_COMMAND',
  'GIT_SSH',
  'GIT_SSH_COMMAND',
  'GIT_SSL_CAINFO',
  'GIT_SSL_NO_VERIFY',
]

// Render a git argument list the way it is shown in log prefixes.
function describeCommand (args) {
  return 'git ' + args.join(' ') + ':'
}

// Join whatever git wrote to stdout/stderr into one trimmed string. Missing
// streams (e.g. when the command could not be spawned at all) are skipped so
// that the log never contains the literal text "undefined".
function joinOutput (stdout, stderr) {
  return [stdout, stderr].filter(Boolean).join('\n').trim()
}

// Error reported once every transport offered by the host has been tried.
function cannotCloneError (from) {
  return new Error(from + ' can not be cloned via Git, SSH, or HTTPS')
}

// Cache the git repository named by `uri` and hand the resulting package
// data to the callback.
//
// uri  - git URL or hosted shortcut, as typed on the command line or as
//        normalized by normalize-package-data / read-package-json /
//        read-installed (string, asserted).
// _cb  - function (er, data) (asserted); wrapped with dezalgo before use.
module.exports = addRemoteGit
function addRemoteGit (uri, _cb) {
  assert(typeof uri === 'string', 'must have git URL')
  assert(typeof _cb === 'function', 'must have callback')
  const cb = dezalgo(_cb)

  log.verbose('addRemoteGit', 'caching', uri)

  // figure out what to do with the URL using hosted-git-info
  const hosted = hostedFromURL(uri)
  if (!hosted) {
    // not a known host: verify that this is a Git URL before continuing
    const spec = npa(uri)
    if (spec.type !== 'git') {
      return cb(new Error(uri + ' is not a Git or GitHub URL'))
    }
    return tryClone(spec.rawSpec, uri, false, cb)
  }

  const isShortcut = hosted.getDefaultRepresentation() === 'shortcut'

  // normalize GitHub syntax to org/repo (for now)
  const from = hosted.type === 'github' && isShortcut
    ? hosted.path()
    : hosted.toString()

  log.verbose('addRemoteGit', from, 'is a repository hosted by', hosted.type)

  // prefer explicit URLs to pushing everything through shortcuts
  if (!isShortcut) {
    return tryClone(from, hosted.toString(), false, cb)
  }

  // try git:, then git+https:, then git+ssh: before failing
  tryGitProto(from, hosted, cb)
}

// First transport for shortcuts: the host's git: URL. Falls through to
// tryHTTPS when the host has no git: URL or when the clone fails.
function tryGitProto (from, hostedInfo, cb) {
  const gitURL = hostedInfo.git()
  if (!gitURL) {
    return tryHTTPS(from, hostedInfo, cb)
  }

  log.silly('tryGitProto', 'attempting to clone', gitURL)
  tryClone(from, gitURL, true, function (er, ...rest) {
    if (er) {
      return tryHTTPS(from, hostedInfo, cb)
    }
    cb(er, ...rest)
  })
}

// Second transport: the host's https: URL. Falls through to trySSH when the
// clone fails; fails outright when the host has no https: URL.
function tryHTTPS (from, hostedInfo, cb) {
  const httpsURL = hostedInfo.https()
  if (!httpsURL) {
    return cb(cannotCloneError(from))
  }

  log.silly('tryHTTPS', 'attempting to clone', httpsURL)
  tryClone(from, httpsURL, true, function (er, ...rest) {
    if (er) {
      return trySSH(from, hostedInfo, cb)
    }
    cb(er, ...rest)
  })
}

// Last transport: the host's ssh: URL. This is only reached after HTTPS has
// already failed, so a missing ssh: URL is a terminal error. (Bouncing back
// to tryHTTPS here would retry the failed clone and loop for as long as it
// keeps failing.)
function trySSH (from, hostedInfo, cb) {
  const sshURL = hostedInfo.ssh()
  if (!sshURL) {
    return cb(cannotCloneError(from))
  }

  log.silly('trySSH', 'attempting to clone', sshURL)
  tryClone(from, sshURL, false, cb)
}

// Bring the cached mirror for `combinedURL` up to date (creating, resetting
// or fetching it as needed) and continue down the chain.
//
// from        - label used in log output and as data._from.
// combinedURL - URL, possibly with a #treeish, split by normalize-git-url.
// silent      - when true, a failed mirror clone is logged at verbose level
//               instead of error level.
// cb          - function (er, data)
function tryClone (from, combinedURL, silent, cb) {
  log.silly('tryClone', 'cloning', from, 'via', combinedURL)

  const { url: cloneURL, branch: treeish } = normalizeGitUrl(combinedURL)

  // ensure that similarly-named remotes don't collide
  const uniquePath = uniqueFilename(
    remotes,
    combinedURL.replace(/[^a-zA-Z0-9]+/g, '-'),
    cloneURL
  )
  const repoID = path.relative(remotes, uniquePath)
  const cachedRemote = path.join(remotes, repoID)

  // inflight hands back nothing when a request for the same repoID is
  // already pending; in that case there is nothing more to do here
  const done = inflight(repoID, cb)
  if (!done) {
    return log.verbose('tryClone', repoID, 'already in flight; waiting')
  }
  log.verbose('tryClone', repoID, 'not in flight; caching')

  // always set permissions on the cached remote
  function finish (er, data) {
    if (er) {
      return done(er, data)
    }
    addModeRecursive(cachedRemote, npm.modes.file, function (modeEr) {
      return done(modeEr, data)
    })
  }

  // initialize the remotes cache with the correct perms
  getGitDir(function (er) {
    if (er) {
      return done(er)
    }
    fs.stat(cachedRemote, function (statEr, stats) {
      if (statEr) {
        // nothing usable at that path yet: make a fresh mirror
        return mirrorRemote(from, cloneURL, treeish, cachedRemote, silent, finish)
      }
      if (!stats.isDirectory()) {
        return resetRemote(from, cloneURL, treeish, cachedRemote, finish)
      }

      validateExistingRemote(from, cloneURL, treeish, cachedRemote, finish)
    })
  })
}

// don't try too hard to hold on to a remote: remove whatever is at
// cachedRemote and mirror the repository again
function resetRemote (from, cloneURL, treeish, cachedRemote, cb) {
  log.info('resetRemote', 'resetting', cachedRemote, 'for', from)
  rm(cachedRemote, function (er) {
    if (er) {
      return cb(er)
    }
    mirrorRemote(from, cloneURL, treeish, cachedRemote, false, cb)
  })
}

// reuse a cached remote when possible, but nuke it if it's in an
// inconsistent state (git config fails, or origin is not cloneURL)
function validateExistingRemote (from, cloneURL, treeish, cachedRemote, cb) {
  git.whichAndExec(
    ['config', '--get', 'remote.origin.url'],
    { cwd: cachedRemote, env: gitEnv() },
    function (er, stdout, stderr) {
      let originURL
      if (stdout) {
        originURL = stdout.trim()
        log.silly('validateExistingRemote', from, 'remote.origin.url:', originURL)
      }

      const problem = (stderr ? stderr.trim() : stderr) || er
      if (problem) {
        log.warn('addRemoteGit', from, 'resetting remote', cachedRemote, 'because of error:', problem)
        return resetRemote(from, cloneURL, treeish, cachedRemote, cb)
      }
      if (cloneURL !== originURL) {
        log.warn(
          'addRemoteGit',
          from,
          'pre-existing cached repo', cachedRemote, 'points to', originURL, 'and not', cloneURL
        )
        return resetRemote(from, cloneURL, treeish, cachedRemote, cb)
      }

      log.verbose('validateExistingRemote', from, 'is updating existing cached remote', cachedRemote)
      updateRemote(from, cloneURL, treeish, cachedRemote, cb)
    }
  )
}

// make a complete bare mirror of the remote repo
// NOTE: npm uses a blank template directory to prevent weird inconsistencies
// https://github.com/npm/npm/issues/5867
function mirrorRemote (from, cloneURL, treeish, cachedRemote, silent, cb) {
  mkdir(cachedRemote, function (er) {
    if (er) {
      return cb(er)
    }

    // a single argument list, so the logged command is the executed command
    const args = [
      'clone',
      '--template=' + templates,
      '--mirror',
      cloneURL, cachedRemote,
    ]
    git.whichAndExec(
      args,
      { cwd: cachedRemote, env: gitEnv() },
      function (execEr, stdout, stderr) {
        if (execEr) {
          const combined = joinOutput(stdout, stderr)
          const command = describeCommand(args)
          if (silent) {
            log.verbose(command, combined)
          } else {
            log.error(command, combined)
          }
          return cb(execEr)
        }
        log.verbose('mirrorRemote', from, 'git clone ' + cloneURL, stdout.trim())
        setPermissions(from, cloneURL, treeish, cachedRemote, cb)
      }
    )
  })
}

// Give the cached remote the same owner as the remotes directory (skipped on
// Windows), then continue with resolveHead.
function setPermissions (from, cloneURL, treeish, cachedRemote, cb) {
  if (process.platform === 'win32') {
    log.verbose('setPermissions', from, 'skipping chownr on Windows')
    return resolveHead(from, cloneURL, treeish, cachedRemote, cb)
  }

  getGitDir(function (er, cs) {
    if (er) {
      log.error('setPermissions', from, 'could not get cache stat')
      return cb(er)
    }

    chownr(cachedRemote, cs.uid, cs.gid, function (chownEr) {
      if (chownEr) {
        log.error(
          'setPermissions',
          'Failed to change git repository ownership under npm cache for',
          cachedRemote
        )
        return cb(chownEr)
      }

      log.verbose('setPermissions', from, 'set permissions on', cachedRemote)
      resolveHead(from, cloneURL, treeish, cachedRemote, cb)
    })
  })
}

// fetch origin into an already-validated cached mirror, then fix up
// ownership via setPermissions
function updateRemote (from, cloneURL, treeish, cachedRemote, cb) {
  git.whichAndExec(
    ['fetch', '-a', 'origin'],
    { cwd: cachedRemote, env: gitEnv() },
    function (er, stdout, stderr) {
      if (er) {
        log.error('git fetch -a origin (' + cloneURL + ')', joinOutput(stdout, stderr))
        return cb(er)
      }
      log.verbose('updateRemote', 'git fetch -a origin (' + cloneURL + ')', stdout.trim())

      setPermissions(from, cloneURL, treeish, cachedRemote, cb)
    }
  )
}

// branches and tags are both symbolic labels that can be attached to different
// commits, so resolve the commit-ish to the current actual treeish the label
// corresponds to
//
// important for shrinkwrap
function resolveHead (from, cloneURL, treeish, cachedRemote, cb) {
  log.verbose('resolveHead', from, 'original treeish:', treeish)
  const args = ['rev-list', '-n1', treeish]
  git.whichAndExec(
    args,
    { cwd: cachedRemote, env: gitEnv() },
    function (er, stdout, stderr) {
      if (er) {
        log.error(describeCommand(args), stderr)
        return cb(er)
      }

      const resolvedTreeish = stdout.trim()
      log.silly('resolveHead', from, 'resolved treeish:', resolvedTreeish)

      const resolvedURL = getResolved(cloneURL, resolvedTreeish)
      if (!resolvedURL) {
        return cb(new Error(
          'unable to clone ' + from + ' because git clone string ' +
          cloneURL + ' is in a form npm can\'t handle'
        ))
      }
      log.verbose('resolveHead', from, 'resolved Git URL:', resolvedURL)

      // generate a unique filename
      const tmpdir = path.join(tempFilename('git-cache'), resolvedTreeish)
      log.silly('resolveHead', 'Git working directory:', tmpdir)

      mkdir(tmpdir, function (mkdirEr) {
        if (mkdirEr) {
          return cb(mkdirEr)
        }

        cloneResolved(from, resolvedURL, resolvedTreeish, cachedRemote, tmpdir, cb)
      })
    }
  )
}

// make a clone from the mirrored cache so we have a temporary directory in
// which we can check out the resolved treeish
function cloneResolved (from, resolvedURL, resolvedTreeish, cachedRemote, tmpdir, cb) {
  const args = ['clone', cachedRemote, tmpdir]
  git.whichAndExec(
    args,
    { cwd: cachedRemote, env: gitEnv() },
    function (er, stdout, stderr) {
      if (er) {
        log.error(describeCommand(args), stderr)
        return cb(er)
      }
      log.verbose('cloneResolved', from, 'clone', joinOutput(stdout, stderr))

      checkoutTreeish(from, resolvedURL, resolvedTreeish, tmpdir, cb)
    }
  )
}

// there is no safe way to do a one-step clone to a treeish that isn't
// guaranteed to be a branch, so explicitly check out the treeish once it's
// cloned
function checkoutTreeish (from, resolvedURL, resolvedTreeish, tmpdir, cb) {
  const args = ['checkout', resolvedTreeish]
  git.whichAndExec(
    args,
    { cwd: tmpdir, env: gitEnv() },
    function (er, stdout, stderr) {
      if (er) {
        log.error(describeCommand(args), stderr)
        return cb(er)
      }
      log.verbose('checkoutTreeish', from, 'checkout', joinOutput(stdout, stderr))

      updateSubmodules(from, resolvedURL, tmpdir, cb)
    }
  )
}

// Initialize and update submodules (recursively) in the temporary checkout,
// then hand the checkout to addLocal and stamp _from / _resolved on the
// resulting package data.
function updateSubmodules (from, resolvedURL, tmpdir, cb) {
  const args = ['submodule', '-q', 'update', '--init', '--recursive']
  git.whichAndExec(
    args,
    { cwd: tmpdir, env: gitEnv() },
    function (er, stdout, stderr) {
      if (er) {
        log.error(describeCommand(args), stderr)
        return cb(er)
      }
      log.verbose('updateSubmodules', from, 'submodule update', joinOutput(stdout, stderr))

      // convince addLocal that the checkout is a local dependency
      realizePackageSpecifier(tmpdir, function (specEr, spec) {
        if (specEr) {
          log.error('addRemoteGit', 'Failed to map', tmpdir, 'to a package specifier')
          return cb(specEr)
        }

        // ensure pack logic is applied
        // https://github.com/npm/npm/issues/6400
        addLocal(spec, null, function (addEr, data) {
          if (data) {
            if (npm.config.get('save-exact')) {
              log.verbose('addRemoteGit', 'data._from:', resolvedURL, '(save-exact)')
              data._from = resolvedURL
            } else {
              log.verbose('addRemoteGit', 'data._from:', from)
              data._from = from
            }

            log.verbose('addRemoteGit', 'data._resolved:', resolvedURL)
            data._resolved = resolvedURL
          }

          cb(addEr, data)
        })
      })
    }
  )
}

// Make sure the remotes directory and the empty templates directory exist,
// and give the templates directory the same owner as the remotes directory.
// Calls back with (er, stats), where stats is what correctMkdir reported for
// the remotes directory.
function getGitDir (cb) {
  correctMkdir(remotes, function (er, stats) {
    if (er) {
      return cb(er)
    }

    // We don't need global templates when cloning. Use an empty directory for
    // the templates, creating it (and setting its permissions) if necessary.
    mkdir(templates, function (mkdirEr) {
      if (mkdirEr) {
        return cb(mkdirEr)
      }

      // Ensure that both the template and remotes directories have the correct
      // permissions.
      fs.chown(templates, stats.uid, stats.gid, function (chownEr) {
        cb(chownEr, stats)
      })
    })
  })
}

// Environment handed to every git invocation; built once and then reused.
let gitEnv_
function gitEnv () {
  // git responds to env vars in some weird ways in post-receive hooks
  // so don't carry those along.
  if (gitEnv_) {
    return gitEnv_
  }

  // allow users to override npm's insistence on not prompting for
  // passphrases, but default to just failing when credentials
  // aren't available
  const env = { GIT_ASKPASS: 'echo' }

  // copy everything except GIT* variables that are not explicitly allowed
  for (const name of Object.keys(process.env)) {
    if (name.startsWith('GIT') && !VALID_VARIABLES.includes(name)) {
      continue
    }
    env[name] = process.env[name]
  }

  gitEnv_ = env
  return gitEnv_
}

// Build the URL recorded as `_resolved`: `uri` with a git-style protocol and
// `#<treeish>` as its fragment (replacing any fragment already present).
//
// uri     - clone URL (string).
// treeish - commit-ish to pin the URL to.
// Returns the resolved URL string.
addRemoteGit.getResolved = getResolved
function getResolved (uri, treeish) {
  // normalize hosted-git-info clone URLs back into regular URLs
  // this will only work on URLs that hosted-git-info recognizes
  // https://github.com/npm/npm/issues/7961
  const rehydrated = hostedFromURL(uri)
  let resolved = rehydrated ? rehydrated.toString() : uri

  const { protocol } = url.parse(resolved)

  // Checks for known protocols:
  // http:, https: and ssh: (with optional git+ prefix), git:, and file:.
  // Anything else is assumed to be an ssh location.
  if (!protocol || !/^(((git\+)?(https?|ssh))|git|file):$/.test(protocol)) {
    resolved = 'git+ssh://' + resolved
  }

  if (!/^git[+:]/.test(resolved)) {
    resolved = 'git+' + resolved
  }

  // Not all URIs are actually URIs, so use regex for the treeish. A replacer
  // function is used so that "$" sequences in the treeish are taken literally
  // rather than as replacement patterns.
  return resolved.replace(/(?:#.*)?$/, () => '#' + treeish)
}

// similar to chmodr except it adds permissions rather than overwriting them
// adapted from https://github.com/isaacs/chmodr/blob/master/chmodr.js
function addModeRecursive (cachedRemote, mode, cb) {
  fs.readdir(cachedRemote, function (er, children) {
    // Any error other than ENOTDIR means it's not readable, or doesn't exist.
    // Give up.
    if (er && er.code !== 'ENOTDIR') {
      return cb(er)
    }
    // ENOTDIR: not a directory, so apply the mode as given
    if (er) {
      return addMode(cachedRemote, mode, cb)
    }
    // an empty directory is still a directory: give it the search bits too,
    // the same as a populated directory gets below
    if (!children.length) {
      return addMode(cachedRemote, dirMode(mode), cb)
    }

    let pending = children.length
    let failed = false
    children.forEach(function (child) {
      addModeRecursive(path.resolve(cachedRemote, child), mode, then)
    })

    function then (childEr) {
      // only the first failure is reported
      if (failed) {
        return undefined
      }
      if (childEr) {
        failed = true
        return cb(childEr)
      }
      // the directory itself is updated after all of its children
      if (--pending === 0) {
        return addMode(cachedRemote, dirMode(mode), cb)
      }
    }
  })
}

// OR `mode` into the current mode of cachedRemote.
function addMode (cachedRemote, mode, cb) {
  fs.stat(cachedRemote, function (er, stats) {
    if (er) {
      return cb(er)
    }
    fs.chmod(cachedRemote, stats.mode | mode, cb)
  })
}

// taken from https://github.com/isaacs/chmodr/blob/master/chmodr.js
// for each class (user/group/other) that has the read bit, also set the
// execute (search) bit
function dirMode (mode) {
  let result = mode
  if (result & 0o400) {
    result |= 0o100
  }
  if (result & 0o040) {
    result |= 0o010
  }
  if (result & 0o004) {
    result |= 0o001
  }
  return result
}