From 1b6009558b816f48719497422c793448521db0f2 Mon Sep 17 00:00:00 2001 From: Emma Mansell <73774046+7emansell@users.noreply.github.com> Date: Mon, 29 Dec 2025 13:28:36 -0500 Subject: [PATCH 01/15] mid --- lib/annotated-marc-serializer.js | 1 + lib/marc-serializer.js | 404 +++++++++++++++++++++++++++++++ lib/resources.js | 24 ++ routes/resources.js | 2 + 4 files changed, 431 insertions(+) create mode 100644 lib/marc-serializer.js diff --git a/lib/annotated-marc-serializer.js b/lib/annotated-marc-serializer.js index 99e717a5..4dd537fa 100644 --- a/lib/annotated-marc-serializer.js +++ b/lib/annotated-marc-serializer.js @@ -392,6 +392,7 @@ AnnotatedMarcSerializer.serialize = function (bib) { // Format for return to client: return { bib: { + unserialized: bib, id: bib.id, nyplSource: bib.nyplSource, fields: Object.keys(doc) diff --git a/lib/marc-serializer.js b/lib/marc-serializer.js new file mode 100644 index 00000000..7a60c7dc --- /dev/null +++ b/lib/marc-serializer.js @@ -0,0 +1,404 @@ +/** + * @typedef {object} AnnotatedMarcRuleSubfieldSpec + * @property {array} subfields - Array of subfields to match + * @property {string} directive - Indicates whether the matching subfields + * should be "include"d or "exclude"d + */ + +/** + * + * @typedef {object} AnnotatedMarcRule + * @property {string} fieldTag - Single character tag broadly classifying tag (e.g. 'y') + * @property {string} marcIndicatorRegExp - Stringified regex for matching a + * VarField tag joined to 1st and 2nd indicators + * @property {AnnotatedMarcRuleSubfieldSpec} subfieldSpec - How to match subfields + * @property {string} label - What label to use in mapping + * @property {string} directive - Whether to include/exclude if matched. + */ + +/** + * @typedef {object} SubField + * @property {string} tag - Identifying tag (e.g. '6', 'a') + * @property {string} content - Value of subfield + */ + +/** + * @typedef {object} VarField + * @property {string} marcTag - Three digit number classifying field (e.g. '100') + * @property {string} fieldTag - Single character tag broadly classifying tag (e.g. 'y') + * @property {string} content - Root level content (usually null/ignored) + * @property {array} subfields + */ + +/** + * @typedef {object} Bib + * @property {array} varFields - Array of varfields + */ + +const arrayUnique = require('./util').arrayUnique +const relatorMappings = require('../data/relator-mappings.json') + +class MarcSerializer { +} + +// Load rules form disc serialization: +MarcSerializer.mappingRules = require('../data/annotated-marc-rules.json') + .map((rule) => { + return Object.assign({}, rule, { + marcIndicatorRegExp: new RegExp(rule.marcIndicatorRegExp) + }) + }) + +MarcSerializer.orderedFieldTags = arrayUnique(MarcSerializer.mappingRules.map((rule) => rule.fieldTag)) + +/** + * Given the raw source of a webpub.def file, returns an array of usable + * rules that relate field labels to marc queries. + */ +MarcSerializer.parseWebpubToAnnotatedMarcRules = function (webpubContent) { + const mappingRules = webpubContent.split(/\n/) + .map((line) => line.trim()) + // Make sure line has content (after removing # comments) + .filter((line) => line && line.replace(/\s*#.*/, '')) + // Convert to columns: + .map((line) => line.split('|')) + // Convert to named columns: + .map((line) => { + return { + type: line[0], + fieldTag: line[1], + marcIndicatorPattern: line[2], + subfields: line[3], + label: line[4] + } + }) + // Make sure we're handling a 'bib' line + .filter((line) => line.type === 'b') + .map((line) => { + // Raw examples: + // b|s|8..|-6|Series||b| + // b|r|310|-6|Current Frequency||b| + // b|y|8[^5].|u|||b| + + const subfields = Array.from(line.subfields) + let subfieldSpec = { subfields, directive: 'include' } + if (subfields[0] === '-') subfieldSpec = { subfields: subfields.slice(1), directive: 'exclude' } + + return { + fieldTag: line.fieldTag, + marcIndicatorRegExp: new RegExp('^' + line.marcIndicatorPattern), + subfieldSpec, + label: line.label, + directive: line.label ? 'include' : 'exclude' + } + }) + + return mappingRules +} + +/** + * Given raw webpub.def content, builds an array of {AnnotatedMarcRule}s + */ +MarcSerializer.buildAnnotatedMarcRules = function (webpubContent) { + // No one can say why, but there's an "Added Title" entry that is commented + // out, but should not be. Un-comment the Added Title catch-all rule: + webpubContent = webpubContent.replace('# b|u||-06|Added Title||b|', 'b|u||-06|Added Title||b|') + + return MarcSerializer.parseWebpubToAnnotatedMarcRules(webpubContent) + // Apply label overrides + .map((rule) => { + // Override label for URLs. We want them labeled "Connect to": + if (rule.fieldTag === 'y' && rule.marcIndicatorRegExp.source === '^856') { + rule.label = 'Connect to:' + } + return rule + }) +} + +/** + * Given a sierra marc document, returns an array of varField bocks matching + * the given rule + */ +MarcSerializer.matchingMarcFields = function (bib, rule) { + return bib.varFields + .filter((field) => { + const fieldMarcIndicator = `${field.marcTag}${field.ind1 || ' '}${field.ind2 || ' '}` + return rule.marcIndicatorRegExp.test(fieldMarcIndicator) && + rule.fieldTag === field.fieldTag + }) +} + +/** + * Given a {VarField} and a {AnnotatedMarcRule}, returns true if matched. + * + * @return {boolean} + */ +MarcSerializer.varFieldMatches = function (field, rule) { + const fieldMarcIndicator = `${field.marcTag}${field.ind1 || ' '}${field.ind2 || ' '}` + return rule.marcIndicatorRegExp.test(fieldMarcIndicator) && + rule.fieldTag === field.fieldTag +} + +/** + * Given a varField, returns a copy with any hidden subfield content replaced + * with "[redacted]" based on given rule + */ +MarcSerializer.buildSourceWithMasking = function (field, rule) { + return Object.assign({}, field, { + subfields: (field.subfields || []) + .map((subfield) => { + let subfieldContent = subfield.content + // If directive is 'include' and subfield not included + // .. or directive is 'exclude', but subfield included, + // [redact] it: + if ((rule.subfieldSpec.directive === 'include' && rule.subfieldSpec.subfields.indexOf(subfield.tag) < 0) || + (rule.subfieldSpec.directive === 'exclude' && rule.subfieldSpec.subfields.indexOf(subfield.tag) >= 0)) { + subfieldContent = '[redacted]' + } + return Object.assign({}, subfield, { content: subfieldContent }) + }) + }) +} + +/** + * Get prefix for a marctag & subfield, given a previous subfield (if avail.) + * + * @param {string} marcTag - MARC tag (e.g. '651') + * @param {string} subfield - Subfield tag (e.g. 'x') + * @param {string} previousSubfield - Tag of preceding subfield if available. + */ +const prefixForSubfield = (marcTag, subfield, previousSubfield = null) => { + // By default, prefix should be ' ' if there's a previous subfield: + let prefix = previousSubfield ? ' ' : '' + + // If subfield is '4' and prev. subfield also '4', add a '.' + if (subfield === '4' && previousSubfield === '4') { + prefix = '. ' + + // Otherwise determine prefix by marcTag/subfield + } else { + switch (parseInt(marcTag)) { + // Subject fields: + case 600: + case 610: + case 611: + case 630: + case 650: + case 655: + case 651: + case 690: + switch (subfield) { + case 'v': + case 'x': + case 'y': + case 'z': + prefix = ' -- ' + break + } + break + } + } + return prefix +} + +/** + * Given an varfield block (presumably matching the given rule), returns + * an object representing the match. + */ +MarcSerializer.formatVarFieldMatch = function (matchingVarField, rule) { + const extractContent = function (subfield) { + if (subfield.tag === '4') { + let newContent = subfield.content + const words = newContent.split(' ') + words.forEach((word) => { + const replacer = word.length === 3 && relatorMappings[word] + if (replacer) { + newContent = newContent.replace(word, replacer) + } + }) + return newContent + } else { + return subfield.content + } + } + + const matchedSubfields = (matchingVarField.subfields || []).filter((subfield) => { + // If rule includes a subfields directive + if (rule.subfieldSpec) { + // Is subfields directive exclusionary? Remove matching: + if (rule.subfieldSpec.directive === 'exclude') return rule.subfieldSpec.subfields.indexOf(subfield.tag) < 0 + // ..Otherwise keep matching: + else return rule.subfieldSpec.subfields.indexOf(subfield.tag) >= 0 + } + return false + }) + if (!matchedSubfields.length) return + const content = matchingVarField.content || matchedSubfields + .map((subfield, ind) => { + const previousSubfield = matchedSubfields[ind - 1] + // Construct prefix based on subfield & prev. subfield: + const prefix = prefixForSubfield(matchingVarField.marcTag, subfield.tag, previousSubfield ? previousSubfield.tag : null) + return prefix + extractContent(subfield) + }).join('') + // Collect other field values apart from primary value: + const additionalFields = {} + + // For "Connect to:" mapped blocks, extract label: + if (rule.label === 'Connect to:') { + const labelSubfields = ['z', 'y', '3'] + additionalFields.label = (matchingVarField.subfields || []) + .filter((s) => labelSubfields.indexOf(s.tag) >= 0) + .map((s) => s.content) + // Make sure it has content: + .filter((s) => s) + .shift() + // If no label found, use URL as label + if (!additionalFields.label) additionalFields.label = content + } + + // Include source field with masked subfields: + const source = MarcSerializer.buildSourceWithMasking(matchingVarField, rule) + + return Object.assign(additionalFields, { content, source }) +} + +/** + * Given an array of varfield blocks and a annotated-marc rule + * returns an array of objects with `content` and `source` properties + */ +MarcSerializer.formatVarFieldMatches = function (matchingVarFields, rule) { + return matchingVarFields.map((field) => MarcSerializer.formatVarFieldMatch(field, rule)) +} + +/** + * Given a document, a label, and an array of values, adds values to doc + * + * @param {object} doc - The plainobject to update and return + * @param {string} label - The label to use + * @param {array} values - Array of values to add + * + * @returns {object} The updated document + */ +MarcSerializer.addStatementsToDoc = function (doc, rule, values) { + const label = rule.label + const fieldTag = rule.fieldTag + const fields = doc[fieldTag] + const last = fields[fields.length - 1] + if (last && last.label === label) { + last.values = last.values.concat(values) + } else { + fields.push({ label, values }) + } + return doc +} + +/** + * Given a doc and a matching rule, writes statement to doc for given varField + * + * @param {object} doc - The plainobject doc to write to + * @param {Bib} bib - Bib document (for use in looking up parallel fields) + * @param {VarField} varField - VarField from which to extract content. + * @param {AnnotatedMarcRule} rule - Rule to apply when extracting content + * (and looking up parallel fields) + * + */ +MarcSerializer.addStatementsForVarFieldForRule = function (doc, bib, varField, rule) { + const content = MarcSerializer.formatVarFieldMatch(varField, rule) + // + if (content) doc = MarcSerializer.addStatementsToDoc(doc, rule, [content]) + + const parallelNumbers = (varField.subfields || []) + .filter((s) => s.tag === '6') + .map((s) => s.content.replace(/^880-/, '')) + + if (parallelNumbers.length > 0) { + // Get parallel varfields: + const matchingParallels = MarcSerializer.matchingMarcFields(bib, Object.assign({}, rule, { fieldTag: 'y', marcIndicatorRegExp: /^880/ })) + .map((varField) => { + return { + field: varField, + linkingValue: (varField.subfields.filter((s) => s.tag === '6') || []) + .map((linkingSubfield) => linkingSubfield.content) + .pop() + } + }) + .filter((parallel) => parallelNumbers.some((parallelNumber) => parallel.linkingValue?.indexOf(parallelNumber) === 4)) + .map((parallel) => parallel.field) + + if (matchingParallels.length > 0) { + const parallelLabel = `Alternate Script for ${rule.label}` + const parallelContent = MarcSerializer.formatVarFieldMatches(matchingParallels, rule) + const pseudoRule = { label: parallelLabel, fieldTag: varField.fieldTag } + doc = MarcSerializer.addStatementsToDoc(doc, pseudoRule, parallelContent) + } + } + + return doc +} + +/** + * + * Given a SierraMarc bib document, returns a new document that presents + * fields queried via data/annotated-marc-rules.json, grouped by label, + * and including the marc source - with hidden subfield values redacted. + * + * Returns an object resembling: + * + * { + * bib: { + * fields: [ + * { + * label: "Title", + * values: [ + * { + * content: "Time is a flat circle", + * source: { ... } + * } + * ] + * } + * ] + * } + * } + */ + +MarcSerializer.setRules = function (rules) { + MarcSerializer.mappingRules = rules +} + +MarcSerializer.initialStateObjectForSerialization = function () { + return MarcSerializer.orderedFieldTags.reduce(function (acc, tag) { + acc[tag] = [] + return acc + }, {}) +} + +MarcSerializer.setRules(MarcSerializer.mappingRules) + +MarcSerializer.serialize = function (bib) { + const doc = bib.varFields.reduce((doc, field) => { + let foundMatch = false + + MarcSerializer.mappingRules.forEach((rule) => { + if (!foundMatch && MarcSerializer.varFieldMatches(field, rule)) { + if (rule.directive === 'include') { + doc = MarcSerializer.addStatementsForVarFieldForRule(doc, bib, field, rule) + } + foundMatch = true + } + }) + + return doc + }, MarcSerializer.initialStateObjectForSerialization()) + // Format for return to client: + return { + bib: { + unserialized: bib, + id: bib.id, + nyplSource: bib.nyplSource, + fields: Object.keys(doc) + .reduce((acc, fieldTag) => acc.concat(doc[fieldTag]), []) + } + } +} + +module.exports = MarcSerializer diff --git a/lib/resources.js b/lib/resources.js index 57532f96..346c24f0 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -231,6 +231,30 @@ module.exports = function (app, _private = null) { .then(AnnotatedMarcSerializer.serialize) } + // Get a single raw marc: + app.resources.marc = async function (params, opts) { + // Convert discovery id to nyplSource and un-prefixed id: + const nyplSourceMapper = await NyplSourceMapper.instance() + const { id, nyplSource } = nyplSourceMapper.splitIdentifier(params.uri) ?? {} + + if (!id || !nyplSource) { + throw new errors.InvalidParameterError(`Invalid bnum: ${params.uri}`) + } + + app.logger.debug('Resources#marc', { id, nyplSource }) + + return makeNyplDataApiClient().get(`bibs/${nyplSource}/${id}`) + .then((resp) => { + // need to check that the query actually found an entry + if (!resp.data) { + throw new errors.NotFoundError(`Record not found: bibs/${nyplSource}/${id}`) + } else { + return resp.data + } + }) + .then(MarcSerializer.serialize) + } + function itemsByFilter (filter, opts) { opts = Object.assign({ _source: null diff --git a/routes/resources.js b/routes/resources.js index 8e1dab30..dbc159d0 100644 --- a/routes/resources.js +++ b/routes/resources.js @@ -106,6 +106,8 @@ module.exports = function (app) { if (req.params.ext === 'annotated-marc') { handler = app.resources.annotatedMarc + } else if (req.params.ext === 'marc') { + handler = app.resources.marc } return handler(params, { baseUrl: app.baseUrl }, req) From d056e5e59788e2865f6372d540fd9ac2d64f4641 Mon Sep 17 00:00:00 2001 From: Emma Mansell <73774046+7emansell@users.noreply.github.com> Date: Mon, 29 Dec 2025 16:00:25 -0500 Subject: [PATCH 02/15] Cant test first try --- lib/marc-serializer.js | 430 +++++++++++++---------------------------- lib/resources.js | 1 + package-lock.json | 1 - 3 files changed, 135 insertions(+), 297 deletions(-) diff --git a/lib/marc-serializer.js b/lib/marc-serializer.js index 7a60c7dc..d85fc770 100644 --- a/lib/marc-serializer.js +++ b/lib/marc-serializer.js @@ -1,294 +1,127 @@ -/** - * @typedef {object} AnnotatedMarcRuleSubfieldSpec - * @property {array} subfields - Array of subfields to match - * @property {string} directive - Indicates whether the matching subfields - * should be "include"d or "exclude"d - */ - -/** - * - * @typedef {object} AnnotatedMarcRule - * @property {string} fieldTag - Single character tag broadly classifying tag (e.g. 'y') - * @property {string} marcIndicatorRegExp - Stringified regex for matching a - * VarField tag joined to 1st and 2nd indicators - * @property {AnnotatedMarcRuleSubfieldSpec} subfieldSpec - How to match subfields - * @property {string} label - What label to use in mapping - * @property {string} directive - Whether to include/exclude if matched. - */ - -/** - * @typedef {object} SubField - * @property {string} tag - Identifying tag (e.g. '6', 'a') - * @property {string} content - Value of subfield - */ - -/** - * @typedef {object} VarField - * @property {string} marcTag - Three digit number classifying field (e.g. '100') - * @property {string} fieldTag - Single character tag broadly classifying tag (e.g. 'y') - * @property {string} content - Root level content (usually null/ignored) - * @property {array} subfields - */ - -/** - * @typedef {object} Bib - * @property {array} varFields - Array of varfields - */ - const arrayUnique = require('./util').arrayUnique -const relatorMappings = require('../data/relator-mappings.json') -class MarcSerializer { -} +class MarcSerializer {} -// Load rules form disc serialization: +// Load rules from disc serialization: MarcSerializer.mappingRules = require('../data/annotated-marc-rules.json') - .map((rule) => { - return Object.assign({}, rule, { - marcIndicatorRegExp: new RegExp(rule.marcIndicatorRegExp) - }) - }) + .map((rule) => ({ + ...rule, + marcIndicatorRegExp: new RegExp(rule.marcIndicatorRegExp) + })) -MarcSerializer.orderedFieldTags = arrayUnique(MarcSerializer.mappingRules.map((rule) => rule.fieldTag)) +MarcSerializer.orderedFieldTags = arrayUnique( + MarcSerializer.mappingRules.map((rule) => rule.fieldTag) +) /** - * Given the raw source of a webpub.def file, returns an array of usable - * rules that relate field labels to marc queries. - */ -MarcSerializer.parseWebpubToAnnotatedMarcRules = function (webpubContent) { - const mappingRules = webpubContent.split(/\n/) - .map((line) => line.trim()) - // Make sure line has content (after removing # comments) - .filter((line) => line && line.replace(/\s*#.*/, '')) - // Convert to columns: - .map((line) => line.split('|')) - // Convert to named columns: - .map((line) => { - return { - type: line[0], - fieldTag: line[1], - marcIndicatorPattern: line[2], - subfields: line[3], - label: line[4] - } - }) - // Make sure we're handling a 'bib' line - .filter((line) => line.type === 'b') - .map((line) => { - // Raw examples: - // b|s|8..|-6|Series||b| - // b|r|310|-6|Current Frequency||b| - // b|y|8[^5].|u|||b| - - const subfields = Array.from(line.subfields) - let subfieldSpec = { subfields, directive: 'include' } - if (subfields[0] === '-') subfieldSpec = { subfields: subfields.slice(1), directive: 'exclude' } - - return { - fieldTag: line.fieldTag, - marcIndicatorRegExp: new RegExp('^' + line.marcIndicatorPattern), - subfieldSpec, - label: line.label, - directive: line.label ? 'include' : 'exclude' - } - }) - - return mappingRules -} - -/** - * Given raw webpub.def content, builds an array of {AnnotatedMarcRule}s - */ -MarcSerializer.buildAnnotatedMarcRules = function (webpubContent) { - // No one can say why, but there's an "Added Title" entry that is commented - // out, but should not be. Un-comment the Added Title catch-all rule: - webpubContent = webpubContent.replace('# b|u||-06|Added Title||b|', 'b|u||-06|Added Title||b|') - - return MarcSerializer.parseWebpubToAnnotatedMarcRules(webpubContent) - // Apply label overrides - .map((rule) => { - // Override label for URLs. We want them labeled "Connect to": - if (rule.fieldTag === 'y' && rule.marcIndicatorRegExp.source === '^856') { - rule.label = 'Connect to:' - } - return rule - }) -} - -/** - * Given a sierra marc document, returns an array of varField bocks matching + * Given a Sierra MARC document, returns an array of varField blocks matching * the given rule */ MarcSerializer.matchingMarcFields = function (bib, rule) { - return bib.varFields - .filter((field) => { - const fieldMarcIndicator = `${field.marcTag}${field.ind1 || ' '}${field.ind2 || ' '}` - return rule.marcIndicatorRegExp.test(fieldMarcIndicator) && - rule.fieldTag === field.fieldTag - }) + return bib.varFields.filter((field) => + MarcSerializer.varFieldMatches(field, rule) + ) } /** * Given a {VarField} and a {AnnotatedMarcRule}, returns true if matched. - * - * @return {boolean} */ MarcSerializer.varFieldMatches = function (field, rule) { - const fieldMarcIndicator = `${field.marcTag}${field.ind1 || ' '}${field.ind2 || ' '}` - return rule.marcIndicatorRegExp.test(fieldMarcIndicator) && - rule.fieldTag === field.fieldTag + const indicator = `${field.marcTag}${field.ind1 || ' '}${field.ind2 || ' '}` + return ( + rule.fieldTag === field.fieldTag && + rule.marcIndicatorRegExp.test(indicator) + ) } - /** * Given a varField, returns a copy with any hidden subfield content replaced - * with "[redacted]" based on given rule + * with '[redacted]' based on given rule */ MarcSerializer.buildSourceWithMasking = function (field, rule) { - return Object.assign({}, field, { - subfields: (field.subfields || []) - .map((subfield) => { - let subfieldContent = subfield.content - // If directive is 'include' and subfield not included - // .. or directive is 'exclude', but subfield included, - // [redact] it: - if ((rule.subfieldSpec.directive === 'include' && rule.subfieldSpec.subfields.indexOf(subfield.tag) < 0) || - (rule.subfieldSpec.directive === 'exclude' && rule.subfieldSpec.subfields.indexOf(subfield.tag) >= 0)) { - subfieldContent = '[redacted]' - } - return Object.assign({}, subfield, { content: subfieldContent }) - }) - }) -} - -/** - * Get prefix for a marctag & subfield, given a previous subfield (if avail.) - * - * @param {string} marcTag - MARC tag (e.g. '651') - * @param {string} subfield - Subfield tag (e.g. 'x') - * @param {string} previousSubfield - Tag of preceding subfield if available. - */ -const prefixForSubfield = (marcTag, subfield, previousSubfield = null) => { - // By default, prefix should be ' ' if there's a previous subfield: - let prefix = previousSubfield ? ' ' : '' - - // If subfield is '4' and prev. subfield also '4', add a '.' - if (subfield === '4' && previousSubfield === '4') { - prefix = '. ' + return { + ...field, + subfields: (field.subfields || []).map((subfield) => { + let content = subfield.content + + if ( + (rule.subfieldSpec.directive === 'include' && + !rule.subfieldSpec.subfields.includes(subfield.tag)) || + (rule.subfieldSpec.directive === 'exclude' && + rule.subfieldSpec.subfields.includes(subfield.tag)) + ) { + content = '[redacted]' + } - // Otherwise determine prefix by marcTag/subfield - } else { - switch (parseInt(marcTag)) { - // Subject fields: - case 600: - case 610: - case 611: - case 630: - case 650: - case 655: - case 651: - case 690: - switch (subfield) { - case 'v': - case 'x': - case 'y': - case 'z': - prefix = ' -- ' - break - } - break - } + return { ...subfield, content } + }) } - return prefix } /** - * Given an varfield block (presumably matching the given rule), returns - * an object representing the match. + * Given a varfield block, returns a structured annotated match */ MarcSerializer.formatVarFieldMatch = function (matchingVarField, rule) { - const extractContent = function (subfield) { - if (subfield.tag === '4') { - let newContent = subfield.content - const words = newContent.split(' ') - words.forEach((word) => { - const replacer = word.length === 3 && relatorMappings[word] - if (replacer) { - newContent = newContent.replace(word, replacer) - } - }) - return newContent - } else { - return subfield.content - } - } + const matchedSubfields = (matchingVarField.subfields || []).filter( + (subfield) => { + if (!rule.subfieldSpec) return false - const matchedSubfields = (matchingVarField.subfields || []).filter((subfield) => { - // If rule includes a subfields directive - if (rule.subfieldSpec) { - // Is subfields directive exclusionary? Remove matching: - if (rule.subfieldSpec.directive === 'exclude') return rule.subfieldSpec.subfields.indexOf(subfield.tag) < 0 - // ..Otherwise keep matching: - else return rule.subfieldSpec.subfields.indexOf(subfield.tag) >= 0 - } - return false - }) - if (!matchedSubfields.length) return - const content = matchingVarField.content || matchedSubfields - .map((subfield, ind) => { - const previousSubfield = matchedSubfields[ind - 1] - // Construct prefix based on subfield & prev. subfield: - const prefix = prefixForSubfield(matchingVarField.marcTag, subfield.tag, previousSubfield ? previousSubfield.tag : null) - return prefix + extractContent(subfield) - }).join('') - // Collect other field values apart from primary value: - const additionalFields = {} + if (rule.subfieldSpec.directive === 'exclude') { + return !rule.subfieldSpec.subfields.includes(subfield.tag) + } - // For "Connect to:" mapped blocks, extract label: - if (rule.label === 'Connect to:') { - const labelSubfields = ['z', 'y', '3'] - additionalFields.label = (matchingVarField.subfields || []) - .filter((s) => labelSubfields.indexOf(s.tag) >= 0) - .map((s) => s.content) - // Make sure it has content: - .filter((s) => s) - .shift() - // If no label found, use URL as label - if (!additionalFields.label) additionalFields.label = content - } + return rule.subfieldSpec.subfields.includes(subfield.tag) + } + ) - // Include source field with masked subfields: - const source = MarcSerializer.buildSourceWithMasking(matchingVarField, rule) + if (!matchedSubfields.length) return null - return Object.assign(additionalFields, { content, source }) + return { + label: rule.label, + marcTag: matchingVarField.marcTag, + fieldTag: matchingVarField.fieldTag, + indicators: [ + matchingVarField.ind1 || ' ', + matchingVarField.ind2 || ' ' + ], + subfields: matchedSubfields, + source: MarcSerializer.buildSourceWithMasking( + matchingVarField, + rule + ) + } } /** - * Given an array of varfield blocks and a annotated-marc rule - * returns an array of objects with `content` and `source` properties + * Given an array of varfield blocks and a rule, + * returns structured matches */ -MarcSerializer.formatVarFieldMatches = function (matchingVarFields, rule) { - return matchingVarFields.map((field) => MarcSerializer.formatVarFieldMatch(field, rule)) +MarcSerializer.formatVarFieldMatches = function ( + matchingVarFields, + rule +) { + return matchingVarFields + .map((field) => + MarcSerializer.formatVarFieldMatch(field, rule) + ) + .filter(Boolean) } /** - * Given a document, a label, and an array of values, adds values to doc - * - * @param {object} doc - The plainobject to update and return - * @param {string} label - The label to use - * @param {array} values - Array of values to add - * - * @returns {object} The updated document + * Given a document, a rule, and an array of values, adds them to doc */ MarcSerializer.addStatementsToDoc = function (doc, rule, values) { - const label = rule.label - const fieldTag = rule.fieldTag - const fields = doc[fieldTag] + const fields = doc[rule.fieldTag] const last = fields[fields.length - 1] - if (last && last.label === label) { + + if (last && last.label === rule.label) { last.values = last.values.concat(values) } else { - fields.push({ label, values }) + fields.push({ + label: rule.label, + values + }) } + return doc } @@ -298,38 +131,61 @@ MarcSerializer.addStatementsToDoc = function (doc, rule, values) { * @param {object} doc - The plainobject doc to write to * @param {Bib} bib - Bib document (for use in looking up parallel fields) * @param {VarField} varField - VarField from which to extract content. - * @param {AnnotatedMarcRule} rule - Rule to apply when extracting content + * @param {MarcRule} rule - Rule to apply when extracting content * (and looking up parallel fields) * */ -MarcSerializer.addStatementsForVarFieldForRule = function (doc, bib, varField, rule) { - const content = MarcSerializer.formatVarFieldMatch(varField, rule) - // - if (content) doc = MarcSerializer.addStatementsToDoc(doc, rule, [content]) +MarcSerializer.addStatementsForVarFieldForRule = function ( + doc, + bib, + varField, + rule +) { + const content = MarcSerializer.formatVarFieldMatch( + varField, + rule + ) + + if (content) { + MarcSerializer.addStatementsToDoc(doc, rule, [content]) + } const parallelNumbers = (varField.subfields || []) .filter((s) => s.tag === '6') .map((s) => s.content.replace(/^880-/, '')) if (parallelNumbers.length > 0) { - // Get parallel varfields: - const matchingParallels = MarcSerializer.matchingMarcFields(bib, Object.assign({}, rule, { fieldTag: 'y', marcIndicatorRegExp: /^880/ })) - .map((varField) => { - return { - field: varField, - linkingValue: (varField.subfields.filter((s) => s.tag === '6') || []) - .map((linkingSubfield) => linkingSubfield.content) - .pop() - } - }) - .filter((parallel) => parallelNumbers.some((parallelNumber) => parallel.linkingValue?.indexOf(parallelNumber) === 4)) - .map((parallel) => parallel.field) - - if (matchingParallels.length > 0) { + const matchingParallels = MarcSerializer.matchingMarcFields( + bib, + { ...rule, fieldTag: 'y', marcIndicatorRegExp: /^880/ } + ) + .map((vf) => ({ + field: vf, + linkingValue: (vf.subfields || []) + .filter((s) => s.tag === '6') + .map((s) => s.content) + .pop() + })) + .filter((p) => + parallelNumbers.some( + (n) => p.linkingValue?.indexOf(n) === 4 + ) + ) + .map((p) => p.field) + + if (matchingParallels.length) { const parallelLabel = `Alternate Script for ${rule.label}` - const parallelContent = MarcSerializer.formatVarFieldMatches(matchingParallels, rule) - const pseudoRule = { label: parallelLabel, fieldTag: varField.fieldTag } - doc = MarcSerializer.addStatementsToDoc(doc, pseudoRule, parallelContent) + const parallelContent = + MarcSerializer.formatVarFieldMatches( + matchingParallels, + rule + ) + + MarcSerializer.addStatementsToDoc( + doc, + { ...rule, label: parallelLabel }, + parallelContent.map((v) => ({ ...v, isParallel: true })) + ) } } @@ -337,36 +193,14 @@ MarcSerializer.addStatementsForVarFieldForRule = function (doc, bib, varField, r } /** - * - * Given a SierraMarc bib document, returns a new document that presents - * fields queried via data/annotated-marc-rules.json, grouped by label, - * and including the marc source - with hidden subfield values redacted. - * - * Returns an object resembling: - * - * { - * bib: { - * fields: [ - * { - * label: "Title", - * values: [ - * { - * content: "Time is a flat circle", - * source: { ... } - * } - * ] - * } - * ] - * } - * } + * Rule setup */ - MarcSerializer.setRules = function (rules) { MarcSerializer.mappingRules = rules } MarcSerializer.initialStateObjectForSerialization = function () { - return MarcSerializer.orderedFieldTags.reduce(function (acc, tag) { + return MarcSerializer.orderedFieldTags.reduce((acc, tag) => { acc[tag] = [] return acc }, {}) @@ -381,7 +215,12 @@ MarcSerializer.serialize = function (bib) { MarcSerializer.mappingRules.forEach((rule) => { if (!foundMatch && MarcSerializer.varFieldMatches(field, rule)) { if (rule.directive === 'include') { - doc = MarcSerializer.addStatementsForVarFieldForRule(doc, bib, field, rule) + MarcSerializer.addStatementsForVarFieldForRule( + doc, + bib, + field, + rule + ) } foundMatch = true } @@ -389,14 +228,13 @@ MarcSerializer.serialize = function (bib) { return doc }, MarcSerializer.initialStateObjectForSerialization()) - // Format for return to client: + return { bib: { unserialized: bib, id: bib.id, nyplSource: bib.nyplSource, - fields: Object.keys(doc) - .reduce((acc, fieldTag) => acc.concat(doc[fieldTag]), []) + fields: Object.values(doc).flat() } } } diff --git a/lib/resources.js b/lib/resources.js index 346c24f0..e0cf6c57 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -8,6 +8,7 @@ const AggregationSerializer = require('./jsonld_serializers.js').AggregationSeri const ItemResultsSerializer = require('./jsonld_serializers.js').ItemResultsSerializer const LocationLabelUpdater = require('./location_label_updater') const AnnotatedMarcSerializer = require('./annotated-marc-serializer') +const MarcSerializer = require('./marc-serializer') const { makeNyplDataApiClient } = require('./data-api-client') const { IndexSearchError, IndexConnectionError } = require('./errors') diff --git a/package-lock.json b/package-lock.json index cd0d3b06..c8f601d9 100644 --- a/package-lock.json +++ b/package-lock.json @@ -3689,7 +3689,6 @@ "version": "0.11.0", "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==", - "dev": true, "optional": true, "engines": { "node": ">=14" From 56adb1a0510c439df252a73bd797ca38194baab7 Mon Sep 17 00:00:00 2001 From: Emma Mansell <73774046+7emansell@users.noreply.github.com> Date: Tue, 30 Dec 2025 11:12:40 -0500 Subject: [PATCH 03/15] Progress but can't test properly yet --- lib/annotated-marc-serializer.js | 24 +-- lib/marc-serializer.js | 298 +++++++++++-------------------- lib/marc-util.js | 39 ++++ package-lock.json | 1 + 4 files changed, 148 insertions(+), 214 deletions(-) create mode 100644 lib/marc-util.js diff --git a/lib/annotated-marc-serializer.js b/lib/annotated-marc-serializer.js index 4dd537fa..b400570e 100644 --- a/lib/annotated-marc-serializer.js +++ b/lib/annotated-marc-serializer.js @@ -37,6 +37,7 @@ const arrayUnique = require('./util').arrayUnique const relatorMappings = require('../data/relator-mappings.json') +const { varFieldMatches, buildSourceWithMasking } = require('./marc-util') class AnnotatedMarcSerializer { } @@ -133,32 +134,13 @@ AnnotatedMarcSerializer.matchingMarcFields = function (bib, rule) { * * @return {boolean} */ -AnnotatedMarcSerializer.varFieldMatches = function (field, rule) { - const fieldMarcIndicator = `${field.marcTag}${field.ind1 || ' '}${field.ind2 || ' '}` - return rule.marcIndicatorRegExp.test(fieldMarcIndicator) && - rule.fieldTag === field.fieldTag -} +AnnotatedMarcSerializer.varFieldMatches = varFieldMatches /** * Given a varField, returns a copy with any hidden subfield content replaced * with "[redacted]" based on given rule */ -AnnotatedMarcSerializer.buildSourceWithMasking = function (field, rule) { - return Object.assign({}, field, { - subfields: (field.subfields || []) - .map((subfield) => { - let subfieldContent = subfield.content - // If directive is 'include' and subfield not included - // .. or directive is 'exclude', but subfield included, - // [redact] it: - if ((rule.subfieldSpec.directive === 'include' && rule.subfieldSpec.subfields.indexOf(subfield.tag) < 0) || - (rule.subfieldSpec.directive === 'exclude' && rule.subfieldSpec.subfields.indexOf(subfield.tag) >= 0)) { - subfieldContent = '[redacted]' - } - return Object.assign({}, subfield, { content: subfieldContent }) - }) - }) -} +AnnotatedMarcSerializer.buildSourceWithMasking = buildSourceWithMasking /** * Get prefix for a marctag & subfield, given a previous subfield (if avail.) diff --git a/lib/marc-serializer.js b/lib/marc-serializer.js index d85fc770..3609b380 100644 --- a/lib/marc-serializer.js +++ b/lib/marc-serializer.js @@ -1,240 +1,152 @@ -const arrayUnique = require('./util').arrayUnique - -class MarcSerializer {} - -// Load rules from disc serialization: -MarcSerializer.mappingRules = require('../data/annotated-marc-rules.json') - .map((rule) => ({ - ...rule, - marcIndicatorRegExp: new RegExp(rule.marcIndicatorRegExp) - })) - -MarcSerializer.orderedFieldTags = arrayUnique( - MarcSerializer.mappingRules.map((rule) => rule.fieldTag) -) +/** + * @typedef {object} MarcRuleSubfieldSpec + * @property {array} subfields - Array of subfields to match for suppression + * @property {string} directive - Indicates whether the matching subfields + * should be "include"d or "exclude"d + */ +/** + * @typedef {object} MarcRule + * @property {string} fieldTag - Single character tag broadly classifying tag (e.g. 'y') + * @property {string} marcIndicatorRegExp - Stringified regex for matching a + * VarField tag joined to 1st and 2nd indicators + * @property {MarcRuleSubfieldSpec} subfieldSpec - How to match subfields + * @property {string} directive - Whether to include/exclude if matched. + */ /** - * Given a Sierra MARC document, returns an array of varField blocks matching - * the given rule + * @typedef {object} SubField + * @property {string} tag - Identifying tag (e.g. '6', 'a') + * @property {string} content - Value of subfield */ -MarcSerializer.matchingMarcFields = function (bib, rule) { - return bib.varFields.filter((field) => - MarcSerializer.varFieldMatches(field, rule) - ) -} /** - * Given a {VarField} and a {AnnotatedMarcRule}, returns true if matched. + * @typedef {object} VarField + * * @property {string} marcTag - Three digit number classifying field (e.g. '100') + * @property {string} fieldTag - Single character tag broadly classifying tag (e.g. 'y') + * @property {string} content - Root level content (usually null/ignored) + * @property {array} subfields + * @property {string|null} ind1 - First indicator character (space if blank) + * @property {string|null} ind2 - Second indicator character (space if blank) */ -MarcSerializer.varFieldMatches = function (field, rule) { - const indicator = `${field.marcTag}${field.ind1 || ' '}${field.ind2 || ' '}` - return ( - rule.fieldTag === field.fieldTag && - rule.marcIndicatorRegExp.test(indicator) - ) -} + /** - * Given a varField, returns a copy with any hidden subfield content replaced - * with '[redacted]' based on given rule + * @typedef {object} SerializedBib + * @property {string} id - Bib ID + * @property {string} nyplSource - MARC source + * @property {array} varFields - Array of varFields after suppression */ -MarcSerializer.buildSourceWithMasking = function (field, rule) { - return { - ...field, - subfields: (field.subfields || []).map((subfield) => { - let content = subfield.content - - if ( - (rule.subfieldSpec.directive === 'include' && - !rule.subfieldSpec.subfields.includes(subfield.tag)) || - (rule.subfieldSpec.directive === 'exclude' && - rule.subfieldSpec.subfields.includes(subfield.tag)) - ) { - content = '[redacted]' - } - - return { ...subfield, content } - }) - } -} /** - * Given a varfield block, returns a structured annotated match + * @typedef {object} SerializedMarc + * @property {SerializedBib} bib - The serialized bib object containing varFields */ -MarcSerializer.formatVarFieldMatch = function (matchingVarField, rule) { - const matchedSubfields = (matchingVarField.subfields || []).filter( - (subfield) => { - if (!rule.subfieldSpec) return false - if (rule.subfieldSpec.directive === 'exclude') { - return !rule.subfieldSpec.subfields.includes(subfield.tag) - } +const { varFieldMatches, buildSourceWithMasking } = require('./marc-util') - return rule.subfieldSpec.subfields.includes(subfield.tag) - } - ) +class MarcSerializer {} - if (!matchedSubfields.length) return null +// Load rules +MarcSerializer.mappingRules = require('../data/annotated-marc-rules.json') + .map((rule) => ({ + ...rule, + marcIndicatorRegExp: new RegExp(rule.marcIndicatorRegExp) + })) - return { - label: rule.label, - marcTag: matchingVarField.marcTag, - fieldTag: matchingVarField.fieldTag, - indicators: [ - matchingVarField.ind1 || ' ', - matchingVarField.ind2 || ' ' - ], - subfields: matchedSubfields, - source: MarcSerializer.buildSourceWithMasking( - matchingVarField, - rule - ) - } -} +/** + * Returns true if a field matches a given MARC rule + * @param {VarField} field - MARC field to test + * @param {MarcRule} rule - Rule to match against + * @returns {boolean} + */ +MarcSerializer.varFieldMatches = varFieldMatches /** - * Given an array of varfield blocks and a rule, - * returns structured matches + * Returns a copy of a varField with masked subfields according to the rule + * @param {VarField} field - MARC field to mask + * @param {MarcRule} rule - Rule defining subfields to mask + * @returns {VarField} Masked field */ -MarcSerializer.formatVarFieldMatches = function ( - matchingVarFields, - rule -) { - return matchingVarFields - .map((field) => - MarcSerializer.formatVarFieldMatch(field, rule) - ) - .filter(Boolean) -} +MarcSerializer.buildSourceWithMasking = buildSourceWithMasking /** - * Given a document, a rule, and an array of values, adds them to doc + * Check if a field is the LEADER + * @param {VarField} field - Field to check + * @returns {boolean} */ -MarcSerializer.addStatementsToDoc = function (doc, rule, values) { - const fields = doc[rule.fieldTag] - const last = fields[fields.length - 1] - - if (last && last.label === rule.label) { - last.values = last.values.concat(values) - } else { - fields.push({ - label: rule.label, - values - }) - } +MarcSerializer.isLeaderField = function (field) { + return field.fieldTag === '_' && field.marcTag === null && typeof field.content === 'string' +} - return doc +MarcSerializer.describeField = function (field) { + return `${field.marcTag}${field.ind1 || ' '}${field.ind2 || ' '}` } /** - * Given a doc and a matching rule, writes statement to doc for given varField - * - * @param {object} doc - The plainobject doc to write to - * @param {Bib} bib - Bib document (for use in looking up parallel fields) - * @param {VarField} varField - VarField from which to extract content. - * @param {MarcRule} rule - Rule to apply when extracting content - * (and looking up parallel fields) - * + * Finds linked 880 fields (parallel scripts) for a given field + * @param {Bib} bib - Bib object containing varFields + * @param {VarField} varField - Field to find parallels for + * @returns {Array} Array of parallel 880 fields */ -MarcSerializer.addStatementsForVarFieldForRule = function ( - doc, - bib, - varField, - rule -) { - const content = MarcSerializer.formatVarFieldMatch( - varField, - rule - ) - - if (content) { - MarcSerializer.addStatementsToDoc(doc, rule, [content]) - } - - const parallelNumbers = (varField.subfields || []) +MarcSerializer.findParallelFields = function (bib, varField) { + const linkNumbers = (varField.subfields || []) .filter((s) => s.tag === '6') .map((s) => s.content.replace(/^880-/, '')) - if (parallelNumbers.length > 0) { - const matchingParallels = MarcSerializer.matchingMarcFields( - bib, - { ...rule, fieldTag: 'y', marcIndicatorRegExp: /^880/ } - ) - .map((vf) => ({ - field: vf, - linkingValue: (vf.subfields || []) - .filter((s) => s.tag === '6') - .map((s) => s.content) - .pop() - })) - .filter((p) => - parallelNumbers.some( - (n) => p.linkingValue?.indexOf(n) === 4 - ) - ) - .map((p) => p.field) - - if (matchingParallels.length) { - const parallelLabel = `Alternate Script for ${rule.label}` - const parallelContent = - MarcSerializer.formatVarFieldMatches( - matchingParallels, - rule - ) - - MarcSerializer.addStatementsToDoc( - doc, - { ...rule, label: parallelLabel }, - parallelContent.map((v) => ({ ...v, isParallel: true })) - ) - } - } + if (!linkNumbers.length) return [] - return doc + return bib.varFields.filter((f) => { + if (!f.subfields || f.marcTag !== '880') return false + const fLinks = f.subfields + .filter((s) => s.tag === '6') + .map((s) => s.content) + return fLinks.some((link) => linkNumbers.some((n) => link.indexOf(n) === 4)) + }) } /** - * Rule setup + * Serializes a bib with suppressed fields + * @param {Bib} bib - Bib to serialize + * @returns {SerializedMarc} Serialized bib with masked varFields */ -MarcSerializer.setRules = function (rules) { - MarcSerializer.mappingRules = rules -} +MarcSerializer.serialize = function (bib) { + const suppressedVarFields = bib.varFields.map((field) => { + // pass LDR through + if (MarcSerializer.isLeaderField(field)) return field -MarcSerializer.initialStateObjectForSerialization = function () { - return MarcSerializer.orderedFieldTags.reduce((acc, tag) => { - acc[tag] = [] - return acc - }, {}) -} + const matchingRule = MarcSerializer.mappingRules.find((rule) => + MarcSerializer.varFieldMatches(field, rule) + ) -MarcSerializer.setRules(MarcSerializer.mappingRules) + if (!matchingRule) return field + if (matchingRule.directive !== 'include') return field -MarcSerializer.serialize = function (bib) { - const doc = bib.varFields.reduce((doc, field) => { - let foundMatch = false - - MarcSerializer.mappingRules.forEach((rule) => { - if (!foundMatch && MarcSerializer.varFieldMatches(field, rule)) { - if (rule.directive === 'include') { - MarcSerializer.addStatementsForVarFieldForRule( - doc, - bib, - field, - rule - ) - } - foundMatch = true - } + console.log( + 'MARC suppressing', + MarcSerializer.describeField(field), + `rule=${matchingRule.fieldTag}` + ) + + const maskedField = MarcSerializer.buildSourceWithMasking(field, matchingRule) + + // Handle parallel 880 fields + const parallels = MarcSerializer.findParallelFields(bib, field) + parallels.forEach((p) => { + console.log( + 'MARC suppressing parallel 880', + MarcSerializer.describeField(p), + `linked to ${MarcSerializer.describeField(field)}` + ) + Object.assign(p, MarcSerializer.buildSourceWithMasking(p, matchingRule)) }) - return doc - }, MarcSerializer.initialStateObjectForSerialization()) + return maskedField + }) return { bib: { - unserialized: bib, id: bib.id, nyplSource: bib.nyplSource, - fields: Object.values(doc).flat() + varFields: suppressedVarFields } } } diff --git a/lib/marc-util.js b/lib/marc-util.js new file mode 100644 index 00000000..b0a332d6 --- /dev/null +++ b/lib/marc-util.js @@ -0,0 +1,39 @@ +/** + * * Returns true if a field matches a given MARC rule + * @param {VarField} field + * @param {MarcRule} rule + * @returns {boolean} + */ +function varFieldMatches (field, rule) { + const indicator = `${field.marcTag || ''}${field.ind1 || ' '}${field.ind2 || ' '}` + return rule.fieldTag === field.fieldTag && rule.marcIndicatorRegExp.test(indicator) +} + +/** + * Returns a copy of a varField with masked subfields according to the rule + * @param {VarField} field + * @param {MarcRule} rule + * @returns {VarField} + */ +function buildSourceWithMasking (field, rule) { + return { + ...field, + subfields: (field.subfields || []).map((subfield) => { + let content = subfield.content + if ( + (rule.subfieldSpec.directive === 'include' && + !rule.subfieldSpec.subfields.includes(subfield.tag)) || + (rule.subfieldSpec.directive === 'exclude' && + rule.subfieldSpec.subfields.includes(subfield.tag)) + ) { + content = '[redacted]' + } + return { ...subfield, content } + }) + } +} + +module.exports = { + varFieldMatches, + buildSourceWithMasking +} diff --git a/package-lock.json b/package-lock.json index c8f601d9..cd0d3b06 100644 --- a/package-lock.json +++ b/package-lock.json @@ -3689,6 +3689,7 @@ "version": "0.11.0", "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==", + "dev": true, "optional": true, "engines": { "node": ">=14" From 51a71bf075f41c4220bf277482916252ef842e5d Mon Sep 17 00:00:00 2001 From: Emma Mansell <73774046+7emansell@users.noreply.github.com> Date: Tue, 30 Dec 2025 11:35:13 -0500 Subject: [PATCH 04/15] sort --- lib/marc-serializer.js | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/lib/marc-serializer.js b/lib/marc-serializer.js index 3609b380..1c1b48a5 100644 --- a/lib/marc-serializer.js +++ b/lib/marc-serializer.js @@ -103,6 +103,23 @@ MarcSerializer.findParallelFields = function (bib, varField) { }) } +/** + * Sorts varFields numerically by marcTag, with LEADER first + * @param {Array} fields + * @returns {Array} + */ +function sortVarFields (fields) { + return fields.slice().sort((a, b) => { + // LDR first + if (a.marcTag === null) return -1 + if (b.marcTag === null) return 1 + + const tagA = parseInt(a.marcTag, 10) + const tagB = parseInt(b.marcTag, 10) + return tagA - tagB + }) +} + /** * Serializes a bib with suppressed fields * @param {Bib} bib - Bib to serialize @@ -146,7 +163,7 @@ MarcSerializer.serialize = function (bib) { bib: { id: bib.id, nyplSource: bib.nyplSource, - varFields: suppressedVarFields + varFields: sortVarFields(suppressedVarFields) } } } From e17ec22e689538d75ac9d4414f3c303f715b841e Mon Sep 17 00:00:00 2001 From: Emma Mansell <73774046+7emansell@users.noreply.github.com> Date: Tue, 30 Dec 2025 12:09:26 -0500 Subject: [PATCH 05/15] Unit tests --- test/marc-serializer.test.js | 276 +++++++++++++++++++++++++++++++++++ 1 file changed, 276 insertions(+) create mode 100644 test/marc-serializer.test.js diff --git a/test/marc-serializer.test.js b/test/marc-serializer.test.js new file mode 100644 index 00000000..9ae0a61c --- /dev/null +++ b/test/marc-serializer.test.js @@ -0,0 +1,276 @@ +const { expect } = require('chai') +const MarcSerializer = require('../lib/marc-serializer') + +const sampleBibNoParallels = { + id: 'testId', + nyplSource: 'testSource', + varFields: [ + { + fieldTag: 'a', + marcTag: '100', + content: null, + ind1: '1', + ind2: ' ', + subfields: [ + { tag: 'a', content: 'Porter, Bertha,' }, + { tag: 'd', content: '1852-1941.' } + ] + }, + { + fieldTag: 't', + marcTag: '245', + content: null, + ind1: '1', + ind2: '0', + subfields: [ + { tag: 'a', content: 'Topographical bibliography of ancient Egyptian hieroglyphic texts, reliefs, and paintings /' }, + { tag: 'c', content: 'by Bertha Porter and Rosalind L.B. Moss.' } + ] + }, + { + fieldTag: 'b', + marcTag: '700', + content: null, + ind1: '1', + ind2: ' ', + subfields: [ + { tag: 'a', content: 'Moss, Rosalind L. B.' }, + { tag: 'q', content: '(Rosalind Louisa Beaufort)' } + ] + }, + { + fieldTag: 'y', + marcTag: '005', + content: '20150416154259.0', + subfields: [], + ind1: null, + ind2: null + }, + { + fieldTag: 'y', + marcTag: '856', + content: null, + subfields: [ + { tag: 'u', content: 'This should be suppressed' }, + { tag: 'z', content: 'This is ok' } + ], + ind1: '4', + ind2: '0' + }, + { + fieldTag: '_', + marcTag: null, + content: '00000cam 2200769Ia 4500', + subfields: [], + ind1: null, + ind2: null + } + ] +} + +const sampleBibWithParallels = { + id: 'testId', + nyplSource: 'testSource', + varFields: [ + { + fieldTag: 'a', + marcTag: '100', + content: null, + ind1: '1', + ind2: ' ', + subfields: [ + { tag: 'a', content: 'Porter, Bertha,' }, + { tag: 'd', content: '1852-1941.' } + ] + }, + { + fieldTag: 't', + marcTag: '245', + content: null, + ind1: '1', + ind2: '0', + subfields: [ + { tag: 'a', content: 'Topographical bibliography of ancient Egyptian hieroglyphic texts, reliefs, and paintings /' }, + { tag: 'c', content: 'by Bertha Porter and Rosalind L.B. Moss.' } + ] + }, + { + fieldTag: 'b', + marcTag: '700', + content: null, + ind1: '1', + ind2: ' ', + subfields: [ + { tag: 'a', content: 'Moss, Rosalind L. B.' }, + { tag: 'q', content: '(Rosalind Louisa Beaufort)' } + ] + }, + { + fieldTag: 'y', + marcTag: '005', + content: '20150416154259.0', + subfields: [], + ind1: null, + ind2: null + }, + { + fieldTag: 'y', + marcTag: '856', + content: null, + subfields: [ + { tag: 'u', content: 'This should be suppressed' }, + { tag: 'z', content: 'This is ok' } + ], + ind1: '4', + ind2: '0' + }, + { + fieldTag: 'y', + marcTag: '880', + content: null, + subfields: [ + { tag: '6', content: '856-01' }, + { tag: 'u', content: 'Parallel to suppressed' }, + { tag: 'z', content: 'Parallel to ok' } + ], + ind1: '4', + ind2: '0' + }, + { + fieldTag: '_', + marcTag: null, + content: '00000cam 2200769Ia 4500', + subfields: [], + ind1: null, + ind2: null + } + ] +} + +describe('MarcSerializer', () => { + describe('serialize', () => { + let serialized + + before(() => { + serialized = MarcSerializer.serialize(sampleBibNoParallels) + }) + + it('preserves leader field', () => { + const leader = serialized.bib.varFields.find(f => f.fieldTag === '_') + expect(leader).to.exist() + expect(leader.content).to.equal('00000cam 2200769Ia 4500') + }) + + it('preserves non-suppressed fields', () => { + const field100 = serialized.bib.varFields.find(f => f.marcTag === '100') + expect(field100).to.exist() + expect(field100.subfields.map(sf => sf.content)).to.include('Porter, Bertha,') + }) + + it('masks included subfields according to rules', () => { + // Find the suppressed field 856 + const field856 = serialized.bib.varFields.find(f => f.marcTag === '856') + expect(field856).to.exist() + + // 856$u should be blanked + const subfieldU = field856.subfields.find(s => s.tag === 'u') + expect(subfieldU).to.exist() + expect(subfieldU.content).to.satisfy(c => c === null || c === '') + + // 856$z should remain unchanged + const subfieldZ = field856.subfields.find(s => s.tag === 'z') + expect(subfieldZ).to.exist() + expect(subfieldZ.content).to.equal('This is ok') + }) + + it('sorts varFields with leader first', () => { + expect(serialized.bib.varFields[0].fieldTag).to.equal('_') + }) + + it('keeps all other varFields present', () => { + const tags = serialized.bib.varFields.map(f => f.marcTag) + expect(tags).to.include.members(['100', '245', '700', '005']) + }) + }) + + describe('serialize with parallel 880 fields', () => { + let serialized + + before(() => { + serialized = MarcSerializer.serialize(sampleBibWithParallels) + }) + + it('suppresses included subfields in both main and parallel 880 fields', () => { + // Find the original 856 field that should be suppressed + const field856 = serialized.bib.varFields.find( + (f) => f.marcTag === '856' + ) + expect(field856).to.exist() + + // 856$u should be blanked + const subfieldU856 = field856.subfields.find((s) => s.tag === 'u') + expect(subfieldU856).to.exist() + expect(subfieldU856.content).to.satisfy((c) => c === null || c === '') + + // 856$z should remain unchanged + const subfieldZ856 = field856.subfields.find((s) => s.tag === 'z') + expect(subfieldZ856.content).to.equal('This is ok') + + // Find the parallel 880 field linked to 856 + const field880 = serialized.bib.varFields.find( + (f) => f.marcTag === '880' + ) + expect(field880).to.exist() + + // 880$u (parallel to suppressed) should also be blanked + const subfieldU880 = field880.subfields.find((s) => s.tag === 'u') + expect(subfieldU880).to.exist() + expect(subfieldU880.content).to.satisfy((c) => c === null || c === '') + + // 880$z (parallel to ok) should remain unchanged + const subfieldZ880 = field880.subfields.find((s) => s.tag === 'z') + expect(subfieldZ880.content).to.equal('Parallel to ok') + }) + }) + + describe('findParallelFields', () => { + it('returns empty array when no 880 fields are present', () => { + const field100 = sampleBibNoParallels.varFields.find(f => f.marcTag === '100') + const parallels = MarcSerializer.findParallelFields(sampleBibNoParallels, field100) + expect(parallels).to.be.an('array').that.is.empty() + }) + it('returns correct parallel 880 for a field', () => { + const field856 = sampleBibWithParallels.varFields.find(f => f.marcTag === '856') + const parallels = MarcSerializer.findParallelFields(sampleBibWithParallels, field856) + expect(parallels).to.have.lengthOf(1) + expect(parallels[0].marcTag).to.equal('880') + }) + }) + + describe('varFields sort order', () => { + it('places leader first and sorts other fields numerically by marcTag', () => { + const serialized = MarcSerializer.serialize(sampleBibWithParallels) + const varFields = serialized.bib.varFields + + // Leader should be first + expect(varFields[0].fieldTag).to.equal('_') + + // Remaining fields should be sorted ascending by marcTag + const marcTags = varFields.slice(1).map(f => parseInt(f.marcTag, 10)) + const sortedTags = [...marcTags].sort((a, b) => a - b) + expect(marcTags).to.deep.equal(sortedTags) + }) + }) + + describe('isLeaderField', () => { + it('correctly identifies leader field', () => { + const leader = sampleBibNoParallels.varFields[0] + expect(MarcSerializer.isLeaderField(leader)).to.be.true() + }) + + it('returns false for non-leader fields', () => { + const field100 = sampleBibNoParallels.varFields[1] + expect(MarcSerializer.isLeaderField(field100)).to.be.false() + }) + }) +}) From a999561750150c1e08e17312ef1658a05216c5a2 Mon Sep 17 00:00:00 2001 From: Emma Mansell <73774046+7emansell@users.noreply.github.com> Date: Tue, 30 Dec 2025 12:26:03 -0500 Subject: [PATCH 06/15] correct test syntax to run on GH --- lib/marc-serializer.js | 2 +- test/marc-serializer.test.js | 21 +++++++++------------ 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/lib/marc-serializer.js b/lib/marc-serializer.js index 1c1b48a5..c1a2a7bb 100644 --- a/lib/marc-serializer.js +++ b/lib/marc-serializer.js @@ -104,7 +104,7 @@ MarcSerializer.findParallelFields = function (bib, varField) { } /** - * Sorts varFields numerically by marcTag, with LEADER first + * Sorts varFields numerically by marcTag, with leader first * @param {Array} fields * @returns {Array} */ diff --git a/test/marc-serializer.test.js b/test/marc-serializer.test.js index 9ae0a61c..dff9a3a9 100644 --- a/test/marc-serializer.test.js +++ b/test/marc-serializer.test.js @@ -157,29 +157,27 @@ describe('MarcSerializer', () => { it('preserves leader field', () => { const leader = serialized.bib.varFields.find(f => f.fieldTag === '_') - expect(leader).to.exist() expect(leader.content).to.equal('00000cam 2200769Ia 4500') }) it('preserves non-suppressed fields', () => { const field100 = serialized.bib.varFields.find(f => f.marcTag === '100') - expect(field100).to.exist() expect(field100.subfields.map(sf => sf.content)).to.include('Porter, Bertha,') }) it('masks included subfields according to rules', () => { // Find the suppressed field 856 const field856 = serialized.bib.varFields.find(f => f.marcTag === '856') - expect(field856).to.exist() + expect(field856).to.not.equal(undefined) // 856$u should be blanked const subfieldU = field856.subfields.find(s => s.tag === 'u') - expect(subfieldU).to.exist() + expect(subfieldU).to.not.equal(undefined) expect(subfieldU.content).to.satisfy(c => c === null || c === '') // 856$z should remain unchanged const subfieldZ = field856.subfields.find(s => s.tag === 'z') - expect(subfieldZ).to.exist() + expect(subfieldZ).to.not.equal(undefined) expect(subfieldZ.content).to.equal('This is ok') }) @@ -193,7 +191,7 @@ describe('MarcSerializer', () => { }) }) - describe('serialize with parallel 880 fields', () => { + describe('serialize with parallels', () => { let serialized before(() => { @@ -205,11 +203,10 @@ describe('MarcSerializer', () => { const field856 = serialized.bib.varFields.find( (f) => f.marcTag === '856' ) - expect(field856).to.exist() // 856$u should be blanked const subfieldU856 = field856.subfields.find((s) => s.tag === 'u') - expect(subfieldU856).to.exist() + expect(subfieldU856).to.not.equal(undefined) expect(subfieldU856.content).to.satisfy((c) => c === null || c === '') // 856$z should remain unchanged @@ -220,11 +217,11 @@ describe('MarcSerializer', () => { const field880 = serialized.bib.varFields.find( (f) => f.marcTag === '880' ) - expect(field880).to.exist() + expect(field880).to.not.equal(undefined) // 880$u (parallel to suppressed) should also be blanked const subfieldU880 = field880.subfields.find((s) => s.tag === 'u') - expect(subfieldU880).to.exist() + expect(subfieldU880).to.not.equal(undefined) expect(subfieldU880.content).to.satisfy((c) => c === null || c === '') // 880$z (parallel to ok) should remain unchanged @@ -265,12 +262,12 @@ describe('MarcSerializer', () => { describe('isLeaderField', () => { it('correctly identifies leader field', () => { const leader = sampleBibNoParallels.varFields[0] - expect(MarcSerializer.isLeaderField(leader)).to.be.true() + expect(MarcSerializer.isLeaderField(leader)).to.equal(true) }) it('returns false for non-leader fields', () => { const field100 = sampleBibNoParallels.varFields[1] - expect(MarcSerializer.isLeaderField(field100)).to.be.false() + expect(MarcSerializer.isLeaderField(field100)).to.equal(false) }) }) }) From b60340c66487a5038b31dbfa0c8f6f81422b958d Mon Sep 17 00:00:00 2001 From: Emma Mansell <73774046+7emansell@users.noreply.github.com> Date: Mon, 5 Jan 2026 11:41:54 -0500 Subject: [PATCH 07/15] Fighting marc serializer unit test demons --- lib/marc-serializer.js | 65 ++++++++--------- test/marc-serializer.test.js | 137 +++++++++++++++-------------------- 2 files changed, 90 insertions(+), 112 deletions(-) diff --git a/lib/marc-serializer.js b/lib/marc-serializer.js index c1a2a7bb..d8ca10c0 100644 --- a/lib/marc-serializer.js +++ b/lib/marc-serializer.js @@ -67,7 +67,6 @@ MarcSerializer.varFieldMatches = varFieldMatches * @returns {VarField} Masked field */ MarcSerializer.buildSourceWithMasking = buildSourceWithMasking - /** * Check if a field is the LEADER * @param {VarField} field - Field to check @@ -110,7 +109,6 @@ MarcSerializer.findParallelFields = function (bib, varField) { */ function sortVarFields (fields) { return fields.slice().sort((a, b) => { - // LDR first if (a.marcTag === null) return -1 if (b.marcTag === null) return 1 @@ -121,43 +119,42 @@ function sortVarFields (fields) { } /** - * Serializes a bib with suppressed fields + * Serializes a bib with excluded fields and redacted subfields * @param {Bib} bib - Bib to serialize - * @returns {SerializedMarc} Serialized bib with masked varFields + * @returns {SerializedMarc} Serialized bib */ MarcSerializer.serialize = function (bib) { - const suppressedVarFields = bib.varFields.map((field) => { - // pass LDR through - if (MarcSerializer.isLeaderField(field)) return field - - const matchingRule = MarcSerializer.mappingRules.find((rule) => - MarcSerializer.varFieldMatches(field, rule) - ) - - if (!matchingRule) return field - if (matchingRule.directive !== 'include') return field - - console.log( - 'MARC suppressing', - MarcSerializer.describeField(field), - `rule=${matchingRule.fieldTag}` - ) - - const maskedField = MarcSerializer.buildSourceWithMasking(field, matchingRule) - - // Handle parallel 880 fields - const parallels = MarcSerializer.findParallelFields(bib, field) - parallels.forEach((p) => { - console.log( - 'MARC suppressing parallel 880', - MarcSerializer.describeField(p), - `linked to ${MarcSerializer.describeField(field)}` + const suppressedVarFields = bib.varFields + .map((field) => { + // Pass leader through + if (MarcSerializer.isLeaderField(field)) return field + + // Find matching rule + const matchingRule = MarcSerializer.mappingRules.find((rule) => + MarcSerializer.varFieldMatches(field, rule) ) - Object.assign(p, MarcSerializer.buildSourceWithMasking(p, matchingRule)) - }) - return maskedField - }) + // If no rule, leave as is + if (!matchingRule) return field + + // Handle field-level exclusion + if (matchingRule.directive === 'exclude') { + return null + } + + // Mask field according to rule (handles subfield-level include/exclude) + const maskedField = MarcSerializer.buildSourceWithMasking(field, matchingRule) + + // Handle parallel 880 fields + const parallels = MarcSerializer.findParallelFields(bib, field) + parallels.forEach((p) => { + Object.assign(p, MarcSerializer.buildSourceWithMasking(p, matchingRule)) + }) + + return maskedField + }) + // Remove any nulls from excluded fields + .filter(Boolean) return { bib: { diff --git a/test/marc-serializer.test.js b/test/marc-serializer.test.js index dff9a3a9..101a3c9c 100644 --- a/test/marc-serializer.test.js +++ b/test/marc-serializer.test.js @@ -1,7 +1,28 @@ const { expect } = require('chai') const MarcSerializer = require('../lib/marc-serializer') -const sampleBibNoParallels = { +// Mock mapping rules +MarcSerializer.mappingRules = [ + // 700 should be removed entirely + { + fieldTag: 'b', + marcIndicatorRegExp: /^700/, + directive: 'exclude', + subfieldSpec: null + }, + // 856$u should be blanked, 856$z kept + { + fieldTag: 'y', + marcIndicatorRegExp: /^856/, + directive: 'include', + subfieldSpec: { + subfields: ['u'], // redact only $u + directive: 'exclude' + } + } +] + +const sampleBib = { id: 'testId', nyplSource: 'testSource', varFields: [ @@ -38,20 +59,12 @@ const sampleBibNoParallels = { { tag: 'q', content: '(Rosalind Louisa Beaufort)' } ] }, - { - fieldTag: 'y', - marcTag: '005', - content: '20150416154259.0', - subfields: [], - ind1: null, - ind2: null - }, { fieldTag: 'y', marcTag: '856', content: null, subfields: [ - { tag: 'u', content: 'This should be suppressed' }, + { tag: 'u', content: 'This should be redacted' }, { tag: 'z', content: 'This is ok' } ], ind1: '4', @@ -105,44 +118,37 @@ const sampleBibWithParallels = { { tag: 'q', content: '(Rosalind Louisa Beaufort)' } ] }, - { - fieldTag: 'y', - marcTag: '005', - content: '20150416154259.0', - subfields: [], - ind1: null, - ind2: null - }, { fieldTag: 'y', marcTag: '856', content: null, subfields: [ - { tag: 'u', content: 'This should be suppressed' }, - { tag: 'z', content: 'This is ok' } + { tag: 'u', content: 'This should be redacted' }, + { tag: 'z', content: 'This is ok' }, + { tag: '6', content: '880-01' } ], ind1: '4', ind2: '0' }, + { + fieldTag: '_', + marcTag: null, + content: '00000cam 2200769Ia 4500', + subfields: [], + ind1: null, + ind2: null + }, { fieldTag: 'y', marcTag: '880', content: null, subfields: [ - { tag: '6', content: '856-01' }, - { tag: 'u', content: 'Parallel to suppressed' }, + { tag: '6', content: '856-01' }, // links to 856 + { tag: 'u', content: 'Parallel to redacted' }, { tag: 'z', content: 'Parallel to ok' } ], ind1: '4', ind2: '0' - }, - { - fieldTag: '_', - marcTag: null, - content: '00000cam 2200769Ia 4500', - subfields: [], - ind1: null, - ind2: null } ] } @@ -150,9 +156,8 @@ const sampleBibWithParallels = { describe('MarcSerializer', () => { describe('serialize', () => { let serialized - before(() => { - serialized = MarcSerializer.serialize(sampleBibNoParallels) + serialized = MarcSerializer.serialize(sampleBib) }) it('preserves leader field', () => { @@ -165,76 +170,52 @@ describe('MarcSerializer', () => { expect(field100.subfields.map(sf => sf.content)).to.include('Porter, Bertha,') }) - it('masks included subfields according to rules', () => { - // Find the suppressed field 856 + it('blanks subfields marked for exclusion', () => { const field856 = serialized.bib.varFields.find(f => f.marcTag === '856') - expect(field856).to.not.equal(undefined) - // 856$u should be blanked const subfieldU = field856.subfields.find(s => s.tag === 'u') - expect(subfieldU).to.not.equal(undefined) - expect(subfieldU.content).to.satisfy(c => c === null || c === '') + expect(subfieldU.content).to.equal('[redacted]') - // 856$z should remain unchanged const subfieldZ = field856.subfields.find(s => s.tag === 'z') - expect(subfieldZ).to.not.equal(undefined) expect(subfieldZ.content).to.equal('This is ok') }) - it('sorts varFields with leader first', () => { - expect(serialized.bib.varFields[0].fieldTag).to.equal('_') - }) - - it('keeps all other varFields present', () => { + it('keeps surviving fields present', () => { const tags = serialized.bib.varFields.map(f => f.marcTag) - expect(tags).to.include.members(['100', '245', '700', '005']) + // Null is the leader, 700 is removed + expect(tags).to.include.members([null, '100', '245', '856']) }) }) describe('serialize with parallels', () => { let serialized - before(() => { serialized = MarcSerializer.serialize(sampleBibWithParallels) }) - it('suppresses included subfields in both main and parallel 880 fields', () => { - // Find the original 856 field that should be suppressed - const field856 = serialized.bib.varFields.find( - (f) => f.marcTag === '856' - ) - - // 856$u should be blanked - const subfieldU856 = field856.subfields.find((s) => s.tag === 'u') - expect(subfieldU856).to.not.equal(undefined) - expect(subfieldU856.content).to.satisfy((c) => c === null || c === '') - - // 856$z should remain unchanged - const subfieldZ856 = field856.subfields.find((s) => s.tag === 'z') - expect(subfieldZ856.content).to.equal('This is ok') + it('blanks excluded subfields in main 856 and parallel 880', () => { + const field856 = serialized.bib.varFields.find(f => f.marcTag === '856') + const field880 = serialized.bib.varFields.find(f => f.marcTag === '880') - // Find the parallel 880 field linked to 856 - const field880 = serialized.bib.varFields.find( - (f) => f.marcTag === '880' - ) - expect(field880).to.not.equal(undefined) + // 856$u and 880$u should be redacted + const subU856 = field856.subfields.find(s => s.tag === 'u') + expect(subU856.content).to.equal('[redacted]') - // 880$u (parallel to suppressed) should also be blanked - const subfieldU880 = field880.subfields.find((s) => s.tag === 'u') - expect(subfieldU880).to.not.equal(undefined) - expect(subfieldU880.content).to.satisfy((c) => c === null || c === '') + const subU880 = field880.subfields.find(s => s.tag === 'u') + expect(subU880.content).to.equal('[redacted]') - // 880$z (parallel to ok) should remain unchanged - const subfieldZ880 = field880.subfields.find((s) => s.tag === 'z') - expect(subfieldZ880.content).to.equal('Parallel to ok') + // z subfields remain + expect(field856.subfields.find(s => s.tag === 'z').content).to.equal('This is ok') + expect(field880.subfields.find(s => s.tag === 'z').content).to.equal('Parallel to ok') }) }) describe('findParallelFields', () => { it('returns empty array when no 880 fields are present', () => { - const field100 = sampleBibNoParallels.varFields.find(f => f.marcTag === '100') - const parallels = MarcSerializer.findParallelFields(sampleBibNoParallels, field100) - expect(parallels).to.be.an('array').that.is.empty() + const field100 = sampleBib.varFields.find(f => f.marcTag === '100') + const parallels = MarcSerializer.findParallelFields(sampleBib, field100) + expect(parallels).to.be.an('array') + expect(parallels).to.have.lengthOf(0) }) it('returns correct parallel 880 for a field', () => { const field856 = sampleBibWithParallels.varFields.find(f => f.marcTag === '856') @@ -261,12 +242,12 @@ describe('MarcSerializer', () => { describe('isLeaderField', () => { it('correctly identifies leader field', () => { - const leader = sampleBibNoParallels.varFields[0] + const leader = sampleBib.varFields.find(field => field.fieldTag === '_') expect(MarcSerializer.isLeaderField(leader)).to.equal(true) }) it('returns false for non-leader fields', () => { - const field100 = sampleBibNoParallels.varFields[1] + const field100 = sampleBib.varFields[1] expect(MarcSerializer.isLeaderField(field100)).to.equal(false) }) }) From 5ba17c6a8ab644ea62abba1587c224926c93f6c9 Mon Sep 17 00:00:00 2001 From: Emma Mansell <73774046+7emansell@users.noreply.github.com> Date: Mon, 5 Jan 2026 12:05:21 -0500 Subject: [PATCH 08/15] clean up --- lib/annotated-marc-serializer.js | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/annotated-marc-serializer.js b/lib/annotated-marc-serializer.js index b400570e..24ace60f 100644 --- a/lib/annotated-marc-serializer.js +++ b/lib/annotated-marc-serializer.js @@ -374,7 +374,6 @@ AnnotatedMarcSerializer.serialize = function (bib) { // Format for return to client: return { bib: { - unserialized: bib, id: bib.id, nyplSource: bib.nyplSource, fields: Object.keys(doc) From a3e86b0343e492172d9bc1786c7610de0255ca06 Mon Sep 17 00:00:00 2001 From: Emma Mansell <73774046+7emansell@users.noreply.github.com> Date: Mon, 5 Jan 2026 14:35:45 -0500 Subject: [PATCH 09/15] more clean up --- lib/marc-serializer.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/marc-serializer.js b/lib/marc-serializer.js index d8ca10c0..1dbae51d 100644 --- a/lib/marc-serializer.js +++ b/lib/marc-serializer.js @@ -160,7 +160,7 @@ MarcSerializer.serialize = function (bib) { bib: { id: bib.id, nyplSource: bib.nyplSource, - varFields: sortVarFields(suppressedVarFields) + fields: sortVarFields(suppressedVarFields) } } } From 393cbed07897de55d8fc0512d58fb504ee66017f Mon Sep 17 00:00:00 2001 From: Emma Mansell <73774046+7emansell@users.noreply.github.com> Date: Mon, 5 Jan 2026 14:39:09 -0500 Subject: [PATCH 10/15] Plus tests --- lib/marc-serializer.js | 2 +- test/marc-serializer.test.js | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/lib/marc-serializer.js b/lib/marc-serializer.js index 1dbae51d..0cfbd583 100644 --- a/lib/marc-serializer.js +++ b/lib/marc-serializer.js @@ -33,7 +33,7 @@ * @typedef {object} SerializedBib * @property {string} id - Bib ID * @property {string} nyplSource - MARC source - * @property {array} varFields - Array of varFields after suppression + * @property {array} fields - Array of varFields after suppression */ /** diff --git a/test/marc-serializer.test.js b/test/marc-serializer.test.js index 101a3c9c..8ede98c8 100644 --- a/test/marc-serializer.test.js +++ b/test/marc-serializer.test.js @@ -161,17 +161,17 @@ describe('MarcSerializer', () => { }) it('preserves leader field', () => { - const leader = serialized.bib.varFields.find(f => f.fieldTag === '_') + const leader = serialized.bib.fields.find(f => f.fieldTag === '_') expect(leader.content).to.equal('00000cam 2200769Ia 4500') }) it('preserves non-suppressed fields', () => { - const field100 = serialized.bib.varFields.find(f => f.marcTag === '100') + const field100 = serialized.bib.fields.find(f => f.marcTag === '100') expect(field100.subfields.map(sf => sf.content)).to.include('Porter, Bertha,') }) it('blanks subfields marked for exclusion', () => { - const field856 = serialized.bib.varFields.find(f => f.marcTag === '856') + const field856 = serialized.bib.fields.find(f => f.marcTag === '856') const subfieldU = field856.subfields.find(s => s.tag === 'u') expect(subfieldU.content).to.equal('[redacted]') @@ -181,7 +181,7 @@ describe('MarcSerializer', () => { }) it('keeps surviving fields present', () => { - const tags = serialized.bib.varFields.map(f => f.marcTag) + const tags = serialized.bib.fields.map(f => f.marcTag) // Null is the leader, 700 is removed expect(tags).to.include.members([null, '100', '245', '856']) }) @@ -194,8 +194,8 @@ describe('MarcSerializer', () => { }) it('blanks excluded subfields in main 856 and parallel 880', () => { - const field856 = serialized.bib.varFields.find(f => f.marcTag === '856') - const field880 = serialized.bib.varFields.find(f => f.marcTag === '880') + const field856 = serialized.bib.fields.find(f => f.marcTag === '856') + const field880 = serialized.bib.fields.find(f => f.marcTag === '880') // 856$u and 880$u should be redacted const subU856 = field856.subfields.find(s => s.tag === 'u') @@ -225,16 +225,16 @@ describe('MarcSerializer', () => { }) }) - describe('varFields sort order', () => { + describe('fields sort order', () => { it('places leader first and sorts other fields numerically by marcTag', () => { const serialized = MarcSerializer.serialize(sampleBibWithParallels) - const varFields = serialized.bib.varFields + const fields = serialized.bib.fields // Leader should be first - expect(varFields[0].fieldTag).to.equal('_') + expect(fields[0].fieldTag).to.equal('_') // Remaining fields should be sorted ascending by marcTag - const marcTags = varFields.slice(1).map(f => parseInt(f.marcTag, 10)) + const marcTags = fields.slice(1).map(f => parseInt(f.marcTag, 10)) const sortedTags = [...marcTags].sort((a, b) => a - b) expect(marcTags).to.deep.equal(sortedTags) }) From 90a50f38b4b6dca3868302a1a32443cf5c029eed Mon Sep 17 00:00:00 2001 From: Emma Mansell <73774046+7emansell@users.noreply.github.com> Date: Mon, 5 Jan 2026 15:03:32 -0500 Subject: [PATCH 11/15] Pass control fields through --- lib/marc-serializer.js | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/lib/marc-serializer.js b/lib/marc-serializer.js index 0cfbd583..d18d08de 100644 --- a/lib/marc-serializer.js +++ b/lib/marc-serializer.js @@ -47,10 +47,11 @@ class MarcSerializer {} // Load rules MarcSerializer.mappingRules = require('../data/annotated-marc-rules.json') - .map((rule) => ({ - ...rule, - marcIndicatorRegExp: new RegExp(rule.marcIndicatorRegExp) - })) + .map((rule) => { + return Object.assign({}, rule, { + marcIndicatorRegExp: new RegExp(rule.marcIndicatorRegExp) + }) + }) /** * Returns true if a field matches a given MARC rule @@ -67,6 +68,7 @@ MarcSerializer.varFieldMatches = varFieldMatches * @returns {VarField} Masked field */ MarcSerializer.buildSourceWithMasking = buildSourceWithMasking + /** * Check if a field is the LEADER * @param {VarField} field - Field to check @@ -76,6 +78,15 @@ MarcSerializer.isLeaderField = function (field) { return field.fieldTag === '_' && field.marcTag === null && typeof field.content === 'string' } +/** + * Check if a field is a control field (MARC tags 001–009 and no subfields) + * @param {VarField} field - Field to check + * @returns {boolean} + */ +MarcSerializer.isControlField = function (field) { + return field.marcTag && /^[0][0-9][0-9]$/.test(field.marcTag) && (!field.subfields || field.subfields.length === 0) +} + MarcSerializer.describeField = function (field) { return `${field.marcTag}${field.ind1 || ' '}${field.ind2 || ' '}` } @@ -129,6 +140,9 @@ MarcSerializer.serialize = function (bib) { // Pass leader through if (MarcSerializer.isLeaderField(field)) return field + // Pass control fields through + if (MarcSerializer.isControlField(field)) return field + // Find matching rule const matchingRule = MarcSerializer.mappingRules.find((rule) => MarcSerializer.varFieldMatches(field, rule) @@ -157,6 +171,7 @@ MarcSerializer.serialize = function (bib) { .filter(Boolean) return { + unserialized: bib, bib: { id: bib.id, nyplSource: bib.nyplSource, From 145a2cde8099aaea322067c10fe5298dc901508f Mon Sep 17 00:00:00 2001 From: Emma Mansell <73774046+7emansell@users.noreply.github.com> Date: Tue, 6 Jan 2026 09:56:02 -0500 Subject: [PATCH 12/15] Remove sorting i reconsidered --- lib/marc-serializer.js | 19 +------------------ test/marc-serializer.test.js | 35 ++++++++++++++++++++--------------- 2 files changed, 21 insertions(+), 33 deletions(-) diff --git a/lib/marc-serializer.js b/lib/marc-serializer.js index d18d08de..ee554659 100644 --- a/lib/marc-serializer.js +++ b/lib/marc-serializer.js @@ -113,22 +113,6 @@ MarcSerializer.findParallelFields = function (bib, varField) { }) } -/** - * Sorts varFields numerically by marcTag, with leader first - * @param {Array} fields - * @returns {Array} - */ -function sortVarFields (fields) { - return fields.slice().sort((a, b) => { - if (a.marcTag === null) return -1 - if (b.marcTag === null) return 1 - - const tagA = parseInt(a.marcTag, 10) - const tagB = parseInt(b.marcTag, 10) - return tagA - tagB - }) -} - /** * Serializes a bib with excluded fields and redacted subfields * @param {Bib} bib - Bib to serialize @@ -171,11 +155,10 @@ MarcSerializer.serialize = function (bib) { .filter(Boolean) return { - unserialized: bib, bib: { id: bib.id, nyplSource: bib.nyplSource, - fields: sortVarFields(suppressedVarFields) + fields: suppressedVarFields } } } diff --git a/test/marc-serializer.test.js b/test/marc-serializer.test.js index 8ede98c8..d876bdaf 100644 --- a/test/marc-serializer.test.js +++ b/test/marc-serializer.test.js @@ -37,6 +37,14 @@ const sampleBib = { { tag: 'd', content: '1852-1941.' } ] }, + { + fieldTag: 'y', + marcTag: '008', + content: ' cyyyy2011nyua f 000 faeng dnam a ', + ind1: '', + ind2: '', + subfields: [] + }, { fieldTag: 't', marcTag: '245', @@ -225,21 +233,6 @@ describe('MarcSerializer', () => { }) }) - describe('fields sort order', () => { - it('places leader first and sorts other fields numerically by marcTag', () => { - const serialized = MarcSerializer.serialize(sampleBibWithParallels) - const fields = serialized.bib.fields - - // Leader should be first - expect(fields[0].fieldTag).to.equal('_') - - // Remaining fields should be sorted ascending by marcTag - const marcTags = fields.slice(1).map(f => parseInt(f.marcTag, 10)) - const sortedTags = [...marcTags].sort((a, b) => a - b) - expect(marcTags).to.deep.equal(sortedTags) - }) - }) - describe('isLeaderField', () => { it('correctly identifies leader field', () => { const leader = sampleBib.varFields.find(field => field.fieldTag === '_') @@ -251,4 +244,16 @@ describe('MarcSerializer', () => { expect(MarcSerializer.isLeaderField(field100)).to.equal(false) }) }) + + describe('isControlField', () => { + it('correctly identifies control field', () => { + const control = sampleBib.varFields.find(field => field.marcTag === '008') + expect(MarcSerializer.isControlField(control)).to.equal(true) + }) + + it('returns false for data fields', () => { + const field100 = sampleBib.varFields[1] + expect(MarcSerializer.isControlField(field100)).to.equal(false) + }) + }) }) From 3ebf6a133889619977976526eae3fe0dad20a086 Mon Sep 17 00:00:00 2001 From: Emma Mansell <73774046+7emansell@users.noreply.github.com> Date: Tue, 6 Jan 2026 09:56:34 -0500 Subject: [PATCH 13/15] test --- test/marc-serializer.test.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/marc-serializer.test.js b/test/marc-serializer.test.js index d876bdaf..e29b62df 100644 --- a/test/marc-serializer.test.js +++ b/test/marc-serializer.test.js @@ -252,7 +252,7 @@ describe('MarcSerializer', () => { }) it('returns false for data fields', () => { - const field100 = sampleBib.varFields[1] + const field100 = sampleBib.varFields[0] expect(MarcSerializer.isControlField(field100)).to.equal(false) }) }) From 61b57ebad7dcf89688e5a2f2ccb30d1a5de18362 Mon Sep 17 00:00:00 2001 From: Emma Mansell <73774046+7emansell@users.noreply.github.com> Date: Tue, 6 Jan 2026 14:10:39 -0500 Subject: [PATCH 14/15] break down findParallelFields into readable chunks, rename suppressedVarFields --- lib/marc-serializer.js | 60 ++++++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 17 deletions(-) diff --git a/lib/marc-serializer.js b/lib/marc-serializer.js index ee554659..949a2291 100644 --- a/lib/marc-serializer.js +++ b/lib/marc-serializer.js @@ -94,23 +94,49 @@ MarcSerializer.describeField = function (field) { /** * Finds linked 880 fields (parallel scripts) for a given field * @param {Bib} bib - Bib object containing varFields - * @param {VarField} varField - Field to find parallels for + * @param {VarField} sourceField - Field to find parallels for * @returns {Array} Array of parallel 880 fields */ -MarcSerializer.findParallelFields = function (bib, varField) { - const linkNumbers = (varField.subfields || []) - .filter((s) => s.tag === '6') - .map((s) => s.content.replace(/^880-/, '')) - - if (!linkNumbers.length) return [] - - return bib.varFields.filter((f) => { - if (!f.subfields || f.marcTag !== '880') return false - const fLinks = f.subfields - .filter((s) => s.tag === '6') - .map((s) => s.content) - return fLinks.some((link) => linkNumbers.some((n) => link.indexOf(n) === 4)) - }) +MarcSerializer.findParallelFields = function (bib, sourceField) { + const linkNumbers = extractLinkingNumbers(sourceField) + if (linkNumbers.length === 0) return [] + + return bib.varFields.filter((field) => + isLinked880Field(field, linkNumbers) + ) +} + +/** + * Extracts linking numbers from subfield 6, removing the 880- prefix + */ +function extractLinkingNumbers (varField) { + return (varField.subfields || []) + // Is a MARC linking subfield ($6)? + .filter((subfield) => subfield.tag === '6') + .map((subfield) => subfield.content.replace(/^880-/, '')) +} + +/** + * Determines whether a field is an 880 field linked to any of the given numbers + */ +function isLinked880Field (field, linkNumbers) { + if (field.marcTag !== '880' || !field.subfields) return false + + const fieldLinks = field.subfields + // Is a MARC linking subfield ($6)? + .filter((subfield) => subfield.tag === '6') + .map((subfield) => subfield.content) + + return fieldLinks.some((link) => + linkNumbers.some((linkNumber) => isMatchingLink(link, linkNumber)) + ) +} + +/** + * Checks whether a link contains the link number at position 4 + */ +function isMatchingLink (link, linkNumber) { + return link.indexOf(linkNumber) === 4 } /** @@ -119,7 +145,7 @@ MarcSerializer.findParallelFields = function (bib, varField) { * @returns {SerializedMarc} Serialized bib */ MarcSerializer.serialize = function (bib) { - const suppressedVarFields = bib.varFields + const serializedVarFields = bib.varFields .map((field) => { // Pass leader through if (MarcSerializer.isLeaderField(field)) return field @@ -158,7 +184,7 @@ MarcSerializer.serialize = function (bib) { bib: { id: bib.id, nyplSource: bib.nyplSource, - fields: suppressedVarFields + fields: serializedVarFields } } } From a60b0dc1ae721c3a61a7ddd00c9d6874f6a83422 Mon Sep 17 00:00:00 2001 From: Emma Mansell <73774046+7emansell@users.noreply.github.com> Date: Wed, 7 Jan 2026 13:32:25 -0500 Subject: [PATCH 15/15] Removing subfields instead of writing them as "redacted" --- lib/marc-serializer.js | 2 +- lib/marc-util.js | 9 ++++----- test/annotated-marc-rules.test.js | 14 ++++---------- test/marc-serializer.test.js | 12 ++++++------ 4 files changed, 15 insertions(+), 22 deletions(-) diff --git a/lib/marc-serializer.js b/lib/marc-serializer.js index 949a2291..a30b8e79 100644 --- a/lib/marc-serializer.js +++ b/lib/marc-serializer.js @@ -140,7 +140,7 @@ function isMatchingLink (link, linkNumber) { } /** - * Serializes a bib with excluded fields and redacted subfields + * Serializes a bib with excluded fields and subfields * @param {Bib} bib - Bib to serialize * @returns {SerializedMarc} Serialized bib */ diff --git a/lib/marc-util.js b/lib/marc-util.js index b0a332d6..aec9dd31 100644 --- a/lib/marc-util.js +++ b/lib/marc-util.js @@ -10,7 +10,7 @@ function varFieldMatches (field, rule) { } /** - * Returns a copy of a varField with masked subfields according to the rule + * Returns a copy of a varField with removed subfields according to the rule * @param {VarField} field * @param {MarcRule} rule * @returns {VarField} @@ -18,17 +18,16 @@ function varFieldMatches (field, rule) { function buildSourceWithMasking (field, rule) { return { ...field, - subfields: (field.subfields || []).map((subfield) => { - let content = subfield.content + subfields: (field.subfields || []).filter((subfield) => { if ( (rule.subfieldSpec.directive === 'include' && !rule.subfieldSpec.subfields.includes(subfield.tag)) || (rule.subfieldSpec.directive === 'exclude' && rule.subfieldSpec.subfields.includes(subfield.tag)) ) { - content = '[redacted]' + return false } - return { ...subfield, content } + return true }) } } diff --git a/test/annotated-marc-rules.test.js b/test/annotated-marc-rules.test.js index 18723665..a3086611 100644 --- a/test/annotated-marc-rules.test.js +++ b/test/annotated-marc-rules.test.js @@ -329,13 +329,10 @@ describe('Annotated Marc Rules', function () { expect(maskedSource).to.be.a('object') expect(maskedSource.marcTag).to.equal('245') expect(maskedSource.subfields).to.be.a('array') - expect(maskedSource.subfields).to.have.lengthOf(2) + expect(maskedSource.subfields).to.have.lengthOf(1) expect(maskedSource.subfields[0]).to.be.a('object') expect(maskedSource.subfields[0].tag).to.equal('a') expect(maskedSource.subfields[0].content).to.equal('Razvedchik') - expect(maskedSource.subfields[1]).to.be.a('object') - expect(maskedSource.subfields[1].tag).to.equal('h') - expect(maskedSource.subfields[1].content).to.equal('[redacted]') }) it('should mask subfields not included', function () { @@ -360,13 +357,10 @@ describe('Annotated Marc Rules', function () { expect(maskedSource).to.be.a('object') expect(maskedSource.marcTag).to.equal('245') expect(maskedSource.subfields).to.be.a('array') - expect(maskedSource.subfields).to.have.lengthOf(2) + expect(maskedSource.subfields).to.have.lengthOf(1) expect(maskedSource.subfields[0]).to.be.a('object') - expect(maskedSource.subfields[0].tag).to.equal('a') - expect(maskedSource.subfields[0].content).to.equal('[redacted]') - expect(maskedSource.subfields[1]).to.be.a('object') - expect(maskedSource.subfields[1].tag).to.equal('h') - expect(maskedSource.subfields[1].content).to.equal('[microform] :') + expect(maskedSource.subfields[0].tag).to.equal('h') + expect(maskedSource.subfields[0].content).to.equal('[microform] :') }) }) diff --git a/test/marc-serializer.test.js b/test/marc-serializer.test.js index e29b62df..cdb079da 100644 --- a/test/marc-serializer.test.js +++ b/test/marc-serializer.test.js @@ -178,11 +178,11 @@ describe('MarcSerializer', () => { expect(field100.subfields.map(sf => sf.content)).to.include('Porter, Bertha,') }) - it('blanks subfields marked for exclusion', () => { + it('removes subfields marked for exclusion', () => { const field856 = serialized.bib.fields.find(f => f.marcTag === '856') const subfieldU = field856.subfields.find(s => s.tag === 'u') - expect(subfieldU.content).to.equal('[redacted]') + expect(subfieldU).to.equal(undefined) const subfieldZ = field856.subfields.find(s => s.tag === 'z') expect(subfieldZ.content).to.equal('This is ok') @@ -201,16 +201,16 @@ describe('MarcSerializer', () => { serialized = MarcSerializer.serialize(sampleBibWithParallels) }) - it('blanks excluded subfields in main 856 and parallel 880', () => { + it('removes excluded subfields in main 856 and parallel 880', () => { const field856 = serialized.bib.fields.find(f => f.marcTag === '856') const field880 = serialized.bib.fields.find(f => f.marcTag === '880') - // 856$u and 880$u should be redacted + // 856$u and 880$u should be removed const subU856 = field856.subfields.find(s => s.tag === 'u') - expect(subU856.content).to.equal('[redacted]') + expect(subU856).to.equal(undefined) const subU880 = field880.subfields.find(s => s.tag === 'u') - expect(subU880.content).to.equal('[redacted]') + expect(subU880).to.equal(undefined) // z subfields remain expect(field856.subfields.find(s => s.tag === 'z').content).to.equal('This is ok')