diff --git a/CMakeLists.txt b/CMakeLists.txt index 99491e97d..520c55ab2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,6 +45,8 @@ target_include_directories(pugixml PUBLIC ${PRIVATE_DIR}) # own directory, so generator-owned code is recognizable at a glance) and # arrive via sources.cmake (emitted by `python3 -m gen gen/cpp/config.toml`). set(MX_CORE_RUNTIME_SOURCES + ${PRIVATE_DIR}/mx/core/Attribution.cpp + ${PRIVATE_DIR}/mx/core/Attribution.h ${PRIVATE_DIR}/mx/core/Decimal.cpp ${PRIVATE_DIR}/mx/core/Decimal.h ${PRIVATE_DIR}/mx/core/Error.h @@ -60,7 +62,31 @@ set(MX_CORE_RUNTIME_SOURCES ${PRIVATE_DIR}/mx/core/Xml.h) set(MX_CORE_GENERATED_SOURCES "") include(${PRIVATE_DIR}/mx/core/generated/sources.cmake OPTIONAL) -add_library(mx_core STATIC ${MX_CORE_RUNTIME_SOURCES} ${MX_CORE_GENERATED_SOURCES}) + +# mx::core::gitSha, captured at BUILD time. A configure-time capture would go +# stale the moment HEAD moved, because an IDE rebuild (Xcode/Ninja/Make) never +# re-runs CMake. So the capture is a build step baked into the project: it +# re-checks git every build and rewrites the TU only when the value changed. +# See cmake/GitSha.cmake. mx_core depends on the target so the file exists +# before it compiles; GENERATED lets CMake accept a path that the first +# configure has not produced yet. +find_package(Git QUIET) +set(MX_GIT_SHA_CPP "${CMAKE_BINARY_DIR}/generated/GitSha.cpp") +add_custom_target(mx_git_sha ALL + BYPRODUCTS "${MX_GIT_SHA_CPP}" + COMMAND "${CMAKE_COMMAND}" + -DGIT_EXECUTABLE=${GIT_EXECUTABLE} + -DMX_GIT_SHA_SRC_DIR=${CMAKE_CURRENT_SOURCE_DIR} + -DMX_GIT_SHA_TEMPLATE=${PRIVATE_DIR}/mx/core/Version.cpp.in + -DMX_GIT_SHA_OUT=${MX_GIT_SHA_CPP} + -P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/GitSha.cmake" + COMMENT "Capturing git SHA" + VERBATIM) +set_source_files_properties("${MX_GIT_SHA_CPP}" PROPERTIES GENERATED TRUE) + +add_library(mx_core STATIC + ${MX_CORE_RUNTIME_SOURCES} ${MX_CORE_GENERATED_SOURCES} ${MX_GIT_SHA_CPP}) +add_dependencies(mx_core mx_git_sha) target_include_directories(mx_core PUBLIC ${PRIVATE_DIR}) target_link_libraries(mx_core PUBLIC pugixml) target_compile_features(mx_core PUBLIC cxx_std_20) diff --git a/cmake/GitSha.cmake b/cmake/GitSha.cmake new file mode 100644 index 000000000..2e61c7455 --- /dev/null +++ b/cmake/GitSha.cmake @@ -0,0 +1,41 @@ +# MusicXML Class Library +# Copyright (c) by Matthew James Briggs +# Distributed under the MIT License + +# Build-time git SHA capture, run via `cmake -P` as a build step so the value +# survives an IDE rebuild that never re-runs CMake. Rewrites the output TU only +# when the SHA changes (copy_if_different), keeping incremental builds warm. +# Falls back to "unknown" with no git or no checkout, so source-archive builds +# still compile. +# +# Inputs (-D): GIT_EXECUTABLE, MX_GIT_SHA_SRC_DIR, MX_GIT_SHA_TEMPLATE, +# MX_GIT_SHA_OUT. + +set(MX_GIT_SHA "unknown") + +if(GIT_EXECUTABLE) + execute_process( + COMMAND "${GIT_EXECUTABLE}" -C "${MX_GIT_SHA_SRC_DIR}" rev-parse --short=12 HEAD + OUTPUT_VARIABLE _sha + OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE _rc + ERROR_QUIET) + if(_rc EQUAL 0 AND _sha) + set(MX_GIT_SHA "${_sha}") + # -dirty marks a build from an unclean tree: any modified tracked file + # or any untracked file (gitignored build artifacts never appear here). + execute_process( + COMMAND "${GIT_EXECUTABLE}" -C "${MX_GIT_SHA_SRC_DIR}" status --porcelain + OUTPUT_VARIABLE _dirty + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET) + if(_dirty) + set(MX_GIT_SHA "${MX_GIT_SHA}-dirty") + endif() + endif() +endif() + +configure_file("${MX_GIT_SHA_TEMPLATE}" "${MX_GIT_SHA_OUT}.tmp" @ONLY) +execute_process(COMMAND "${CMAKE_COMMAND}" -E copy_if_different + "${MX_GIT_SHA_OUT}.tmp" "${MX_GIT_SHA_OUT}") +file(REMOVE "${MX_GIT_SHA_OUT}.tmp") diff --git a/src/private/mx/api/DocumentManager.cpp b/src/private/mx/api/DocumentManager.cpp index 7711c9a68..e0686ada3 100644 --- a/src/private/mx/api/DocumentManager.cpp +++ b/src/private/mx/api/DocumentManager.cpp @@ -3,6 +3,7 @@ // Distributed under the MIT License #include "mx/api/DocumentManager.h" +#include "mx/core/Attribution.h" #include "mx/core/Error.h" #include "mx/core/generated/Document.h" #include "mx/impl/ScoreConversions.h" @@ -256,7 +257,7 @@ Result DocumentManager::writeToFile(int documentId, const std::string &fil } pugi::xml_document xdoc; - core::serialize(withWriteVersion(*it->second), xdoc); + core::serializeWithAttribution(withWriteVersion(*it->second), xdoc); if (!xdoc.save_file(filePath.c_str(), " ")) { return ApiError{ResultCode::ioError, filePath, "writeToFile: could not write the file"}; @@ -286,7 +287,7 @@ Result DocumentManager::writeToStream(int documentId, std::ostream &stream } pugi::xml_document xdoc; - core::serialize(withWriteVersion(*it->second), xdoc); + core::serializeWithAttribution(withWriteVersion(*it->second), xdoc); xdoc.save(stream, " "); return Result{}; } diff --git a/src/private/mx/core/Attribution.cpp b/src/private/mx/core/Attribution.cpp new file mode 100644 index 000000000..6ae92970c --- /dev/null +++ b/src/private/mx/core/Attribution.cpp @@ -0,0 +1,79 @@ +// MusicXML Class Library +// Copyright (c) by Matthew James Briggs +// Distributed under the MIT License + +#include "mx/core/Attribution.h" + +#include "mx/core/Version.h" +#include "mx/core/generated/Document.h" +#include "mx/core/generated/Encoding.h" +#include "mx/core/generated/EncodingChoice.h" +#include "mx/core/generated/Identification.h" +#include "mx/core/generated/ScoreHeaderGroup.h" +#include "mx/core/generated/ScorePartwise.h" +#include "mx/core/generated/ScoreTimewise.h" + +#include "pugixml/pugixml.hpp" + +#include +#include + +namespace mx::core +{ + +namespace +{ + +// Rebuild the score header's with exactly one mx stamp: drop any +// prior mx stamp (matched by prefix) and append the current one. Preserves the +// user's own software entries and every other encoding child. Works on either +// root because both expose scoreHeader()/setScoreHeader(). +template Score withStamp(Score score) +{ + ScoreHeaderGroup header = score.scoreHeader(); + Identification identification = header.identification().value_or(Identification{}); + Encoding encoding = identification.encoding().value_or(Encoding{}); + + std::vector choice; + for (const auto &existing : encoding.choice()) + { + const bool isMxStamp = + existing.isSoftware() && std::string_view{existing.asSoftware()}.starts_with(kMxSoftwareMarker); + if (!isMxStamp) + { + choice.push_back(existing); + } + } + choice.push_back(EncodingChoice::software(mxSoftwareAttribution())); + encoding.setChoice(std::move(choice)); + + identification.setEncoding(std::move(encoding)); + header.setIdentification(std::move(identification)); + score.setScoreHeader(std::move(header)); + return score; +} + +} // namespace + +std::string mxSoftwareAttribution() +{ + return std::string{kMxSoftwareMarker} + " version=" + gitSha; +} + +void serializeWithAttribution(const Document &d, pugi::xml_document &out) +{ + // Stamp the typed model and let the generated serializer place + // / in their schema-correct slots for free. + Document stamped = d; + if (stamped.isScorePartwise()) + { + stamped.setRoot(Document::Root{withStamp(stamped.asScorePartwise())}); + } + else + { + stamped.setRoot(Document::Root{withStamp(stamped.asScoreTimewise())}); + } + serialize(stamped, out); +} + +} // namespace mx::core diff --git a/src/private/mx/core/Attribution.h b/src/private/mx/core/Attribution.h new file mode 100644 index 000000000..3300f6e03 --- /dev/null +++ b/src/private/mx/core/Attribution.h @@ -0,0 +1,39 @@ +// MusicXML Class Library +// Copyright (c) by Matthew James Briggs +// Distributed under the MIT License + +// Hand-written runtime for the generated mx::core model. Never contains +// generated content (regen-safe split, see docs/ai/design/mx-core-plan.md §2.6). + +#pragma once + +#include +#include + +namespace pugi +{ +class xml_document; +} + +namespace mx::core +{ +class Document; + +/// Prefix that identifies mx's own provenance stamp. A +/// whose text starts with this is mx attribution, not user content: the writer +/// replaces it (so re-serializing never accumulates stamps) and the api read +/// path drops it. +inline constexpr std::string_view kMxSoftwareMarker = "https://github.com/webern/mx"; + +/// The text mx stamps onto every file it writes, e.g. +/// "https://github.com/webern/mx version=b987bfa16671". The version component +/// is the build's gitSha, carrying "-dirty"/"unknown" in the edge cases. +std::string mxSoftwareAttribution(); + +/// Serialize, stamping mx's provenance into (creating +/// / when absent, replacing any prior mx stamp). The +/// product/api write path goes through here; the generated serialize() stays +/// pure so the roundtrip fidelity tests are unaffected. +void serializeWithAttribution(const Document &d, pugi::xml_document &out); + +} // namespace mx::core diff --git a/src/private/mx/core/Version.cpp.in b/src/private/mx/core/Version.cpp.in new file mode 100644 index 000000000..0b3316499 --- /dev/null +++ b/src/private/mx/core/Version.cpp.in @@ -0,0 +1,15 @@ +// MusicXML Class Library +// Copyright (c) by Matthew James Briggs +// Distributed under the MIT License + +// Generated at build time from this template by cmake/GitSha.cmake. +// Do not edit and do not commit the output; it lives under the build tree. + +#include "mx/core/Version.h" + +namespace mx::core +{ + +const char *const gitSha = "@MX_GIT_SHA@"; + +} // namespace mx::core diff --git a/src/private/mx/core/Version.h b/src/private/mx/core/Version.h new file mode 100644 index 000000000..168728fe9 --- /dev/null +++ b/src/private/mx/core/Version.h @@ -0,0 +1,22 @@ +// MusicXML Class Library +// Copyright (c) by Matthew James Briggs +// Distributed under the MIT License + +// Hand-written runtime for the generated mx::core model. Never contains +// generated content (regen-safe split, see docs/ai/design/mx-core-plan.md §2.6). + +#pragma once + +namespace mx::core +{ + +/// The git commit the library was built from, e.g. "b987bfa16671", with a +/// "-dirty" suffix when the working tree had uncommitted changes, or "unknown" +/// when built outside a git checkout (e.g. an exported source archive). +/// Captured at build time rather than CMake-configure time, so the value stays +/// correct even when an IDE rebuild bypasses the Makefile and never re-runs +/// CMake (cmake/GitSha.cmake). This declaration never changes, so including it +/// costs nothing; only the generated definition's TU recompiles on a new SHA. +extern const char *const gitSha; + +} // namespace mx::core diff --git a/src/private/mx/impl/EncodingFunctions.cpp b/src/private/mx/impl/EncodingFunctions.cpp index 443e7c17a..bccc63c5a 100644 --- a/src/private/mx/impl/EncodingFunctions.cpp +++ b/src/private/mx/impl/EncodingFunctions.cpp @@ -3,6 +3,7 @@ // Distributed under the MIT License #include "mx/impl/EncodingFunctions.h" +#include "mx/core/Attribution.h" #include "mx/core/generated/EncodingChoice.h" #include "mx/core/generated/Identification.h" #include "mx/core/generated/Miscellaneous.h" @@ -148,7 +149,12 @@ api::EncodingData createEncoding(const core::Encoding &inEncoding) break; } case core::EncodingChoice::Kind::software: { - outEncoding.software.emplace_back(ec.asSoftware()); + // Drop mx's own provenance stamp: it is mx attribution, not user + // content, and the writer re-adds the current one on every write. + if (!std::string_view{ec.asSoftware()}.starts_with(core::kMxSoftwareMarker)) + { + outEncoding.software.emplace_back(ec.asSoftware()); + } break; } case core::EncodingChoice::Kind::supports: { diff --git a/src/private/mxtest/api/CorpusRoundtripMain.cpp b/src/private/mxtest/api/CorpusRoundtripMain.cpp index e5581f0f3..ac090fdc4 100644 --- a/src/private/mxtest/api/CorpusRoundtripMain.cpp +++ b/src/private/mxtest/api/CorpusRoundtripMain.cpp @@ -20,6 +20,7 @@ // Exit 0 always. Use to grow the pinned list. #include "mx/api/DocumentManager.h" +#include "mx/core/Attribution.h" #include "mxtest/corert/Compare.h" #include "mxtest/corert/Fixer.h" #include "pugixml/pugixml.hpp" @@ -50,6 +51,27 @@ bool isExcludedPath(const std::filesystem::path &p) return false; } +// Remove mx's provenance from the written output. Every api write +// stamps it, but the original input never has it, so the fidelity comparison +// must ignore it (the stamp's own correctness is covered by AttributionTest). +void stripMxAttribution(pugi::xml_node node) +{ + for (pugi::xml_node child = node.first_child(); child;) + { + const pugi::xml_node next = child.next_sibling(); + if (std::string_view{child.name()} == "software" && + std::string_view{child.text().get()}.starts_with(mx::core::kMxSoftwareMarker)) + { + node.remove_child(child); + } + else + { + stripMxAttribution(child); + } + child = next; + } +} + bool isFixupSidecar(const std::filesystem::path &p) { const std::string name = p.filename().string(); @@ -190,6 +212,9 @@ RoundtripResult runRoundtrip(const std::string &absolutePath) return r; } + // Drop mx's provenance stamp from the written output before comparing. + stripMxAttribution(actualDoc.document_element()); + // Normalize both and apply fixups to expected mxtest::corert::normalizeForComparison(expectedDoc); mxtest::corert::normalizeForComparison(actualDoc); diff --git a/src/private/mxtest/core/AttributionTest.cpp b/src/private/mxtest/core/AttributionTest.cpp new file mode 100644 index 000000000..472cd08ec --- /dev/null +++ b/src/private/mxtest/core/AttributionTest.cpp @@ -0,0 +1,137 @@ +// MusicXML Class Library +// Copyright (c) by Matthew James Briggs +// Distributed under the MIT License + +// serializeWithAttribution stamps mx's provenance into : +// it creates / when absent, preserves the user's own +// software, and never accumulates stamps across round-trips. + +#include "cpul/cpulTestHarness.h" + +#include "mx/core/Attribution.h" +#include "mx/core/Version.h" +#include "mx/core/generated/Document.h" +#include "mx/core/generated/Encoding.h" +#include "mx/core/generated/EncodingChoice.h" +#include "mx/core/generated/Identification.h" +#include "mx/core/generated/ScoreHeaderGroup.h" +#include "mx/core/generated/ScorePartwise.h" + +#include "pugixml/pugixml.hpp" + +#include +#include + +using namespace mx::core; + +namespace +{ + +// Count descendants whose text equals the given value. +int countSoftware(const pugi::xml_node &node, std::string_view text) +{ + int count = 0; + for (pugi::xml_node child : node.children()) + { + if (std::string_view{child.name()} == "software" && std::string_view{child.text().get()} == text) + { + ++count; + } + count += countSoftware(child, text); + } + return count; +} + +// Count mx stamps regardless of which version they carry (prefix match). +int countMxStamps(const pugi::xml_node &node) +{ + int count = 0; + for (pugi::xml_node child : node.children()) + { + if (std::string_view{child.name()} == "software" && + std::string_view{child.text().get()}.starts_with(kMxSoftwareMarker)) + { + ++count; + } + count += countMxStamps(child); + } + return count; +} + +} // namespace + +TEST(StampsIntoEmptyDocument, Attribution) +{ + Document doc; // default ScorePartwise, no + pugi::xml_document out; + serializeWithAttribution(doc, out); + + const std::string expected = mxSoftwareAttribution(); + CHECK(expected == std::string{"https://github.com/webern/mx version="} + gitSha); + + // Stamped at the schema-correct path, exactly once. + const pugi::xml_node encoding = out.document_element().child("identification").child("encoding"); + CHECK(std::string_view{encoding.child("software").text().get()} == expected); + CHECK(countSoftware(out.document_element(), expected) == 1); +} + +TEST(PreservesUserSoftware, Attribution) +{ + Document doc; + ScorePartwise score = doc.asScorePartwise(); + ScoreHeaderGroup header = score.scoreHeader(); + Identification identification; + Encoding encoding; + encoding.addChoice(EncodingChoice::software("Finale")); + identification.setEncoding(encoding); + header.setIdentification(identification); + score.setScoreHeader(header); + doc.setRoot(Document::Root{score}); + + pugi::xml_document out; + serializeWithAttribution(doc, out); + + CHECK(countSoftware(out.document_element(), "Finale") == 1); + CHECK(countSoftware(out.document_element(), mxSoftwareAttribution()) == 1); +} + +TEST(DoesNotAccumulateAcrossRoundTrips, Attribution) +{ + Document doc; + pugi::xml_document first; + serializeWithAttribution(doc, first); + + // Parsing mx's own output, then re-writing, must still yield one stamp. + const auto reparsed = parse(first); + CHECK(reparsed.ok()); + if (reparsed.ok()) + { + pugi::xml_document second; + serializeWithAttribution(reparsed.value(), second); + CHECK(countSoftware(second.document_element(), mxSoftwareAttribution()) == 1); + } +} + +TEST(ReplacesAStampFromADifferentVersion, Attribution) +{ + // A file written by some other mx version carries a different-sha stamp. + Document doc; + ScorePartwise score = doc.asScorePartwise(); + ScoreHeaderGroup header = score.scoreHeader(); + Identification identification; + Encoding encoding; + const std::string staleStamp = "https://github.com/webern/mx version=0000staleomx0"; + encoding.addChoice(EncodingChoice::software(staleStamp)); + identification.setEncoding(encoding); + header.setIdentification(identification); + score.setScoreHeader(header); + doc.setRoot(Document::Root{score}); + + pugi::xml_document out; + serializeWithAttribution(doc, out); + + // The stale stamp is gone, replaced by exactly one current-version stamp. + CHECK(countSoftware(out.document_element(), staleStamp) == 0); + CHECK(countSoftware(out.document_element(), mxSoftwareAttribution()) == 1); + CHECK(countMxStamps(out.document_element()) == 1); +}