diff --git a/.github/workflows/dev_pr.js b/.github/workflows/dev_pr.js
index 13acc946e1..05c3a0c8dc 100644
--- a/.github/workflows/dev_pr.js
+++ b/.github/workflows/dev_pr.js
@@ -122,15 +122,20 @@ module.exports = {
const title = context.payload.pull_request.title;
if (title.startsWith("MINOR: ")) {
console.log("PR is a minor PR");
- return {"issue": null};
+ return {"issue": null, "type": "minor"};
}
- const match = title.match(/^GH-([0-9]+): .*$/);
+ const match = title.match(/^(GH|DX)-([0-9]+): .*$/);
if (match === null) {
- core.setFailed("Invalid PR title format. Must either be MINOR: or GH-NNN:");
- return {"issue": null};
+ core.setFailed("Invalid PR title format. Must either be MINOR:, GH-NNN:, or DX-NNN:");
+ return {"issue": null, "type": null};
}
- return {"issue": parseInt(match[1], 10)};
+
+ const issueType = match[1]; // "GH" or "DX"
+ const issueNumber = parseInt(match[2], 10);
+
+ console.log(`PR references ${issueType}-${issueNumber}`);
+ return {"issue": issueNumber, "type": issueType};
},
apply_labels: async function({core, github, context}) {
@@ -203,9 +208,28 @@ See [CONTRIBUTING.md](https://github.com/apache/arrow-java/blob/main/CONTRIBUTIN
console.log("This is a MINOR PR");
return;
}
- const expected = `https://github.com/apache/arrow-java/issues/${issue.issue}`;
- const query = `
+ // Handle Jira tickets (DX-NNN)
+ if (issue.type === "DX") {
+ const jiraUrl = `https://dremio.atlassian.net/browse/DX-${issue.issue}`;
+ console.log(`This PR references Jira ticket: ${jiraUrl}`);
+
+ // Add a comment with the Jira link
+ const comment_tag = "jira_link_comment";
+ const maybe_comment_id = await have_comment(github, context, context.payload.pull_request.number, comment_tag);
+ const body_text = `
+**Related Jira Ticket:** [DX-${issue.issue}](${jiraUrl})`;
+
+ await upsert_comment(github, maybe_comment_id, body_text, true);
+ console.log("Added/updated Jira link comment");
+ return;
+ }
+
+ // Handle GitHub issues (GH-NNN)
+ if (issue.type === "GH") {
+ const expected = `https://github.com/apache/arrow-java/issues/${issue.issue}`;
+
+ const query = `
query($owner: String!, $name: String!, $number: Int!) {
repository(owner: $owner, name: $name) {
pullRequest(number: $number) {
@@ -220,22 +244,23 @@ query($owner: String!, $name: String!, $number: Int!) {
}
}`;
- const result = await github.graphql(query, {
- owner: context.repo.owner,
- name: context.repo.repo,
- number: context.payload.pull_request.number,
- });
- const issues = result.repository.pullRequest.closingIssuesReferences.edges;
- console.log(issues);
-
- for (const link of issues) {
- console.log(`PR is linked to ${link.node.number}`);
- if (link.node.number === issue.issue) {
- console.log(`Found link to ${expected}`);
- return;
+ const result = await github.graphql(query, {
+ owner: context.repo.owner,
+ name: context.repo.repo,
+ number: context.payload.pull_request.number,
+ });
+ const issues = result.repository.pullRequest.closingIssuesReferences.edges;
+ console.log(issues);
+
+ for (const link of issues) {
+ console.log(`PR is linked to ${link.node.number}`);
+ if (link.node.number === issue.issue) {
+ console.log(`Found link to ${expected}`);
+ return;
+ }
}
+ console.log(`Did not find link to ${expected}`);
+ core.setFailed("Missing link to issue in title");
}
- console.log(`Did not find link to ${expected}`);
- core.setFailed("Missing link to issue in title");
},
};
diff --git a/.github/workflows/jarbuild.yml b/.github/workflows/jarbuild.yml
index e93fc4d439..3c78dd0639 100644
--- a/.github/workflows/jarbuild.yml
+++ b/.github/workflows/jarbuild.yml
@@ -16,7 +16,7 @@
# under the License.
name: JarBuild
-on:
+on:
workflow_dispatch:
inputs:
arrow_branch:
@@ -44,16 +44,35 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
+ - name: Trim workflow inputs
+ run: |
+ echo "ARROW_BRANCH=$(echo '${{github.event.inputs.arrow_branch}}' | xargs)" >> $GITHUB_ENV
+ echo "ARROW_REPO=$(echo '${{github.event.inputs.arrow_repo}}' | xargs)" >> $GITHUB_ENV
+ echo "RELEASE_TAG_NAME=$(echo '${{github.event.inputs.release_tag_name}}' | xargs)" >> $GITHUB_ENV
+ - name: Print workflow input parameters
+ run: |
+ echo "=========================================="
+ echo "Workflow Input Parameters"
+ echo "=========================================="
+ echo "arrow_branch: ${{env.ARROW_BRANCH}}"
+ echo "arrow_repo: ${{env.ARROW_REPO}}"
+ echo "release_tag_name: ${{env.RELEASE_TAG_NAME}}"
+ echo "arrow-java branch: ${{github.ref_name}}"
+ echo ""
+ echo "Direct Links:"
+ echo "----------------------------------------"
+ echo "Arrow C++ repo/branch: https://github.com/${{env.ARROW_REPO}}/tree/${{env.ARROW_BRANCH}}"
+ echo "Arrow Java repo/branch: https://github.com/${{github.repository}}/tree/${{github.ref_name}}"
+ echo "Release tag: https://github.com/${{github.repository}}/releases/tag/${{env.RELEASE_TAG_NAME}}"
+ echo "=========================================="
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: recursive
- - name: Set env
- run: echo "release_tag_name=$(echo $release_tag_name)" >> $GITHUB_ENV
- name: Prepare for tag
run: |
- echo "${{github.event.inputs.release_tag_name}}"
- ver=$(echo ${{github.event.inputs.release_tag_name}})
+ echo "${{env.RELEASE_TAG_NAME}}"
+ ver=$(echo ${{env.RELEASE_TAG_NAME}})
version=${ver%-rc*}
version=${version#v}
rc=${ver#*-rc}
@@ -81,7 +100,7 @@ jobs:
jni-linux:
name: JNI ${{ matrix.platform.runs_on }} ${{ matrix.platform.arch }}
runs-on: ${{ matrix.platform.runs_on }}
- timeout-minutes: 120
+ timeout-minutes: 240
needs:
- source
strategy:
@@ -158,17 +177,16 @@ jobs:
jni-macos:
name: JNI ${{ matrix.platform.runs_on }} ${{ matrix.platform.arch }}
runs-on: ${{ matrix.platform.runs_on }}
- timeout-minutes: 45
+ timeout-minutes: 360 # GitHub-hosted jobs are hard-capped at 6 hours; larger values never fire
needs:
- source
strategy:
fail-fast: false
matrix:
platform:
- - { runs_on: macos-13, arch: "x86_64"}
- - { runs_on: macos-14, arch: "aarch_64" }
+ - { runs_on: macos-15, arch: "aarch_64" }
env:
- MACOSX_DEPLOYMENT_TARGET: "14.0"
+ MACOSX_DEPLOYMENT_TARGET: "15.0"
steps:
- name: Download source archive
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
@@ -205,20 +223,42 @@ jobs:
python-version: 3.12
- name: Install Archery
run: pip install -e arrow/dev/archery[all]
+ - name: Checkout vcpkg
+ uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ repository: Microsoft/vcpkg
+ path: arrow/vcpkg
+ fetch-depth: 0
+ - name: Install vcpkg
+ run: |
+ cd arrow/vcpkg
+ ./bootstrap-vcpkg.sh
+ echo "VCPKG_ROOT_LOCAL=${PWD}" >> ${GITHUB_ENV}
+ echo "${PWD}" >> ${GITHUB_PATH}
+ - name: Clean up disk space
+ run: |
+ echo "=== Free disk space before cleanup ==="
+ df -h /
+
+ echo ""
+ echo "=== Removing Xcode simulators ==="
+ sudo rm -rf /Library/Developer/CoreSimulator/Caches || :
+ echo "Removed /Library/Developer/CoreSimulator/Caches"
+
+ echo ""
+ echo "=== Removing user simulator data ==="
+ rm -rf ~/Library/Developer/CoreSimulator || :
+ echo "Removed ~/Library/Developer/CoreSimulator"
+
+ echo ""
+ echo "=== Free disk space after cleanup ==="
+ df -h /
- name: Install dependencies
run: |
- # We want to use llvm@14 to avoid shared z3
- # dependency. llvm@14 doesn't depend on z3 and llvm depends
- # on z3. And Homebrew's z3 provides only shared library. It
- # doesn't provides static z3 because z3's CMake doesn't accept
- # building both shared and static libraries at once.
- # See also: Z3_BUILD_LIBZ3_SHARED in
- # https://github.com/Z3Prover/z3/blob/master/README-CMake.md
- #
- # If llvm is installed, Apache Arrow C++ uses llvm rather than
- # llvm@14 because llvm is newer than llvm@14.
- brew uninstall llvm || :
+ echo "=== Free disk space at start of dependency installation ==="
+ df -h /
+ echo ""
# Ensure updating python@XXX with the "--overwrite" option.
# If python@XXX is updated without "--overwrite", it causes
# a conflict error. Because Python 3 installed not by
@@ -240,25 +280,53 @@ jobs:
brew uninstall pkg-config@0.29.2 || :
fi
+ # Install basic build tools via brew (vcpkg needs these)
+ brew install cmake ninja pkg-config
brew bundle --file=arrow/cpp/Brewfile
+
+ # Clean up any existing LLVM installations in favor of vcpkg.
+ # Need to uninstall all versioned LLVM packages (llvm@18, llvm@17, etc.)
+ for llvm_pkg in $(brew list | grep -E '^llvm(@[0-9]+)?$'); do
+ brew uninstall "${llvm_pkg}" || :
+ done
+
# We want to link aws-sdk-cpp statically but Homebrew's
# aws-sdk-cpp provides only shared library. If we have
# Homebrew's aws-sdk-cpp, our build mix Homebrew's
# aws-sdk-cpp and bundled aws-sdk-cpp. We uninstall Homebrew's
# aws-sdk-cpp to ensure using only bundled aws-sdk-cpp.
- brew uninstall aws-sdk-cpp
+ brew uninstall aws-sdk-cpp || :
# We want to use bundled RE2 for static linking. If
# Homebrew's RE2 is installed, its header file may be used.
# We uninstall Homebrew's RE2 to ensure using bundled RE2.
brew uninstall grpc || : # gRPC depends on RE2
brew uninstall grpc@1.54 || : # gRPC 1.54 may be installed too
- brew uninstall re2
+ brew uninstall re2 || :
# We want to use bundled Protobuf for static linking. If
# Homebrew's Protobuf is installed, its library file may be
# used on test We uninstall Homebrew's Protobuf to ensure using
# bundled Protobuf.
- brew uninstall protobuf
+ brew uninstall protobuf || :
+
+ echo ""
+ echo "=== Free disk space before LLVM build ==="
+ df -h /
+
+ echo ""
+ # Use vcpkg to install LLVM.
+ vcpkg install \
+ --clean-after-build \
+ --vcpkg-root=${VCPKG_ROOT_LOCAL} \
+ --x-install-root=${VCPKG_ROOT_LOCAL}/installed \
+ --x-manifest-root=arrow/ci/vcpkg \
+ --overlay-ports=arrow/ci/vcpkg/overlay/llvm/ \
+ --x-feature=gandiva-llvm
+
+ echo ""
+ echo "=== Free disk space after LLVM build ==="
+ df -h /
+ echo ""
brew bundle --file=Brewfile
- name: Prepare ccache
run: |
@@ -271,10 +339,18 @@ jobs:
restore-keys: jni-macos-${{ matrix.platform.arch }}-
- name: Build
run: |
+ echo "=== Free disk space at start of build ==="
+ df -h /
+
+ echo ""
set -e
# make brew Java available to CMake
export JAVA_HOME=$(brew --prefix openjdk@11)/libexec/openjdk.jdk/Contents/Home
ci/scripts/jni_macos_build.sh . arrow build jni
+
+ echo ""
+ echo "=== Free disk space at end of build ==="
+ df -h /
- name: Compress into single artifact to keep directory structure
run: tar -cvzf jni-macos-${{ matrix.platform.arch }}.tar.gz jni/
- name: Upload artifacts
@@ -299,7 +375,6 @@ jobs:
tar -xf apache-arrow-java-*.tar.gz --strip-components=1
tar -xvzf jni-linux-x86_64.tar.gz
tar -xvzf jni-linux-aarch_64.tar.gz
- tar -xvzf jni-macos-x86_64.tar.gz
tar -xvzf jni-macos-aarch_64.tar.gz
- name: Test that shared libraries exist
run: |
@@ -315,11 +390,6 @@ jobs:
test -f jni/arrow_orc_jni/aarch_64/libarrow_orc_jni.so
test -f jni/gandiva_jni/aarch_64/libgandiva_jni.so
- test -f jni/arrow_cdata_jni/x86_64/libarrow_cdata_jni.dylib
- test -f jni/arrow_dataset_jni/x86_64/libarrow_dataset_jni.dylib
- test -f jni/arrow_orc_jni/x86_64/libarrow_orc_jni.dylib
- test -f jni/gandiva_jni/x86_64/libgandiva_jni.dylib
-
test -f jni/arrow_cdata_jni/aarch_64/libarrow_cdata_jni.dylib
test -f jni/arrow_dataset_jni/aarch_64/libarrow_dataset_jni.dylib
test -f jni/arrow_orc_jni/aarch_64/libarrow_orc_jni.dylib
@@ -437,30 +507,71 @@ jobs:
permissions:
contents: write
steps:
+ - name: Checkout arrow-java repository
+ uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ - name: Checkout Apache Arrow C++ repository
+ uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ repository: ${{github.event.inputs.arrow_repo}}
+ ref: ${{github.event.inputs.arrow_branch}}
+ path: arrow
+ - name: Get commit IDs
+ id: commit_ids
+ run: |
+ # Get short commit ID for arrow-java
+ arrow_java_commit=$(git rev-parse --short HEAD)
+ echo "arrow_java_commit=${arrow_java_commit}" >> $GITHUB_OUTPUT
+
+ # Get short commit ID for arrow
+ cd arrow
+ arrow_commit=$(git rev-parse --short HEAD)
+ echo "arrow_commit=${arrow_commit}" >> $GITHUB_OUTPUT
+ cd ..
+
+ # Parse version from release tag
+ ver=$(echo ${{github.event.inputs.release_tag_name}})
+ version=${ver%-rc*}
+ version=${version#v}
+ rc=${ver#*-rc}
+
+ # Create release name with both commit IDs
+ release_name="${version}-${arrow_java_commit}-${arrow_commit}"
+ release_tag="v${release_name}"
+ echo "release_name=${release_name}" >> $GITHUB_OUTPUT
+ echo "release_tag=${release_tag}" >> $GITHUB_OUTPUT
+ echo "version=${version}" >> $GITHUB_OUTPUT
+ echo "rc=${rc}" >> $GITHUB_OUTPUT
+
+ echo "Arrow Java commit: ${arrow_java_commit}"
+ echo "Arrow commit: ${arrow_commit}"
+ echo "Release tag: ${release_tag}"
- name: Download release artifacts
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
pattern: release-*
path: artifacts
+ - name: Create and push tag
+ run: |
+ git config user.name "github-actions[bot]"
+ git config user.email "github-actions[bot]@users.noreply.github.com"
+ git tag -a "${{ steps.commit_ids.outputs.release_tag }}" -m "Release ${{ steps.commit_ids.outputs.release_name }} RC${{ steps.commit_ids.outputs.rc }}" -m "Action URL: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
+ git push origin "${{ steps.commit_ids.outputs.release_tag }}"
+ env:
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Upload
run: |
# GH-499: How to create release notes?
- echo "${{github.event.inputs.release_tag_name}}"
- ver=$(echo ${{github.event.inputs.release_tag_name}})
- version=${ver%-rc*}
- version=${version#v}
- rc=${ver#*-rc}
- gh release create ${{github.event.inputs.release_tag_name}} \
- --generate-notes \
+ echo "Creating release: ${{ steps.commit_ids.outputs.release_tag }}"
+ gh release create "${{ steps.commit_ids.outputs.release_tag }}" \
+ -n "Release ${{ steps.commit_ids.outputs.release_name }} RC${{ steps.commit_ids.outputs.rc }}
Triggered by: ${{ github.actor }}
Action URL: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID
arrow_branch: ${{github.event.inputs.ARROW_BRANCH}}
arrow_repo: ${{github.event.inputs.ARROW_REPO}}
release_tag_name: ${{github.event.inputs.RELEASE_TAG_NAME}}
arrow-java branch: ${{github.ref_name}}" \
--prerelease \
--repo ${GITHUB_REPOSITORY} \
- --title "Apache Arrow Java ${version} RC${rc}" \
- --verify-tag
+ --title "Apache Arrow Java ${{ steps.commit_ids.outputs.version }} RC${{ steps.commit_ids.outputs.rc }} (arrow-java: ${{ steps.commit_ids.outputs.arrow_java_commit }}, arrow: ${{ steps.commit_ids.outputs.arrow_commit }})"
# GitHub CLI does not respect their own rate limits
# https://github.com/cli/cli/issues/9586
for artifact in artifacts/*/*; do
sleep 1
- gh release upload ${{github.event.inputs.release_tag_name}} \
+ gh release upload "${{ steps.commit_ids.outputs.release_tag }}" \
--repo ${GITHUB_REPOSITORY} \
$artifact
done
diff --git a/.github/workflows/rc.yml b/.github/workflows/rc.yml
index 7e3cf5f6f2..9319a18ecf 100644
--- a/.github/workflows/rc.yml
+++ b/.github/workflows/rc.yml
@@ -380,7 +380,6 @@ jobs:
tar -xf apache-arrow-java-*.tar.gz --strip-components=1
tar -xvzf jni-linux-x86_64.tar.gz
tar -xvzf jni-linux-aarch_64.tar.gz
- tar -xvzf jni-macos-x86_64.tar.gz
tar -xvzf jni-macos-aarch_64.tar.gz
tar -xvzf jni-windows-x86_64.tar.gz
- name: Test that shared libraries exist
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 5db5c988eb..313496e652 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -89,7 +89,7 @@ jobs:
include:
- arch: AMD64
jdk: 11
- macos: 13
+ macos: 15-intel
- arch: AArch64
jdk: 11
macos: latest
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8e8faf3cc0..1efb47ed70 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -44,8 +44,8 @@ repos:
rm -f apache-arrow-java.tar.gz"
always_run: true
pass_filenames: false
- - repo: https://github.com/koalaman/shellcheck-precommit
- rev: v0.10.0
+ - repo: https://github.com/shellcheck-py/shellcheck-py
+ rev: v0.10.0.1
hooks:
- id: shellcheck
args:
diff --git a/ci/scripts/jni_macos_build.sh b/ci/scripts/jni_macos_build.sh
index f7543b6f7a..77f367a37e 100755
--- a/ci/scripts/jni_macos_build.sh
+++ b/ci/scripts/jni_macos_build.sh
@@ -78,6 +78,55 @@ export ARROW_TEST_DATA="${arrow_dir}/testing/data"
export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"
export AWS_EC2_METADATA_DISABLED=TRUE
+# Determine vcpkg triplet based on architecture
+vcpkg_arch="$(arch)"
+case "${vcpkg_arch}" in
+arm64)
+ vcpkg_triplet="arm64-osx"
+ ;;
+i386 | x86_64)
+ vcpkg_triplet="x64-osx"
+ ;;
+*)
+ vcpkg_triplet="arm64-osx"
+ ;;
+esac
+
+# Set LLVM_DIR to point to vcpkg-installed LLVM if VCPKG_ROOT_LOCAL is set
+llvm_dir_arg=""
+gandiva_cxx_flags=""
+osx_sysroot_arg=""
+re2_source_arg="-Dre2_SOURCE=BUNDLED"
+if [ -n "${VCPKG_ROOT_LOCAL:-}" ]; then
+ vcpkg_installed="${VCPKG_ROOT_LOCAL}/installed/${vcpkg_triplet}"
+ llvm_cmake_dir="${vcpkg_installed}/share/llvm"
+ if [ -d "${llvm_cmake_dir}" ]; then
+ llvm_dir_arg="-DLLVM_DIR=${llvm_cmake_dir}"
+
+ # vcpkg's clang needs to know where to find system headers
+ # Arrow's GandivaAddBitcode.cmake uses CMAKE_OSX_SYSROOT to set SDKROOT env var
+ sdk_path="$(xcrun --show-sdk-path)"
+ if [ -d "${sdk_path}" ]; then
+ osx_sysroot_arg="-DCMAKE_OSX_SYSROOT=${sdk_path}"
+ fi
+
+ # Also pass the C++ standard library include path via ARROW_GANDIVA_PC_CXX_FLAGS
+ xcode_path="$(xcode-select -p)"
+ cxx_include_path="${xcode_path}/Toolchains/XcodeDefault.xctoolchain/usr/include/c++/v1"
+ if [ -d "${cxx_include_path}" ]; then
+ gandiva_cxx_flags="-DARROW_GANDIVA_PC_CXX_FLAGS=-stdlib=libc++;-isystem;${cxx_include_path}"
+ fi
+
+ # Use vcpkg's RE2 since it's installed as a dependency of LLVM
+ # This ensures ABI compatibility - vcpkg's RE2 uses std::string_view API
+ # which matches what vcpkg's LLVM and Abseil expect
+ re2_cmake_dir="${vcpkg_installed}/share/re2"
+ if [ -d "${re2_cmake_dir}" ]; then
+ re2_source_arg="-Dre2_ROOT=${vcpkg_installed}"
+ fi
+ fi
+fi
+
cmake \
-S "${arrow_dir}/cpp" \
-B "${build_dir}/cpp" \
@@ -100,10 +149,13 @@ cmake \
-DCMAKE_INSTALL_PREFIX="${install_dir}" \
-DCMAKE_UNITY_BUILD="${CMAKE_UNITY_BUILD}" \
-DGTest_SOURCE=BUNDLED \
+ ${llvm_dir_arg:+"${llvm_dir_arg}"} \
+ ${osx_sysroot_arg:+"${osx_sysroot_arg}"} \
+ ${gandiva_cxx_flags:+"${gandiva_cxx_flags}"} \
-DPARQUET_BUILD_EXAMPLES=OFF \
-DPARQUET_BUILD_EXECUTABLES=OFF \
-DPARQUET_REQUIRE_ENCRYPTION=OFF \
- -Dre2_SOURCE=BUNDLED \
+ "${re2_source_arg}" \
-GNinja
cmake --build "${build_dir}/cpp" --target install
github_actions_group_end
@@ -125,7 +177,27 @@ if [ "${ARROW_RUN_TESTS:-}" == "ON" ]; then
github_actions_group_end
fi
-export JAVA_JNI_CMAKE_ARGS="-DProtobuf_ROOT=${build_dir}/cpp/protobuf_ep-install"
+# Pass paths to dependencies so the JNI build can find them
+# Build up the JNI CMake args based on what's available
+jni_cmake_args="${llvm_dir_arg}"
+
+# Add Protobuf path if bundled, otherwise CMake will find system Protobuf
+if [ -d "${build_dir}/cpp/protobuf_ep-install" ]; then
+ jni_cmake_args="${jni_cmake_args} -DProtobuf_ROOT=${build_dir}/cpp/protobuf_ep-install"
+fi
+
+# RE2 path for the JNI build - prefer vcpkg's RE2 if we used it for the C++ build,
+# otherwise fall back to bundled RE2 if available
+if [ "${re2_source_arg}" != "-Dre2_SOURCE=BUNDLED" ]; then
+ # The C++ build used vcpkg's RE2 (re2_source_arg is -Dre2_ROOT=<vcpkg dir>),
+ # so hand the JNI build exactly the same root to keep both builds consistent.
+ jni_cmake_args="${jni_cmake_args} ${re2_source_arg}"
+elif [ -d "${build_dir}/cpp/re2_ep-install" ]; then
+ # The C++ build bundled RE2 (also when vcpkg is set up but ships no RE2).
+ jni_cmake_args="${jni_cmake_args} -Dre2_ROOT=${build_dir}/cpp/re2_ep-install"
+fi
+
+export JAVA_JNI_CMAKE_ARGS="${jni_cmake_args}"
"${source_dir}/ci/scripts/jni_build.sh" \
"${source_dir}" \
"${install_dir}" \
diff --git a/ci/scripts/jni_manylinux_build.sh b/ci/scripts/jni_manylinux_build.sh
index a34ec0f420..b097a1d2e5 100755
--- a/ci/scripts/jni_manylinux_build.sh
+++ b/ci/scripts/jni_manylinux_build.sh
@@ -25,6 +25,22 @@ set -euo pipefail
# shellcheck source=ci/scripts/util_log.sh
. "$(dirname "${0}")/util_log.sh"
+github_actions_group_begin "Update llvm"
+vcpkg install \
+ --debug \
+ --clean-after-build \
+ --x-install-root="${VCPKG_ROOT}/installed" \
+ --x-manifest-root=/arrow/ci/vcpkg \
+ --overlay-ports=/arrow/ci/vcpkg/overlay/llvm/ \
+ --x-feature=dev \
+ --x-feature=flight \
+ --x-feature=gcs \
+ --x-feature=json \
+ --x-feature=parquet \
+ --x-feature=gandiva \
+ --x-feature=s3
+github_actions_group_end
+
github_actions_group_begin "Prepare arguments"
source_dir="$(cd "${1}" && pwd)"
arrow_dir="$(cd "${2}" && pwd)"
diff --git a/vector/src/main/codegen/templates/UnionListWriter.java b/vector/src/main/codegen/templates/UnionListWriter.java
index 8844f27296..4b54739230 100644
--- a/vector/src/main/codegen/templates/UnionListWriter.java
+++ b/vector/src/main/codegen/templates/UnionListWriter.java
@@ -53,6 +53,7 @@ public class Union${listName}Writer extends AbstractFieldWriter {
private boolean inStruct = false;
private boolean listStarted = false;
private String structName;
+ private ArrowType extensionType;
<#if listName == "LargeList" || listName == "LargeListView">
private static final long OFFSET_WIDTH = 8;
<#else>
@@ -203,12 +204,13 @@ public MapWriter map(String name, boolean keysSorted) {
@Override
public ExtensionWriter extension(ArrowType arrowType) {
+ extensionType = arrowType;
return this;
}
+
@Override
public ExtensionWriter extension(String name, ArrowType arrowType) {
- ExtensionWriter extensionWriter = writer.extension(name, arrowType);
- return extensionWriter;
+ return writer.extension(name, arrowType);
}
<#if listName == "LargeList">
@@ -335,13 +337,15 @@ public void writeNull() {
@Override
public void writeExtension(Object value) {
- writer.writeExtension(value);
+ writer.writeExtension(value, extensionType);
writer.setPosition(writer.idx() + 1);
}
+
@Override
- public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory var1) {
- writer.addExtensionTypeWriterFactory(var1);
+ public void writeExtension(Object value, ArrowType type) {
+ (type != null ? extension(type) : this).writeExtension(value); // honor the caller's type; null keeps the stored one
}
+
public void write(ExtensionHolder var1) {
writer.write(var1);
writer.setPosition(writer.idx() + 1);