diff --git a/java-bigquery/google-cloud-bigquery-jdbc/.gitignore b/java-bigquery/google-cloud-bigquery-jdbc/.gitignore index 2c4fba513e11..40a3476b598a 100644 --- a/java-bigquery/google-cloud-bigquery-jdbc/.gitignore +++ b/java-bigquery/google-cloud-bigquery-jdbc/.gitignore @@ -2,6 +2,7 @@ drivers/** target-it/** *logs*/** **/ITBigQueryJDBCLocalTest.java +**/BigQueryStatementE2EBenchmark.java tools/**/*.class tools/**/*.jfr \ No newline at end of file diff --git a/java-bigquery/google-cloud-bigquery-jdbc/src/main/java/com/google/cloud/bigquery/jdbc/BigQueryJdbcUrlUtility.java b/java-bigquery/google-cloud-bigquery-jdbc/src/main/java/com/google/cloud/bigquery/jdbc/BigQueryJdbcUrlUtility.java index 89a2b8b5cb8c..c2ade0928f03 100644 --- a/java-bigquery/google-cloud-bigquery-jdbc/src/main/java/com/google/cloud/bigquery/jdbc/BigQueryJdbcUrlUtility.java +++ b/java-bigquery/google-cloud-bigquery-jdbc/src/main/java/com/google/cloud/bigquery/jdbc/BigQueryJdbcUrlUtility.java @@ -72,7 +72,7 @@ protected boolean removeEldestEntry(Map.Entry> eldes static final String QUERY_PROPERTIES_NAME = "QueryProperties"; static final int DEFAULT_HTAPI_ACTIVATION_RATIO_VALUE = 2; static final String HTAPI_MIN_TABLE_SIZE_PROPERTY_NAME = "HighThroughputMinTableSize"; - static final int DEFAULT_HTAPI_MIN_TABLE_SIZE_VALUE = 100; + static final int DEFAULT_HTAPI_MIN_TABLE_SIZE_VALUE = 10000; static final int DEFAULT_OAUTH_TYPE_VALUE = -1; static final String LOCATION_PROPERTY_NAME = "Location"; static final String ENDPOINT_OVERRIDES_PROPERTY_NAME = "EndpointOverrides"; diff --git a/java-bigquery/google-cloud-bigquery-jdbc/src/main/java/com/google/cloud/bigquery/jdbc/BigQueryStatement.java b/java-bigquery/google-cloud-bigquery-jdbc/src/main/java/com/google/cloud/bigquery/jdbc/BigQueryStatement.java index e2dab7b31678..b224a51b67b1 100644 --- a/java-bigquery/google-cloud-bigquery-jdbc/src/main/java/com/google/cloud/bigquery/jdbc/BigQueryStatement.java +++ b/java-bigquery/google-cloud-bigquery-jdbc/src/main/java/com/google/cloud/bigquery/jdbc/BigQueryStatement.java @@ -55,7 +55,6 @@ import com.google.cloud.bigquery.storage.v1.ReadSession; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; -import com.google.common.collect.Iterators; import com.google.common.util.concurrent.Uninterruptibles; import java.lang.ref.ReferenceQueue; import java.sql.Connection; @@ -944,15 +943,21 @@ private boolean meetsReadRatio(TableResult results) { LOG.finest("++enter++"); long totalRows = results.getTotalRows(); - if (totalRows == 0 || totalRows < querySettings.getHighThroughputMinTableSize()) { + // SAFEGUARD: If all data has already been retrieved in the first page, + // NEVER switch to the Read API as it would discard in-memory data and cause a double-fetch. + if (totalRows == 0 + || totalRows < querySettings.getHighThroughputMinTableSize() + || !results.hasNextPage()) { return false; } - // TODO(BQ Team): TableResult doesnt expose the number of records in the current page, hence the - // below log iterates and counts. This is inefficient and we may eventually want to expose - // PageSize with TableResults - // TODO(Obada): Scope for performance optimization. - int pageSize = Iterators.size(results.getValues().iterator()); + long pageSize = querySettings.getMaxResultPerPage(); + + // Prevent division by zero due to potential overflows/empty sets: + if (pageSize <= 0) { + pageSize = 1; + } + return totalRows / pageSize > querySettings.getHighThroughputActivationRatio(); } diff --git a/java-bigquery/google-cloud-bigquery-jdbc/src/test/java/com/google/cloud/bigquery/jdbc/BigQueryStatementTest.java b/java-bigquery/google-cloud-bigquery-jdbc/src/test/java/com/google/cloud/bigquery/jdbc/BigQueryStatementTest.java index 9fef90c69a4d..033cb72dcf8c 100644 --- a/java-bigquery/google-cloud-bigquery-jdbc/src/test/java/com/google/cloud/bigquery/jdbc/BigQueryStatementTest.java +++ b/java-bigquery/google-cloud-bigquery-jdbc/src/test/java/com/google/cloud/bigquery/jdbc/BigQueryStatementTest.java @@ -32,6 +32,7 @@ import com.google.cloud.bigquery.BigQueryOptions; import com.google.cloud.bigquery.Field; import com.google.cloud.bigquery.FieldList; +import com.google.cloud.bigquery.FieldValueList; import com.google.cloud.bigquery.Job; import com.google.cloud.bigquery.JobId; import com.google.cloud.bigquery.JobInfo; @@ -55,6 +56,7 @@ import java.io.IOException; import java.sql.ResultSet; import java.sql.SQLException; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -494,4 +496,98 @@ public void testGetStatementType(boolean isReadOnlyTokenUsed) throws Exception { verify(bigquery, isReadOnlyTokenUsed ? Mockito.never() : Mockito.times(1)) .create(any(JobInfo.class)); } + + @Test + public void testUseReadAPI_SafeguardSmallDataset() throws SQLException { + // Setup: totalRows < MinTableSize, so it should not activate the Read API + doReturn(true).when(bigQueryConnection).isEnableHighThroughputAPI(); + doReturn(100).when(bigQueryConnection).getHighThroughputMinTableSize(); + doReturn(2).when(bigQueryConnection).getHighThroughputActivationRatio(); + doReturn(1000L).when(bigQueryConnection).getMaxResults(); + + BigQueryStatement statement = new BigQueryStatement(bigQueryConnection); + TableResult tableResult = mock(TableResult.class); + doReturn(50L).when(tableResult).getTotalRows(); + + // Standard java collection in values + List valuesList = new ArrayList<>(); + for (int i = 0; i < 50; i++) { + valuesList.add(mock(FieldValueList.class)); + } + doReturn(valuesList).when(tableResult).getValues(); + + boolean useReadApi = statement.useReadAPI(tableResult); + assertThat(useReadApi).isFalse(); + } + + @Test + public void testUseReadAPI_SafeguardNoNextPage() throws SQLException { + // Setup: totalRows = 500 > MinTableSize (100), but hasNextPage() is false. + // Safeguard should prevent double-fetching and not activate the Read API. + doReturn(true).when(bigQueryConnection).isEnableHighThroughputAPI(); + doReturn(100).when(bigQueryConnection).getHighThroughputMinTableSize(); + doReturn(2).when(bigQueryConnection).getHighThroughputActivationRatio(); + doReturn(1000L).when(bigQueryConnection).getMaxResults(); + + BigQueryStatement statement = new BigQueryStatement(bigQueryConnection); + TableResult tableResult = mock(TableResult.class); + doReturn(500L).when(tableResult).getTotalRows(); + doReturn(false).when(tableResult).hasNextPage(); + + boolean useReadApi = statement.useReadAPI(tableResult); + assertThat(useReadApi).isFalse(); + } + + @Test + public void testUseReadAPI_MeetsRatio() throws SQLException { + // Setup: totalRows = 500, maxResultPerPage = 100, MinTableSize = 100, ActivationRatio = 2 + // ratio = 5 > 2, should activate Read API + doReturn(true).when(bigQueryConnection).isEnableHighThroughputAPI(); + doReturn(100).when(bigQueryConnection).getHighThroughputMinTableSize(); + doReturn(2).when(bigQueryConnection).getHighThroughputActivationRatio(); + doReturn(100L).when(bigQueryConnection).getMaxResults(); + + BigQueryStatement statement = new BigQueryStatement(bigQueryConnection); + TableResult tableResult = mock(TableResult.class); + doReturn(500L).when(tableResult).getTotalRows(); + doReturn(true).when(tableResult).hasNextPage(); + + boolean useReadApi = statement.useReadAPI(tableResult); + assertThat(useReadApi).isTrue(); + } + + @Test + public void testUseReadAPI_FailsMinTableSize() throws SQLException { + // Setup: totalRows = 80 < MinTableSize (100) + doReturn(true).when(bigQueryConnection).isEnableHighThroughputAPI(); + doReturn(100).when(bigQueryConnection).getHighThroughputMinTableSize(); + doReturn(2).when(bigQueryConnection).getHighThroughputActivationRatio(); + doReturn(1000L).when(bigQueryConnection).getMaxResults(); + + BigQueryStatement statement = new BigQueryStatement(bigQueryConnection); + TableResult tableResult = mock(TableResult.class); + doReturn(80L).when(tableResult).getTotalRows(); + + boolean useReadApi = statement.useReadAPI(tableResult); + assertThat(useReadApi).isFalse(); + } + + @Test + public void testUseReadAPI_ZeroPageSizeDivisionByZeroSafeguard() throws SQLException { + // Setup: totalRows = 500, MinTableSize = 100, ActivationRatio = 2, maxResultPerPage = 0 + // Verify that the division by zero check safely guards and falls back to pageSize = 1 + doReturn(true).when(bigQueryConnection).isEnableHighThroughputAPI(); + doReturn(100).when(bigQueryConnection).getHighThroughputMinTableSize(); + doReturn(2).when(bigQueryConnection).getHighThroughputActivationRatio(); + doReturn(0L).when(bigQueryConnection).getMaxResults(); // maxResultPerPage = 0 + + BigQueryStatement statement = new BigQueryStatement(bigQueryConnection); + TableResult tableResult = mock(TableResult.class); + doReturn(500L).when(tableResult).getTotalRows(); + doReturn(true).when(tableResult).hasNextPage(); + + // This should not throw ArithmeticException (/ by zero) and should evaluate safely + boolean useReadApi = statement.useReadAPI(tableResult); + assertThat(useReadApi).isTrue(); // ratio = 500 / 1 = 500 > 2 -> true + } }