-
Notifications
You must be signed in to change notification settings - Fork 1.1k
fix(bqjdbc): optimize meetsReadRatio latency to achieve faster page counting #13090
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -55,7 +55,6 @@ | |
| import com.google.cloud.bigquery.storage.v1.ReadSession; | ||
| import com.google.common.annotations.VisibleForTesting; | ||
| import com.google.common.collect.ImmutableList; | ||
| import com.google.common.collect.Iterators; | ||
| import com.google.common.util.concurrent.Uninterruptibles; | ||
| import java.lang.ref.ReferenceQueue; | ||
| import java.sql.Connection; | ||
|
|
@@ -944,15 +943,21 @@ private boolean meetsReadRatio(TableResult results) { | |
| LOG.finest("++enter++"); | ||
| long totalRows = results.getTotalRows(); | ||
|
|
||
| if (totalRows == 0 || totalRows < querySettings.getHighThroughputMinTableSize()) { | ||
| // SAFEGUARD: If all data has already been retrieved in the first page, | ||
| // NEVER switch to the Read API as it would discard in-memory data and cause a double-fetch. | ||
| if (totalRows == 0 | ||
| || totalRows < querySettings.getHighThroughputMinTableSize() | ||
| || !results.hasNextPage()) { | ||
| return false; | ||
| } | ||
|
|
||
| // TODO(BQ Team): TableResult doesnt expose the number of records in the current page, hence the | ||
| // below log iterates and counts. This is inefficient and we may eventually want to expose | ||
| // PageSize with TableResults | ||
| // TODO(Obada): Scope for performance optimization. | ||
| int pageSize = Iterators.size(results.getValues().iterator()); | ||
| long pageSize = querySettings.getMaxResultPerPage(); | ||
|
|
||
| // Prevent division by zero due to potential overflows/empty sets: | ||
| if (pageSize <= 0) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's an impossible condition. It can't be < 0 and if totalRows is 0 is already checked
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I still think we don't need this. getMaxResultPerPage() shouldn't be <= 0. We can add this check during connection creation, but we shouldn't check it when we use it |
||
| pageSize = 1; | ||
| } | ||
|
|
||
| return totalRows / pageSize > querySettings.getHighThroughputActivationRatio(); | ||
| } | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.