diff --git a/infini_train/src/autograd/comm.cc b/infini_train/src/autograd/comm.cc index d524088a..db62efc3 100644 --- a/infini_train/src/autograd/comm.cc +++ b/infini_train/src/autograd/comm.cc @@ -18,7 +18,6 @@ Scatter::Scatter(const std::vector &target_gpus, int64_t dim, std::vector> Scatter::Forward(const std::vector> &input_tensors) { const auto &input = input_tensors[0]; std::vector> output_tensors; - auto device = input->GetDevice().type(); output_tensors = pg_->Scatter(input, target_gpus_, dim_); return output_tensors; } @@ -51,7 +50,7 @@ std::vector> Gather::Forward(const std::vectorGetDevice().type(); + return {pg_->Gather(input_tensors, target_device_, dim_)}; } diff --git a/infini_train/src/kernels/cpu/linear.cc b/infini_train/src/kernels/cpu/linear.cc index 9d28a92a..bc472416 100644 --- a/infini_train/src/kernels/cpu/linear.cc +++ b/infini_train/src/kernels/cpu/linear.cc @@ -22,7 +22,6 @@ std::shared_ptr LinearForward(const std::shared_ptr &input, cons const auto &input_dims = input->Dims(); CHECK_GE(input_dims.size(), 2); - const int64_t bs = std::accumulate(input_dims.rbegin() + 1, input_dims.rend(), 1, std::multiplies{}); const int64_t in_features = *input_dims.rbegin(); const auto &weight_dims = weight->Dims(); diff --git a/infini_train/src/kernels/cpu/reduction.cc b/infini_train/src/kernels/cpu/reduction.cc index 0aa936ba..602aefb2 100644 --- a/infini_train/src/kernels/cpu/reduction.cc +++ b/infini_train/src/kernels/cpu/reduction.cc @@ -18,7 +18,6 @@ std::shared_ptr ReduceOpForward(const std::shared_ptr &input, co CHECK_LT(actual_dim, input_dims.size()); std::vector output_dims = input_dims; - const int64_t reduce_size = input_dims[dim]; if (keep_dim) { output_dims[actual_dim] = 1; } else { diff --git a/infini_train/src/nn/modules/transformer/causal_self_attention.cc b/infini_train/src/nn/modules/transformer/causal_self_attention.cc index 5ea9eec5..8bed8193 100644 --- a/infini_train/src/nn/modules/transformer/causal_self_attention.cc +++ b/infini_train/src/nn/modules/transformer/causal_self_attention.cc @@ -88,7 +88,6 @@ CausalSelfAttention::ForwardStandard(const std::vectorDims()[0]; // bs - const auto C = x[0]->Dims()[2]; // n_embd const int64_t head_dim = n_embd_ / n_head_; // per-head dim (global) const int64_t local_C = n_embd_ / tp_world_size; // per-rank hidden diff --git a/infini_train/src/nn/modules/transformer/mlp.cc b/infini_train/src/nn/modules/transformer/mlp.cc index 3af341b2..9f1f488c 100644 --- a/infini_train/src/nn/modules/transformer/mlp.cc +++ b/infini_train/src/nn/modules/transformer/mlp.cc @@ -35,7 +35,6 @@ MLP::MLP(const TransformerConfig &config) : CloneableModule(kType) { } // Round up to multiple_of - int64_t before_round = ffn_hidden; ffn_hidden = (ffn_hidden + config.multiple_of - 1) / config.multiple_of * config.multiple_of; // c_fc: ColumnParallel (input full, output parallel) diff --git a/infini_train/src/nn/parallel/ddp/reducer.cc b/infini_train/src/nn/parallel/ddp/reducer.cc index 031fa428..1bdd29e1 100644 --- a/infini_train/src/nn/parallel/ddp/reducer.cc +++ b/infini_train/src/nn/parallel/ddp/reducer.cc @@ -368,7 +368,6 @@ void Reducer::MarkBucketReady(size_t bucket_index) { } // From next_bucket_, launch ready buckets(pending==0) in turn while (next_bucket_ < buckets_.size() && buckets_[next_bucket_].pending == 0) { - auto &bucket = buckets_[next_bucket_]; FinalizeBucketDense(next_bucket_); ++next_bucket_; } diff --git a/infini_train/src/nn/parallel/tensor_parallel.cc b/infini_train/src/nn/parallel/tensor_parallel.cc index 44ab8189..fc01007b 100644 --- a/infini_train/src/nn/parallel/tensor_parallel.cc +++ b/infini_train/src/nn/parallel/tensor_parallel.cc @@ -442,7 +442,6 @@ VocabParallelCrossEntropy::Forward(const std::vector> &i auto logits = std::make_shared(input_tensors[0]->To(DataType::kFLOAT32)); auto target = input_tensors[1]; - auto dtype = logits->Dtype(); auto device = logits->GetDevice(); CHECK(target->Dtype() == DataType::kINT64) << "target must be int64"; diff --git a/infini_train/src/tensor.cc b/infini_train/src/tensor.cc index 3c2ae69b..2b5408d7 100644 --- a/infini_train/src/tensor.cc +++ b/infini_train/src/tensor.cc @@ -786,7 +786,6 @@ void Tensor::Print(std::ostream &os) const { CHECK(dtype_ == DataType::kFLOAT32); const size_t num_elements = NumElements(); - const size_t num_bytes = num_elements * sizeof(float); auto impl = core::GetDeviceGuardImpl(GetDevice().type());