PaddlePaddle · ooooo-create · Jun 18, 2025 · Jun 19, 2025 · Jun 19, 2025 · Jun 19, 2025
diff --git a/paddle/fluid/primitive/decomp_rule/decomp_rule/composite.h b/paddle/fluid/primitive/decomp_rule/decomp_rule/composite.h
@@ -1138,15 +1138,6 @@ Tensor embedding_decomp(const Tensor& x,
   Tensor weight_tmp = weight;
   Tensor res;
   if (has_dynamic_shape(x.shape())) {
-    if (padding_idx != NoPadding) {
-      Tensor put_shape = shape64<T>(sum<T>(weight, {0}, weight.dtype(), true));
-      Tensor padding_idx_tensor =
-          backend::full_with_tensor<T>(put_shape, padding_idx, DataType::INT64);
-      Tensor zeros =
-          backend::full_with_tensor<T>(put_shape, 0.0, weight.dtype());
-      weight_tmp = put_along_axis<T>(weight, padding_idx_tensor, zeros, 0);
-    }
-
     if (x.dims().size() <= 1) {
       res = gather<T>(weight_tmp, x);
       if (x.dims().size() == 0) {
@@ -1162,14 +1153,6 @@ Tensor embedding_decomp(const Tensor& x,
       res = backend::reshape<T>(out, res_t_shape);
     }
   } else {
-    if (padding_idx != NoPadding) {
-      std::vector<int64_t> put_shape{1, weight.dims()[1]};
-      Tensor padding_idx_tensor =
-          full<T>(put_shape, padding_idx, DataType::INT64, x.place());
-      Tensor zeros = full<T>(put_shape, 0.0, weight.dtype(), weight.place());
-      weight_tmp = put_along_axis<T>(weight, padding_idx_tensor, zeros, 0);
-    }
-
     if (x.dims().size() <= 1) {
       res = gather<T>(weight_tmp, x);
       if (x.dims().size() == 0) {

diff --git a/paddle/phi/kernels/cpu/embedding_kernel.cc b/paddle/phi/kernels/cpu/embedding_kernel.cc
@@ -50,40 +50,34 @@ struct EmbeddingCPUFunctor {
     auto* output = out_->data<T>();
 
     for (int64_t i = 0; i < ids_numel; ++i) {
-      if (padding_idx_ == kNoPadding && ids[i] != padding_idx_) {
-        PADDLE_ENFORCE_LT(
-            ids[i],
-            row_number,
-            common::errors::InvalidArgument(
-                "Variable value (input) of OP(fluid.layers.embedding) "
-                "expected >= 0 and < %ld, but got %ld. Please check input "
-                "value.",
-                row_number,
-                ids[i]));
-        PADDLE_ENFORCE_GE(
-            ids[i],
-            0,
-            common::errors::InvalidArgument(
-                "Variable value (input) of OP(fluid.layers.embedding) "
-                "expected >= 0 and < %ld, but got %ld. Please check input "
-                "value.",
-                row_number,
-                ids[i]));
-      }
+      PADDLE_ENFORCE_LT(
+          ids[i],
+          row_number,
+          common::errors::InvalidArgument(
+              "Variable value (input) of OP(paddle.nn.functional.embedding) "
+              "expected >= 0 and < %lld, but got %lld. Please check input "
+              "value.",
+              row_number,
+              ids[i]));
+      PADDLE_ENFORCE_GE(
+          ids[i],
+          0,
+          common::errors::InvalidArgument(
+              "Variable value (input) of OP(paddle.nn.functional.embedding) "
+              "expected >= 0 and < %lld, but got %lld. Please check input "
+              "value.",
+              row_number,
+              ids[i]));
     }
 
 #if defined(_OPENMP) && !defined(PADDLE_WITH_CUDA)
 #pragma omp parallel for
 #endif
 
     for (int64_t i = 0; i < ids_numel; ++i) {
-      if (padding_idx_ != kNoPadding && ids[i] == padding_idx_) {
-        memset(output + i * row_width, 0, row_width * sizeof(T));
-      } else {
-        memcpy(output + i * row_width,
-               table + ids[i] * row_width,
-               row_width * sizeof(T));
-      }
+      memcpy(output + i * row_width,
+             table + ids[i] * row_width,
+             row_width * sizeof(T));
     }
   }
 

diff --git a/paddle/phi/kernels/gpu/embedding_kernel.cu b/paddle/phi/kernels/gpu/embedding_kernel.cu
@@ -21,7 +21,7 @@
 #include "paddle/phi/kernels/funcs/embedding_util.h"
 namespace phi {
 
-template <typename T, typename IdT, bool PaddingFlag>
+template <typename T, typename IdT>
 __global__ void EmbeddingFW(T *output,
                             const T *table,
                             const IdT *ids,
@@ -34,27 +34,18 @@ __global__ void EmbeddingFW(T *output,
 
   while (idy < K) {
     auto id = static_cast<int64_t>(ids[idy]);
-    if (PaddingFlag == false || id != padding_idx) {
-      PADDLE_ENFORCE(id >= 0,
-                     "Id should no less than 0 but received an id value: %lld.",
-                     id);
-      PADDLE_ENFORCE(
-          id < N,
-          "Id should smaller than %lld but received an id value: %lld.",
-          N,
-          id);
-    }
+    PADDLE_ENFORCE(id >= 0,
+                   "Id should no less than 0 but received an id value: %lld.",
+                   id);
+    PADDLE_ENFORCE(
+        id < N,
+        "Id should smaller than %lld but received an id value: %lld.",
+        N,
+        id);
     T *out = output + idy * D;
     const T *tab = table + id * D;
     for (int i = idx; i < D; i += blockDim.x) {
-      if (PaddingFlag) {
-        if (id == padding_idx)
-          out[i] = static_cast<T>(0);
-        else
-          out[i] = tab[i];
-      } else {
-        out[i] = tab[i];
-      }
+      out[i] = tab[i];
     }
     idy += blockDim.y * gridDim.x;
   }
@@ -88,13 +79,8 @@ struct EmbeddingCUDAFunctor {
     auto *output = dev_ctx_.template Alloc<T>(out_);
     auto stream = dev_ctx_.stream();
 
-    if (padding_idx_ == -1) {
-      EmbeddingFW<T, IdT, false><<<grids, threads, 0, stream>>>(
-          output, table, ids, N, K, D, padding_idx_);
-    } else {
-      EmbeddingFW<T, IdT, true><<<grids, threads, 0, stream>>>(
-          output, table, ids, N, K, D, padding_idx_);
-    }
+    EmbeddingFW<T, IdT><<<grids, threads, 0, stream>>>(
+        output, table, ids, N, K, D, padding_idx_);
   }
 
  private:

diff --git a/test/legacy_test/test_lookup_table_v2_bf16_op.py b/test/legacy_test/test_lookup_table_v2_bf16_op.py
@@ -61,7 +61,6 @@ class TestLookupTableBF16OpWithPadding(TestLookupTableV2BF16Op):
     def test_check_output(self):
         ids = np.squeeze(self.inputs['Ids'])
         padding_idx = np.random.choice(ids, 1)[0]
-        self.outputs['Out'][ids == padding_idx] = np.zeros(31)
         self.attrs = {'padding_idx': int(padding_idx)}
         self.check_output_with_place(core.CPUPlace())
 
@@ -71,7 +70,6 @@ def test_check_output(self):
         ids = self.inputs['Ids']
         flatten_idx = ids.flatten()
         padding_idx = np.random.choice(flatten_idx, 1)[0]
-        self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31)
         self.attrs = {'padding_idx': int(padding_idx)}
         self.check_output_with_place(core.CPUPlace())
 

diff --git a/test/legacy_test/test_lookup_table_v2_op.py b/test/legacy_test/test_lookup_table_v2_op.py
@@ -124,7 +124,6 @@ class TestLookupTableOpWithPadding(TestLookupTableOp):
     def test_check_output(self):
         ids = np.squeeze(self.inputs['Ids'])
         padding_idx = np.random.choice(ids, 1)[0]
-        self.outputs['Out'][ids == padding_idx] = np.zeros(31)
         self.attrs = {'padding_idx': int(padding_idx)}
         self.check_output(check_cinn=True, check_pir=True, check_prim_pir=True)
 
@@ -139,7 +138,6 @@ def test_check_output(self):
         ids = self.inputs['Ids']
         flatten_idx = ids.flatten()
         padding_idx = np.random.choice(flatten_idx, 1)[0]
-        self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31)
         self.attrs = {'padding_idx': padding_idx}
         self.check_output(check_cinn=True, check_pir=True, check_prim_pir=True)