Skip to content

Make TypeId const comparable #142789

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 18 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions compiler/rustc_codegen_cranelift/src/constant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,13 @@ pub(crate) fn codegen_const_value<'tcx>(
fx.module.declare_data_in_func(data_id, &mut fx.bcx.func);
fx.bcx.ins().global_value(fx.pointer_type, local_data_id)
}
GlobalAlloc::TypeId { .. } => {
return CValue::const_val(
fx,
layout,
ScalarInt::try_from_target_usize(offset.bytes(), fx.tcx).unwrap(),
);
}
GlobalAlloc::Static(def_id) => {
assert!(fx.tcx.is_static(def_id));
let data_id = data_id_for_static(
Expand Down Expand Up @@ -360,6 +367,7 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
GlobalAlloc::Memory(alloc) => alloc,
GlobalAlloc::Function { .. }
| GlobalAlloc::Static(_)
| GlobalAlloc::TypeId { .. }
| GlobalAlloc::VTable(..) => {
unreachable!()
}
Expand Down Expand Up @@ -471,6 +479,11 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
.principal()
.map(|principal| tcx.instantiate_bound_regions_with_erased(principal)),
),
GlobalAlloc::TypeId { .. } => {
// Nothing to do, the bytes/offset of this pointer have already been written together with all other bytes,
// so we just need to drop this provenance.
continue;
}
GlobalAlloc::Static(def_id) => {
if tcx.codegen_fn_attrs(def_id).flags.contains(CodegenFnAttrFlags::THREAD_LOCAL)
{
Expand Down
10 changes: 8 additions & 2 deletions compiler/rustc_codegen_gcc/src/common.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use gccjit::{LValue, RValue, ToRValue, Type};
use rustc_abi as abi;
use rustc_abi::HasDataLayout;
use rustc_abi::Primitive::Pointer;
use rustc_abi::{self as abi, HasDataLayout};
use rustc_codegen_ssa::traits::{
BaseTypeCodegenMethods, ConstCodegenMethods, MiscCodegenMethods, StaticCodegenMethods,
};
Expand Down Expand Up @@ -282,6 +281,13 @@ impl<'gcc, 'tcx> ConstCodegenMethods for CodegenCx<'gcc, 'tcx> {
let init = self.const_data_from_alloc(alloc);
self.static_addr_of(init, alloc.inner().align, None)
}
GlobalAlloc::TypeId { .. } => {
let val = self.const_usize(offset.bytes());
// This is still a variable of pointer type, even though we only use the provenance
// of that pointer in CTFE and Miri. But to make LLVM's type system happy,
// we need an int-to-ptr cast here (it doesn't matter at all which provenance that picks).
return self.context.new_cast(None, val, ty);
}
GlobalAlloc::Static(def_id) => {
assert!(self.tcx.is_static(def_id));
self.get_static(def_id).get_address(None)
Expand Down
18 changes: 11 additions & 7 deletions compiler/rustc_codegen_llvm/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@
use std::borrow::Borrow;

use libc::{c_char, c_uint};
use rustc_abi as abi;
use rustc_abi::HasDataLayout;
use rustc_abi::Primitive::Pointer;
use rustc_abi::{self as abi, HasDataLayout as _};
use rustc_ast::Mutability;
use rustc_codegen_ssa::common::TypeKind;
use rustc_codegen_ssa::traits::*;
Expand Down Expand Up @@ -284,7 +283,8 @@ impl<'ll, 'tcx> ConstCodegenMethods for CodegenCx<'ll, 'tcx> {
self.const_bitcast(llval, llty)
};
} else {
let init = const_alloc_to_llvm(self, alloc, /*static*/ false);
let init =
const_alloc_to_llvm(self, alloc.inner(), /*static*/ false);
let alloc = alloc.inner();
let value = match alloc.mutability {
Mutability::Mut => self.static_addr_of_mut(init, alloc.align, None),
Expand Down Expand Up @@ -316,15 +316,19 @@ impl<'ll, 'tcx> ConstCodegenMethods for CodegenCx<'ll, 'tcx> {
}),
)))
.unwrap_memory();
let init = const_alloc_to_llvm(self, alloc, /*static*/ false);
let value = self.static_addr_of_impl(init, alloc.inner().align, None);
value
let init = const_alloc_to_llvm(self, alloc.inner(), /*static*/ false);
self.static_addr_of_impl(init, alloc.inner().align, None)
}
GlobalAlloc::Static(def_id) => {
assert!(self.tcx.is_static(def_id));
assert!(!self.tcx.is_thread_local_static(def_id));
self.get_static(def_id)
}
GlobalAlloc::TypeId { .. } => {
// Drop the provenance, the offset contains the bytes of the hash
let llval = self.const_usize(offset.bytes());
return unsafe { llvm::LLVMConstIntToPtr(llval, llty) };
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we have to emit an int-to-ptr cast here? I think we should avoid that if at all possible...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

because the types say there are raw pointers inside the TypeId, so I assumed that was necessary

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah right we are constructing an LLVM constant of type "array of pointers"... I guess LLVM won't like is just putting an integer there.

Suggested change
return unsafe { llvm::LLVMConstIntToPtr(llval, llty) };
// This is still a variable of pointer type, even though we only use the provenance
// of that pointer in CTFE and Miri. But to make LLVM's type system happy,
// we need an int-to-ptr cast here (it doesn't matter at all which provenance that picks).
return unsafe { llvm::LLVMConstIntToPtr(llval, llty) };

}
};
let base_addr_space = global_alloc.address_space(self);
let llval = unsafe {
Expand All @@ -346,7 +350,7 @@ impl<'ll, 'tcx> ConstCodegenMethods for CodegenCx<'ll, 'tcx> {
}

fn const_data_from_alloc(&self, alloc: ConstAllocation<'_>) -> Self::Value {
const_alloc_to_llvm(self, alloc, /*static*/ false)
const_alloc_to_llvm(self, alloc.inner(), /*static*/ false)
}

fn const_ptr_byte_offset(&self, base_addr: Self::Value, offset: abi::Size) -> Self::Value {
Expand Down
5 changes: 2 additions & 3 deletions compiler/rustc_codegen_llvm/src/consts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,9 @@ use crate::{base, debuginfo};

pub(crate) fn const_alloc_to_llvm<'ll>(
cx: &CodegenCx<'ll, '_>,
alloc: ConstAllocation<'_>,
alloc: &Allocation,
is_static: bool,
) -> &'ll Value {
let alloc = alloc.inner();
// We expect that callers of const_alloc_to_llvm will instead directly codegen a pointer or
// integer for any &ZST where the ZST is a constant (i.e. not a static). We should never be
// producing empty LLVM allocations as they're just adding noise to binaries and forcing less
Expand Down Expand Up @@ -138,7 +137,7 @@ fn codegen_static_initializer<'ll, 'tcx>(
def_id: DefId,
) -> Result<(&'ll Value, ConstAllocation<'tcx>), ErrorHandled> {
let alloc = cx.tcx.eval_static_initializer(def_id)?;
Ok((const_alloc_to_llvm(cx, alloc, /*static*/ true), alloc))
Ok((const_alloc_to_llvm(cx, alloc.inner(), /*static*/ true), alloc))
}

fn set_global_alignment<'ll>(cx: &CodegenCx<'ll, '_>, gv: &'ll Value, mut align: Align) {
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_codegen_ssa/src/mir/operand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
offset: Size,
) -> Self {
let alloc_align = alloc.inner().align;
assert!(alloc_align >= layout.align.abi);
assert!(alloc_align >= layout.align.abi, "{alloc_align:?} < {:?}", layout.align.abi);

let read_scalar = |start, size, s: abi::Scalar, ty| {
match alloc.0.read_scalar(
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_const_eval/messages.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ const_eval_dealloc_kind_mismatch =

const_eval_deref_function_pointer =
accessing {$allocation} which contains a function
const_eval_deref_typeid_pointer =
accessing {$allocation} which contains a `TypeId`
const_eval_deref_vtable_pointer =
accessing {$allocation} which contains a vtable
const_eval_division_by_zero =
Expand Down
6 changes: 5 additions & 1 deletion compiler/rustc_const_eval/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,7 @@ impl<'a> ReportErrorExt for UndefinedBehaviorInfo<'a> {
WriteToReadOnly(_) => const_eval_write_to_read_only,
DerefFunctionPointer(_) => const_eval_deref_function_pointer,
DerefVTablePointer(_) => const_eval_deref_vtable_pointer,
DerefTypeIdPointer(_) => const_eval_deref_typeid_pointer,
InvalidBool(_) => const_eval_invalid_bool,
InvalidChar(_) => const_eval_invalid_char,
InvalidTag(_) => const_eval_invalid_tag,
Expand Down Expand Up @@ -588,7 +589,10 @@ impl<'a> ReportErrorExt for UndefinedBehaviorInfo<'a> {
diag.arg("has", has.bytes());
diag.arg("msg", format!("{msg:?}"));
}
WriteToReadOnly(alloc) | DerefFunctionPointer(alloc) | DerefVTablePointer(alloc) => {
WriteToReadOnly(alloc)
| DerefFunctionPointer(alloc)
| DerefVTablePointer(alloc)
| DerefTypeIdPointer(alloc) => {
diag.arg("allocation", alloc);
}
InvalidBool(b) => {
Expand Down
75 changes: 70 additions & 5 deletions compiler/rustc_const_eval/src/interpret/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use std::assert_matches::assert_matches;

use rustc_abi::Size;
use rustc_abi::{FieldIdx, Size};
use rustc_apfloat::ieee::{Double, Half, Quad, Single};
use rustc_middle::mir::{self, BinOp, ConstValue, NonDivergingIntrinsic};
use rustc_middle::ty::layout::TyAndLayout;
Expand All @@ -28,8 +28,35 @@ pub(crate) fn alloc_type_name<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> ConstAll
let alloc = Allocation::from_bytes_byte_aligned_immutable(path.into_bytes(), ());
tcx.mk_const_alloc(alloc)
}

impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
/// Generates a value of `TypeId` for `ty` in-place.
pub(crate) fn write_type_id(
&mut self,
ty: Ty<'tcx>,
dest: &PlaceTy<'tcx, M::Provenance>,
) -> InterpResult<'tcx, ()> {
let tcx = self.tcx;
let type_id_hash = tcx.type_id_hash(ty).as_u128();
let op = self.const_val_to_op(
ConstValue::Scalar(Scalar::from_u128(type_id_hash)),
tcx.types.u128,
None,
)?;
self.copy_op_allow_transmute(&op, dest)?;

// Give the first pointer-size bytes provenance that knows about the type id.
// Here we rely on `TypeId` being a newtype around an array of pointers, so we
// first project to its only field and then the first array element.
let alloc_id = tcx.reserve_and_set_type_id_alloc(ty);
let first = self.project_field(dest, FieldIdx::ZERO)?;
let first = self.project_index(&first, 0)?;
let offset = self.read_scalar(&first)?.to_target_usize(&tcx)?;
let ptr = Pointer::new(alloc_id.into(), Size::from_bytes(offset));
let ptr = self.global_root_pointer(ptr)?;
let val = Scalar::from_pointer(ptr, &tcx);
self.write_scalar(val, &first)
}

/// Returns `true` if emulation happened.
/// Here we implement the intrinsics that are common to all Miri instances; individual machines can add their own
/// intrinsic handling.
Expand Down Expand Up @@ -63,9 +90,47 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
sym::type_id => {
let tp_ty = instance.args.type_at(0);
ensure_monomorphic_enough(tcx, tp_ty)?;
let val = ConstValue::from_u128(tcx.type_id_hash(tp_ty).as_u128());
let val = self.const_val_to_op(val, dest.layout.ty, Some(dest.layout))?;
self.copy_op(&val, dest)?;
self.write_type_id(tp_ty, dest)?;
}
sym::type_id_eq => {
// Both operands are `TypeId`, which is a newtype around an array of pointers.
// Project until we have the array elements.
let a_fields = self.project_field(&args[0], FieldIdx::ZERO)?;
let b_fields = self.project_field(&args[1], FieldIdx::ZERO)?;

let mut a_fields = self.project_array_fields(&a_fields)?;
let mut b_fields = self.project_array_fields(&b_fields)?;

let (_idx, a) = a_fields
.next(self)?
.expect("we know the layout of TypeId has at least 2 array elements");
let a = self.deref_pointer(&a)?;
let (a, offset_a) = self.get_ptr_type_id(a.ptr())?;

let (_idx, b) = b_fields
.next(self)?
.expect("we know the layout of TypeId has at least 2 array elements");
let b = self.deref_pointer(&b)?;
let (b, offset_b) = self.get_ptr_type_id(b.ptr())?;

let provenance_matches = a == b;

if offset_a != offset_b && provenance_matches {
throw_ub_format!("modifying `TypeId` internals is not permitted")
}

while let Some((_, a)) = a_fields.next(self)? {
let (_, b) = b_fields.next(self)?.unwrap();

let a = self.read_target_usize(&a)?;
let b = self.read_target_usize(&b)?;

if a != b && provenance_matches {
throw_ub_format!("modifying `TypeId` internals is not permitted")
}
}
Comment on lines +118 to +131
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if offset_a != offset_b && provenance_matches {
throw_ub_format!("modifying `TypeId` internals is not permitted")
}
while let Some((_, a)) = a_fields.next(self)? {
let (_, b) = b_fields.next(self)?.unwrap();
let a = self.read_target_usize(&a)?;
let b = self.read_target_usize(&b)?;
if a != b && provenance_matches {
throw_ub_format!("modifying `TypeId` internals is not permitted")
}
}
let mut byte_eq = offset_a == offset_b;
while let Some((_, a)) = a_fields.next(self)? {
let (_, b) = b_fields.next(self)?.unwrap();
let a = self.read_target_usize(&a)?;
let b = self.read_target_usize(&b)?;
byte_eq &= a == b;
}
if provenance_matches && !byte_eq {
throw_ub_format!("type_id_eq: one of the TypeId arguments is invalid, the hash does not match the type it represents");
}

Hm... even more ideal might be if we could actually compare against the hash? Then we'd properly UB even if both sides are the same, but they are the same invalid value. But we can also leave that to a future PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yea there is more UB we can catch, but technically it is library-UB

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With type_id and type_id_eq both being intrinsics / language primitive, I am basically saying we should view this as library UB.

But I'm happy to implement the more strict checks myself later. The suggestion above just makes the code a bit easier to follow IMO.


self.write_scalar(Scalar::from_bool(provenance_matches), dest)?;
}
sym::variant_count => {
let tp_ty = instance.args.type_at(0);
Expand Down
30 changes: 28 additions & 2 deletions compiler/rustc_const_eval/src/interpret/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ use std::{fmt, ptr};
use rustc_abi::{Align, HasDataLayout, Size};
use rustc_ast::Mutability;
use rustc_data_structures::fx::{FxHashSet, FxIndexMap};
use rustc_middle::bug;
use rustc_middle::mir::display_allocation;
use rustc_middle::ty::{self, Instance, Ty, TyCtxt};
use rustc_middle::{bug, throw_ub_format};
use tracing::{debug, instrument, trace};

use super::{
Expand Down Expand Up @@ -346,6 +346,13 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
kind = "vtable",
)
}
Some(GlobalAlloc::TypeId { .. }) => {
err_ub_custom!(
fluent::const_eval_invalid_dealloc,
alloc_id = alloc_id,
kind = "typeid",
)
}
Some(GlobalAlloc::Static(..) | GlobalAlloc::Memory(..)) => {
err_ub_custom!(
fluent::const_eval_invalid_dealloc,
Expand Down Expand Up @@ -615,6 +622,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
}
Some(GlobalAlloc::Function { .. }) => throw_ub!(DerefFunctionPointer(id)),
Some(GlobalAlloc::VTable(..)) => throw_ub!(DerefVTablePointer(id)),
Some(GlobalAlloc::TypeId { .. }) => throw_ub!(DerefTypeIdPointer(id)),
None => throw_ub!(PointerUseAfterFree(id, CheckInAllocMsg::MemoryAccess)),
Some(GlobalAlloc::Static(def_id)) => {
assert!(self.tcx.is_static(def_id));
Expand Down Expand Up @@ -896,7 +904,9 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
let (size, align) = global_alloc.size_and_align(*self.tcx, self.typing_env);
let mutbl = global_alloc.mutability(*self.tcx, self.typing_env);
let kind = match global_alloc {
GlobalAlloc::Static { .. } | GlobalAlloc::Memory { .. } => AllocKind::LiveData,
GlobalAlloc::TypeId { .. }
| GlobalAlloc::Static { .. }
| GlobalAlloc::Memory { .. } => AllocKind::LiveData,
GlobalAlloc::Function { .. } => bug!("We already checked function pointers above"),
GlobalAlloc::VTable { .. } => AllocKind::VTable,
};
Expand Down Expand Up @@ -936,6 +946,19 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
}
}

/// Assume that the pointer points to the first chunk of a `TypeId` and return the type that its
/// provenance refers to, as well as the segment of the hash that this pointer covers.
pub fn get_ptr_type_id(
&self,
ptr: Pointer<Option<M::Provenance>>,
) -> InterpResult<'tcx, (Ty<'tcx>, Size)> {
let (alloc_id, offset, _meta) = self.ptr_get_alloc_id(ptr, 0)?;
let GlobalAlloc::TypeId { ty } = self.tcx.global_alloc(alloc_id) else {
throw_ub_format!("type_id_eq: `TypeId` provenance is not a type id")
};
interp_ok((ty, offset))
}

pub fn get_ptr_fn(
&self,
ptr: Pointer<Option<M::Provenance>>,
Expand Down Expand Up @@ -1197,6 +1220,9 @@ impl<'a, 'tcx, M: Machine<'tcx>> std::fmt::Debug for DumpAllocs<'a, 'tcx, M> {
Some(GlobalAlloc::VTable(ty, dyn_ty)) => {
write!(fmt, " (vtable: impl {dyn_ty} for {ty})")?;
}
Some(GlobalAlloc::TypeId { ty }) => {
write!(fmt, " (typeid for {ty})")?;
}
Some(GlobalAlloc::Static(did)) => {
write!(fmt, " (static: {})", self.ecx.tcx.def_path_str(did))?;
}
Expand Down
6 changes: 5 additions & 1 deletion compiler/rustc_const_eval/src/interpret/projection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,11 @@ where
base: &'a P,
) -> InterpResult<'tcx, ArrayIterator<'a, 'tcx, M::Provenance, P>> {
let abi::FieldsShape::Array { stride, .. } = base.layout().fields else {
span_bug!(self.cur_span(), "project_array_fields: expected an array layout");
span_bug!(
self.cur_span(),
"project_array_fields: expected an array layout, got {:#?}",
base.layout()
);
};
let len = base.len(self)?;
let field_layout = base.layout().field(self, 0);
Expand Down
Loading