```#include <stdio.h>

/*
* All of this code is basically necessary in order to trigger the problem.
* It seems that if you don't shuffle enough with the floating point registers, this simply won't do anything.
* Note that this problem is being triggered whatever the environment is: 32 / 64 bits with or without SSE.
* I haven't tried anything else than Intel platforms though. See comments on main() for more details.
*/

class Vector3 {
public:
float x, y, z;

Vector3(float x, float y, float z) : x(x), y(y), z(z) { }
Vector3 & operator*=(float a) {
x *= a; y *= a; z *= a;
return *this;
}
Vector3 operator-() const { return Vector3(-x, -y, -z); }
};

class Matrix4;

class Matrix3 {
public:
float a0, a1, a2, b0, b1, b2, c0, c1, c2;

Matrix3(float a0, float a1, float a2,
float b0, float b1, float b2,
float c0, float c1, float c2) :
a0(a0), a1(a1), a2(a2),
b0(b0), b1(b1), b2(b2),
c0(c0), c1(c1), c2(c2) { }

explicit Matrix3(const Matrix4& m);

inline Matrix3 Transpose() const { return Matrix3(a0, b0, c0, a1, b1, c1, a2, b2, c2);}
};

class Matrix4 {
public:
float a0, a1, a2, a3, b0, b1, b2, b3, c0, c1, c2, c3, d0, d1, d2, d3;

Matrix4() :
a0(1.0f), a1(0.0f), a2(0.0f), a3(0.0f),
b0(0.0f), b1(1.0f), b2(0.0f), b3(0.0f),
c0(0.0f), c1(0.0f), c2(1.0f), c3(0.0f),
d0(0.0f), d1(0.0f), d2(0.0f), d3(1.0f) { }

explicit Matrix4(const Matrix3& m) :
a0(m.a0), a1(m.a1), a2(m.a2), a3(0.0f),
b0(m.b0), b1(m.b1), b2(m.b2), b3(0.0f),
c0(m.c0), c1(m.c1), c2(m.c2), c3(0.0f),
d0(0.0f), d1(0.0f), d2(0.0f), d3(1.0f) { }

/* This is the most offending part: this is a bad case of pointer aliasing. This is being used in Scale later. */
Vector3 *Row0AsVec3() { return (Vector3 *) &a0; }
Vector3 *Row1AsVec3() { return (Vector3 *) &b0; }
Vector3 *Row2AsVec3() { return (Vector3 *) &c0; }
Vector3 *Row3AsVec3() { return (Vector3 *) &d0; }
const Vector3 *Row0AsVec3() const { return (Vector3 *) &a0; }
const Vector3 *Row1AsVec3() const { return (Vector3 *) &b0; }
const Vector3 *Row2AsVec3() const { return (Vector3 *) &c0; }
const Vector3 *Row3AsVec3() const { return (Vector3 *) &d0; }

friend Vector3 operator*(const Vector3 & v, const Matrix4 & r);

void Translate(const Vector3 & move);
void Scale(float scale);

Matrix4 AffineInverse(float uniformScale) const;
};

Matrix3::Matrix3(const Matrix4& m) {
a0 = m.a0; a1 = m.a1; a2 = m.a2;
b0 = m.b0; b1 = m.b1; b2 = m.b2;
c0 = m.c0; c1 = m.c1; c2 = m.c2;
}

Vector3 operator*(const Vector3& v, const Matrix4& r) {
return Vector3(
v.x * r.a0 + v.y * r.b0 + v.z * r.c0 + r.d0,
v.x * r.a1 + v.y * r.b1 + v.z * r.c1 + r.d1,
v.x * r.a2 + v.y * r.b2 + v.z * r.c2 + r.d2);
}

void Matrix4::Translate(const Vector3 & move) {
d0 += move.x * a0 + move.y * b0 + move.z * c0;
d1 += move.x * a1 + move.y * b1 + move.z * c1;
d2 += move.x * a2 + move.y * b2 + move.z * c2;
}

/*
* This is the problematic function: when gcc is going to optimize it by inlining Scale in it,
* it seems it's going to think that only a0, b0 and c0 are being modified, and won't taint
* a1, a2, b1, b2, c1, c2 so that the old values will be kept before and after; also note the
* second usage of pointer aliasing there.
*/

Matrix4 Matrix4::AffineInverse(float uniformScale) const {
Matrix4 matrix(Matrix3(*this).Transpose());

matrix.Scale(1.0f / (uniformScale * uniformScale));
matrix.Translate(-*Row3AsVec3());
return matrix;
}

/*
* Here's the biggest problem. Changing this method to avoid pointer aliasing will make the whole thing work.
*/

void Matrix4::Scale(float scale) {
*Row0AsVec3() *= scale;
*Row1AsVec3() *= scale;
*Row2AsVec3() *= scale;
}

/*
* Simple testcase. A buggy gcc (4.2.1 to 4.3.2 or the redhat ones) will display "x = 2.857178, y = 316.570801, z = -24.159454",
* whereas a working one (4.4.0+ or 4.2.0-) will display "x = 2.857178, y = -11.428467, z = 1.904770"
*
* Note that with gcc 4.4.0+, you have to add -fno-strict-aliasing, otherwise you get "x = -247.555176, y = 301.496094, z = -23.009033",
* which isn't the case at all with gcc 4.2.0-, or gcc 4.3.3 and 4.3.4.
*/

int main() {
Vector3 A(2692, -3360, 267), B(2695, -3372, 269);
float s = 1.05f;
Matrix4 matrix;

matrix.Translate(A);
matrix.Scale(s);
matrix = matrix.AffineInverse(s);
Vector3 r = B * matrix;

fprintf(stderr, "x = %f, y = %f, z = %f\n", r.x, r.y, r.z);
}
```

Generated by GNU enscript 1.6.4.