summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'lcms2mt/plugins/fast_float/src/fast_float_matsh.c')
-rw-r--r--lcms2mt/plugins/fast_float/src/fast_float_matsh.c323
1 files changed, 323 insertions, 0 deletions
diff --git a/lcms2mt/plugins/fast_float/src/fast_float_matsh.c b/lcms2mt/plugins/fast_float/src/fast_float_matsh.c
new file mode 100644
index 00000000..07396e59
--- /dev/null
+++ b/lcms2mt/plugins/fast_float/src/fast_float_matsh.c
@@ -0,0 +1,323 @@
+//---------------------------------------------------------------------------------
+//
+// Little Color Management System, fast floating point extensions
+// Copyright (c) 1998-2020 Marti Maria Saguer, all rights reserved
+//
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//---------------------------------------------------------------------------------
+
+// Optimization for matrix-shaper in float
+
+#include "fast_float_internal.h"
+
+
+// Private data container for this optimization: filled by SetMatShaper(), read per pixel by MatShaperFloat(), released by FreeMatShaper().
+typedef struct {
+// Matrix coefficients and offset vector, kept as 32-bit floats.
+// The evaluator adds Off[] after the matrix product only when UseOff is TRUE.
+ cmsFloat32Number Mat[3][3];
+ cmsFloat32Number Off[3];
+// First shaper: one table per channel, sampled from the first curve set at MAX_NODES_IN_CURVE evenly spaced points over [0, 1]; applied to the input values.
+ cmsFloat32Number Shaper1R[MAX_NODES_IN_CURVE];
+ cmsFloat32Number Shaper1G[MAX_NODES_IN_CURVE];
+ cmsFloat32Number Shaper1B[MAX_NODES_IN_CURVE];
+// Second shaper: same sampling of the second curve set; applied to the result of the matrix stage.
+ cmsFloat32Number Shaper2R[MAX_NODES_IN_CURVE];
+ cmsFloat32Number Shaper2G[MAX_NODES_IN_CURVE];
+ cmsFloat32Number Shaper2B[MAX_NODES_IN_CURVE];
+// TRUE when an offset vector was supplied to SetMatShaper(), FALSE when it was NULL.
+ cmsBool UseOff;
+// Pointer actually returned by the allocator. The structure lives at an aligned address inside that buffer, so this is the pointer that gets freed.
+ void * real_ptr;
+
+} VXMatShaperFloatData;
+
+
+// Allocate a zero-initialized VXMatShaperFloatData on a 16-byte boundary.
+//
+// The buffer is over-allocated by 32 bytes so the structure can be placed at the
+// next multiple of 16 past the start of the buffer. The pointer returned by the
+// allocator is recorded in the real_ptr field, which is what FreeMatShaper()
+// hands back to _cmsFree().
+//
+// Returns NULL when the underlying allocation fails.
+static
+VXMatShaperFloatData* malloc_aligned(cmsContext ContextID)
+{
+    cmsUInt8Number* real_ptr = (cmsUInt8Number*) _cmsMallocZero(ContextID, sizeof(VXMatShaperFloatData) + 32);
+    VXMatShaperFloatData* p;
+
+    // Without this check a failed allocation would be rounded to a small
+    // non-NULL address and written through below, and the caller's NULL test
+    // could never trigger.
+    if (real_ptr == NULL) return NULL;
+
+    // Do the rounding entirely in uintptr_t so the mask has the pointer's width
+    p = (VXMatShaperFloatData*) (((uintptr_t) real_ptr + 16) & ~(uintptr_t) 0xf);
+    p->real_ptr = real_ptr;
+
+    return p;
+}
+
+
+
+// Release the private data container.
+// Data is the aligned pointer that was handed out as user data; the block given
+// back to the allocator is the one recorded in its real_ptr field.
+static
+void FreeMatShaper(cmsContext ContextID, void* Data)
+{
+    VXMatShaperFloatData* container = (VXMatShaperFloatData*) Data;
+
+    // Nothing was allocated, nothing to release
+    if (container == NULL) return;
+
+    _cmsFree(ContextID, container->real_ptr);
+}
+
+
+// Sample a tone curve into a lookup table.
+// Table receives MAX_NODES_IN_CURVE entries; entry k holds the curve evaluated
+// at k / (MAX_NODES_IN_CURVE - 1), so the nodes run from 0.0 to 1.0 inclusive.
+static
+void FillShaper(cmsContext ContextID, cmsFloat32Number* Table, cmsToneCurve* Curve)
+{
+    const cmsFloat32Number lastNode = (cmsFloat32Number) (MAX_NODES_IN_CURVE - 1);
+    int node;
+
+    for (node = 0; node < MAX_NODES_IN_CURVE; node++) {
+
+        cmsFloat32Number x = (cmsFloat32Number) node / lastNode;
+
+        Table[node] = cmsEvalToneCurveFloat(ContextID, Curve, x);
+    }
+}
+
+
+// Compute the matrix-shaper structure.
+//
+//   Curve1 - three tone curves sampled into the first shaper tables
+//   Mat    - 3x3 matrix; each coefficient is narrowed to a 32-bit float
+//   Off    - optional offset vector; NULL means "no offset" and clears UseOff
+//   Curve2 - three tone curves sampled into the second shaper tables
+//
+// Returns the newly allocated container, or NULL when malloc_aligned() reports
+// a failure. The container is released with FreeMatShaper().
+static
+VXMatShaperFloatData* SetMatShaper(cmsContext ContextID, cmsToneCurve* Curve1[3], cmsMAT3* Mat, cmsVEC3* Off, cmsToneCurve* Curve2[3])
+{
+    VXMatShaperFloatData* p;
+    int i, j;
+
+    // Allocate a big chunk of memory to store the precomputed tables
+    p = malloc_aligned(ContextID);
+    if (p == NULL) return NULL;
+
+    // Precompute tables
+    FillShaper(ContextID, p->Shaper1R, Curve1[0]);
+    FillShaper(ContextID, p->Shaper1G, Curve1[1]);
+    FillShaper(ContextID, p->Shaper1B, Curve1[2]);
+
+    FillShaper(ContextID, p->Shaper2R, Curve2[0]);
+    FillShaper(ContextID, p->Shaper2G, Curve2[1]);
+    FillShaper(ContextID, p->Shaper2B, Curve2[2]);
+
+    // Matrix coefficients, double -> float
+    for (i = 0; i < 3; i++) {
+        for (j = 0; j < 3; j++) {
+            p->Mat[i][j] = (cmsFloat32Number) Mat->v[i].n[j];
+        }
+    }
+
+    // Whether an offset is in use does not depend on the component, so decide it once
+    p->UseOff = (Off != NULL) ? TRUE : FALSE;
+
+    for (i = 0; i < 3; i++) {
+
+        if (p->UseOff)
+            p->Off[i] = (cmsFloat32Number) Off->n[i];
+        else
+            p->Off[i] = 0.0f;
+    }
+
+    return p;
+}
+
+
+
+// A fast matrix-shaper evaluator for floating point.
+//
+// For each of the len pixels, the three input channels go through the first
+// shaper tables, then through the 3x3 matrix (plus the offset when one is in
+// use), and the three results go through the second shaper tables.
+//
+// Channel start positions and per-pixel steps come from
+// _cmsComputeComponentIncrements() for the transform's input and output formats,
+// which is why the buffers are walked through byte pointers.
+static
+void MatShaperFloat(cmsContext ContextID, struct _cmstransform_struct *CMMcargo,
+                    const cmsFloat32Number* Input,
+                    cmsFloat32Number* Output,
+                    cmsUInt32Number len,
+                    cmsUInt32Number Stride)
+{
+    VXMatShaperFloatData* data = (VXMatShaperFloatData*) _cmsGetTransformUserData(CMMcargo);
+
+    cmsUInt32Number SrcStart[cmsMAXCHANNELS], SrcStep[cmsMAXCHANNELS];
+    cmsUInt32Number DstStart[cmsMAXCHANNELS], DstStep[cmsMAXCHANNELS];
+    cmsUInt32Number nchans, nalpha;
+
+    const cmsUInt8Number* src[3];
+    cmsUInt8Number* dst[3];
+
+    cmsFloat32Number r, g, b;
+    cmsFloat32Number x, y, z;
+    cmsUInt32Number remaining;
+    int c;
+
+    _cmsComputeComponentIncrements(cmsGetTransformInputFormat(ContextID, (cmsHTRANSFORM) CMMcargo), Stride, &nchans, &nalpha, SrcStart, SrcStep);
+    _cmsComputeComponentIncrements(cmsGetTransformOutputFormat(ContextID, (cmsHTRANSFORM) CMMcargo), Stride, &nchans, &nalpha, DstStart, DstStep);
+
+    // Position one read and one write cursor per channel
+    for (c = 0; c < 3; c++) {
+        src[c] = (const cmsUInt8Number*) Input + SrcStart[c];
+        dst[c] = (cmsUInt8Number*) Output + DstStart[c];
+    }
+
+    for (remaining = len; remaining > 0; remaining--) {
+
+        // First shaper
+        r = flerp(data->Shaper1R, *(const cmsFloat32Number*) src[0]);
+        g = flerp(data->Shaper1G, *(const cmsFloat32Number*) src[1]);
+        b = flerp(data->Shaper1B, *(const cmsFloat32Number*) src[2]);
+
+        // Matrix
+        x = data->Mat[0][0] * r + data->Mat[0][1] * g + data->Mat[0][2] * b;
+        y = data->Mat[1][0] * r + data->Mat[1][1] * g + data->Mat[1][2] * b;
+        z = data->Mat[2][0] * r + data->Mat[2][1] * g + data->Mat[2][2] * b;
+
+        if (data->UseOff) {
+            x += data->Off[0];
+            y += data->Off[1];
+            z += data->Off[2];
+        }
+
+        // Second shaper, written only after all three inputs were read
+        *(cmsFloat32Number*) dst[0] = flerp(data->Shaper2R, x);
+        *(cmsFloat32Number*) dst[1] = flerp(data->Shaper2G, y);
+        *(cmsFloat32Number*) dst[2] = flerp(data->Shaper2B, z);
+
+        // Next pixel
+        for (c = 0; c < 3; c++) {
+            src[c] += SrcStep[c];
+            dst[c] += DstStep[c];
+        }
+    }
+}
+
+
+
+// Optimizer for float shaper-matrix-matrix-shaper pipelines.
+//
+// Accepts a pipeline made of exactly curves, matrix, matrix, curves that works
+// on 4-byte float data, RGB to RGB or gray to gray, and whose first matrix has
+// no offset. Both matrices are folded into one, and then:
+//
+//   - if the folded matrix is an identity and there is no offset, the two curve
+//     sets are handed to OptimizeFloatByJoiningCurves();
+//   - otherwise, for the 3-channel case only, MatShaperFloat() is installed
+//     together with tables precomputed by SetMatShaper().
+//
+// Gray to gray with a non-identity factor (or with an offset) is declined: the
+// evaluator and SetMatShaper() both work on three channels and three curves.
+//
+// On success the pipeline originally in *Lut is freed and replaced by the
+// rebuilt one, cmsFLAGS_CAN_CHANGE_FORMATTER is cleared from *dwFlags and TRUE
+// is returned. On failure FALSE is returned and *Lut is left untouched.
+cmsBool OptimizeFloatMatrixShaper(cmsContext ContextID,
+                                  _cmsTransformFn* TransformFn,
+                                  void** UserData,
+                                  _cmsFreeUserDataFn* FreeUserData,
+                                  cmsPipeline** Lut,
+                                  cmsUInt32Number* InputFormat,
+                                  cmsUInt32Number* OutputFormat,
+                                  cmsUInt32Number* dwFlags)
+{
+    cmsStage* Curve1, *Curve2;
+    cmsStage* Matrix1, *Matrix2;
+    _cmsStageMatrixData* Data1;
+    _cmsStageMatrixData* Data2;
+    cmsMAT3 res;
+    cmsBool IdentityMat = FALSE;
+    cmsBool GrayToGray;
+    cmsPipeline* Dest, *Src;
+    cmsUInt32Number nChans;
+    cmsFloat64Number factor = 1.0;
+
+    // Apply only to floating-point cases
+    if (!T_FLOAT(*InputFormat) || !T_FLOAT(*OutputFormat)) return FALSE;
+
+    // Only works on RGB to RGB and gray to gray
+    if ( !( (T_CHANNELS(*InputFormat) == 3 && T_CHANNELS(*OutputFormat) == 3)) &&
+         !( (T_CHANNELS(*InputFormat) == 1 && T_CHANNELS(*OutputFormat) == 1))) return FALSE;
+
+    // Only works on float
+    if (T_BYTES(*InputFormat) != 4 || T_BYTES(*OutputFormat) != 4) return FALSE;
+
+    // Seems suitable, proceed
+    Src = *Lut;
+
+    // Check for shaper-matrix-matrix-shaper structure, that is what this optimizer stands for
+    if (!cmsPipelineCheckAndRetreiveStages(ContextID, Src, 4,
+        cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType,
+        &Curve1, &Matrix1, &Matrix2, &Curve2)) return FALSE;
+
+    nChans = T_CHANNELS(*InputFormat);
+
+    // Get both matrices
+    Data1 = (_cmsStageMatrixData*) cmsStageData(ContextID, Matrix1);
+    Data2 = (_cmsStageMatrixData*) cmsStageData(ContextID, Matrix2);
+
+    // Input offset should be zero
+    if (Data1->Offset != NULL) return FALSE;
+
+    GrayToGray = (cmsStageInputChannels(ContextID, Matrix1) == 1 &&
+                  cmsStageOutputChannels(ContextID, Matrix2) == 1);
+
+    if (GrayToGray)
+    {
+        // This is a gray to gray. Just multiply
+        factor = Data1->Double[0]*Data2->Double[0] +
+                 Data1->Double[1]*Data2->Double[1] +
+                 Data1->Double[2]*Data2->Double[2];
+
+        // An offset on the second matrix makes the stage a non-identity,
+        // however close the gain is to 1, so it must not be dropped.
+        if (fabs(1 - factor) < (1.0 / 65535.0) && Data2->Offset == NULL) IdentityMat = TRUE;
+    }
+    else
+    {
+        // Multiply both matrices to get the result
+        _cmsMAT3per(ContextID, &res, (cmsMAT3*) Data2->Double, (cmsMAT3*) Data1->Double);
+
+        // Now the result is in res + Data2->Offset. Maybe it is a plain identity?
+        if (_cmsMAT3isIdentity(ContextID, &res) && Data2->Offset == NULL) {
+
+            // We can get rid of the full matrix
+            IdentityMat = TRUE;
+        }
+    }
+
+    // The matrix evaluator needs a folded 3x3 matrix, three curves per shaper and
+    // three channels per pixel. In the gray case "res" is never computed and each
+    // curve set holds a single curve, so decline before anything is allocated.
+    if (!IdentityMat && (GrayToGray || nChans != 3)) return FALSE;
+
+    // Allocate an empty LUT
+    Dest = cmsPipelineAlloc(ContextID, nChans, nChans);
+    if (!Dest) return FALSE;
+
+    // Assemble the new LUT. A failed duplication or allocation yields a NULL stage,
+    // which cmsPipelineInsertStage() rejects.
+    if (!cmsPipelineInsertStage(ContextID, Dest, cmsAT_BEGIN, cmsStageDup(ContextID, Curve1)))
+        goto Error;
+
+    if (!IdentityMat) {
+
+        if (!cmsPipelineInsertStage(ContextID, Dest, cmsAT_END,
+                cmsStageAllocMatrix(ContextID, 3, 3, (const cmsFloat64Number*) &res, Data2->Offset)))
+            goto Error;
+    }
+
+    if (!cmsPipelineInsertStage(ContextID, Dest, cmsAT_END, cmsStageDup(ContextID, Curve2)))
+        goto Error;
+
+    // If identity on matrix, we can further optimize the curves, so call the join curves routine
+    if (IdentityMat) {
+
+        // If the joiner declines, no transform function has been installed, so
+        // success must not be reported.
+        // NOTE(review): assumes the joiner leaves Dest valid when it returns FALSE - confirm.
+        if (!OptimizeFloatByJoiningCurves(ContextID, TransformFn, UserData, FreeUserData, &Dest, InputFormat, OutputFormat, dwFlags))
+            goto Error;
+    }
+    else {
+        _cmsStageToneCurvesData* mpeC1 = (_cmsStageToneCurvesData*) cmsStageData(ContextID, Curve1);
+        _cmsStageToneCurvesData* mpeC2 = (_cmsStageToneCurvesData*) cmsStageData(ContextID, Curve2);
+        VXMatShaperFloatData* shaper;
+
+        // Build the tables first, so nothing is published to the caller on failure
+        shaper = SetMatShaper(ContextID, mpeC1->TheCurves, &res, (cmsVEC3*) Data2->Offset, mpeC2->TheCurves);
+        if (shaper == NULL) goto Error;
+
+        // In this particular optimization, the cache does not help, as it takes more
+        // time to deal with the cache than with the pixel handling
+        *dwFlags |= cmsFLAGS_NOCACHE;
+
+        // Setup the optimization routines
+        *UserData = shaper;
+        *FreeUserData = FreeMatShaper;
+
+        *TransformFn = (_cmsTransformFn) MatShaperFloat;
+    }
+
+    *dwFlags &= ~cmsFLAGS_CAN_CHANGE_FORMATTER;
+    cmsPipelineFree(ContextID, Src);
+    *Lut = Dest;
+    return TRUE;
+
+Error:
+    // The original pipeline is still owned by the caller; only drop the rebuilt one
+    cmsPipelineFree(ContextID, Dest);
+    return FALSE;
+}