yichael
/
image-match


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161
							/*
 * Copyright (c) 1988-1997 Sam Leffler
 * Copyright (c) 1991-1997 Silicon Graphics, Inc.
 *
 * Permission to use, copy, modify, distribute, and sell this software and
 * its documentation for any purpose is hereby granted without fee, provided
 * that (i) the above copyright notices and this permission notice appear in
 * all copies of the software and related documentation, and (ii) the names of
 * Sam Leffler and Silicon Graphics may not be used in any advertising or
 * publicity relating to the software without the specific, prior written
 * permission of Sam Leffler and Silicon Graphics.
 *
 * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 *
 * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
 * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF
 * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THIS SOFTWARE.
 */

/*
 * TIFF Library.
 *
 * Predictor Tag Support (used by multiple codecs).
 */
#include "tif_predict.h"
#include "tiffiop.h"

#if defined(__x86_64__) || defined(_M_X64)
#include <emmintrin.h>
#endif

#define PredictorState(tif) ((TIFFPredictorState *)(tif)->tif_data)

static int horAcc8(TIFF *tif, uint8_t *cp0, tmsize_t cc);
static int horAcc16(TIFF *tif, uint8_t *cp0, tmsize_t cc);
static int horAcc32(TIFF *tif, uint8_t *cp0, tmsize_t cc);
static int horAcc64(TIFF *tif, uint8_t *cp0, tmsize_t cc);
static int swabHorAcc16(TIFF *tif, uint8_t *cp0, tmsize_t cc);
static int swabHorAcc32(TIFF *tif, uint8_t *cp0, tmsize_t cc);
static int swabHorAcc64(TIFF *tif, uint8_t *cp0, tmsize_t cc);
static int horDiff8(TIFF *tif, uint8_t *cp0, tmsize_t cc);
static int horDiff16(TIFF *tif, uint8_t *cp0, tmsize_t cc);
static int horDiff32(TIFF *tif, uint8_t *cp0, tmsize_t cc);
static int horDiff64(TIFF *tif, uint8_t *cp0, tmsize_t cc);
static int swabHorDiff16(TIFF *tif, uint8_t *cp0, tmsize_t cc);
static int swabHorDiff32(TIFF *tif, uint8_t *cp0, tmsize_t cc);
static int swabHorDiff64(TIFF *tif, uint8_t *cp0, tmsize_t cc);
static int fpAcc(TIFF *tif, uint8_t *cp0, tmsize_t cc);
static int fpDiff(TIFF *tif, uint8_t *cp0, tmsize_t cc);
static int PredictorDecodeRow(TIFF *tif, uint8_t *op0, tmsize_t occ0,
                              uint16_t s);
static int PredictorDecodeTile(TIFF *tif, uint8_t *op0, tmsize_t occ0,
                               uint16_t s);
static int PredictorEncodeRow(TIFF *tif, uint8_t *bp, tmsize_t cc, uint16_t s);
static int PredictorEncodeTile(TIFF *tif, uint8_t *bp0, tmsize_t cc0,
                               uint16_t s);

static int PredictorSetup(TIFF *tif)
{
    static const char module[] = "PredictorSetup";

    TIFFPredictorState *sp = PredictorState(tif);
    TIFFDirectory *td = &tif->tif_dir;

    switch (sp->predictor) /* no differencing */
    {
        case PREDICTOR_NONE:
            return 1;
        case PREDICTOR_HORIZONTAL:
            if (td->td_bitspersample != 8 && td->td_bitspersample != 16 &&
                td->td_bitspersample != 32 && td->td_bitspersample != 64)
            {
                TIFFErrorExtR(tif, module,
                              "Horizontal differencing \"Predictor\" not "
                              "supported with %" PRIu16 "-bit samples",
                              td->td_bitspersample);
                return 0;
            }
            break;
        case PREDICTOR_FLOATINGPOINT:
            if (td->td_sampleformat != SAMPLEFORMAT_IEEEFP)
            {
                TIFFErrorExtR(
                    tif, module,
                    "Floating point \"Predictor\" not supported with %" PRIu16
                    " data format",
                    td->td_sampleformat);
                return 0;
            }
            if (td->td_bitspersample != 16 && td->td_bitspersample != 24 &&
                td->td_bitspersample != 32 && td->td_bitspersample != 64)
            { /* Should 64 be allowed? */
                TIFFErrorExtR(
                    tif, module,
                    "Floating point \"Predictor\" not supported with %" PRIu16
                    "-bit samples",
                    td->td_bitspersample);
                return 0;
            }
            break;
        default:
            TIFFErrorExtR(tif, module, "\"Predictor\" value %d not supported",
                          sp->predictor);
            return 0;
    }
    sp->stride =
        (td->td_planarconfig == PLANARCONFIG_CONTIG ? td->td_samplesperpixel
                                                    : 1);
    /*
     * Calculate the scanline/tile-width size in bytes.
     */
    if (isTiled(tif))
        sp->rowsize = TIFFTileRowSize(tif);
    else
        sp->rowsize = TIFFScanlineSize(tif);
    if (sp->rowsize == 0)
        return 0;

    return 1;
}

static int PredictorSetupDecode(TIFF *tif)
{
    TIFFPredictorState *sp = PredictorState(tif);
    TIFFDirectory *td = &tif->tif_dir;

    /* Note: when PredictorSetup() fails, the effets of setupdecode() */
    /* will not be "canceled" so setupdecode() might be robust to */
    /* be called several times. */
    if (!(*sp->setupdecode)(tif) || !PredictorSetup(tif))
        return 0;

    if (sp->predictor == 2)
    {
        switch (td->td_bitspersample)
        {
            case 8:
                sp->decodepfunc = horAcc8;
                break;
            case 16:
                sp->decodepfunc = horAcc16;
                break;
            case 32:
                sp->decodepfunc = horAcc32;
                break;
            case 64:
                sp->decodepfunc = horAcc64;
                break;
        }
        /*
         * Override default decoding method with one that does the
         * predictor stuff.
         */
        if (tif->tif_decoderow != PredictorDecodeRow)
        {
            sp->decoderow = tif->tif_decoderow;
            tif->tif_decoderow = PredictorDecodeRow;
            sp->decodestrip = tif->tif_decodestrip;
            tif->tif_decodestrip = PredictorDecodeTile;
            sp->decodetile = tif->tif_decodetile;
            tif->tif_decodetile = PredictorDecodeTile;
        }

        /*
         * If the data is horizontally differenced 16-bit data that
         * requires byte-swapping, then it must be byte swapped before
         * the accumulation step.  We do this with a special-purpose
         * routine and override the normal post decoding logic that
         * the library setup when the directory was read.
         */
        if (tif->tif_flags & TIFF_SWAB)
        {
            if (sp->decodepfunc == horAcc16)
            {
                sp->decodepfunc = swabHorAcc16;
                tif->tif_postdecode = _TIFFNoPostDecode;
            }
            else if (sp->decodepfunc == horAcc32)
            {
                sp->decodepfunc = swabHorAcc32;
                tif->tif_postdecode = _TIFFNoPostDecode;
            }
            else if (sp->decodepfunc == horAcc64)
            {
                sp->decodepfunc = swabHorAcc64;
                tif->tif_postdecode = _TIFFNoPostDecode;
            }
        }
    }

    else if (sp->predictor == 3)
    {
        sp->decodepfunc = fpAcc;
        /*
         * Override default decoding method with one that does the
         * predictor stuff.
         */
        if (tif->tif_decoderow != PredictorDecodeRow)
        {
            sp->decoderow = tif->tif_decoderow;
            tif->tif_decoderow = PredictorDecodeRow;
            sp->decodestrip = tif->tif_decodestrip;
            tif->tif_decodestrip = PredictorDecodeTile;
            sp->decodetile = tif->tif_decodetile;
            tif->tif_decodetile = PredictorDecodeTile;
        }
        /*
         * The data should not be swapped outside of the floating
         * point predictor, the accumulation routine should return
         * bytes in the native order.
         */
        if (tif->tif_flags & TIFF_SWAB)
        {
            tif->tif_postdecode = _TIFFNoPostDecode;
        }
    }

    return 1;
}

static int PredictorSetupEncode(TIFF *tif)
{
    TIFFPredictorState *sp = PredictorState(tif);
    TIFFDirectory *td = &tif->tif_dir;

    if (!(*sp->setupencode)(tif) || !PredictorSetup(tif))
        return 0;

    if (sp->predictor == 2)
    {
        switch (td->td_bitspersample)
        {
            case 8:
                sp->encodepfunc = horDiff8;
                break;
            case 16:
                sp->encodepfunc = horDiff16;
                break;
            case 32:
                sp->encodepfunc = horDiff32;
                break;
            case 64:
                sp->encodepfunc = horDiff64;
                break;
        }
        /*
         * Override default encoding method with one that does the
         * predictor stuff.
         */
        if (tif->tif_encoderow != PredictorEncodeRow)
        {
            sp->encoderow = tif->tif_encoderow;
            tif->tif_encoderow = PredictorEncodeRow;
            sp->encodestrip = tif->tif_encodestrip;
            tif->tif_encodestrip = PredictorEncodeTile;
            sp->encodetile = tif->tif_encodetile;
            tif->tif_encodetile = PredictorEncodeTile;
        }

        /*
         * If the data is horizontally differenced 16-bit data that
         * requires byte-swapping, then it must be byte swapped after
         * the differentiation step.  We do this with a special-purpose
         * routine and override the normal post decoding logic that
         * the library setup when the directory was read.
         */
        if (tif->tif_flags & TIFF_SWAB)
        {
            if (sp->encodepfunc == horDiff16)
            {
                sp->encodepfunc = swabHorDiff16;
                tif->tif_postdecode = _TIFFNoPostDecode;
            }
            else if (sp->encodepfunc == horDiff32)
            {
                sp->encodepfunc = swabHorDiff32;
                tif->tif_postdecode = _TIFFNoPostDecode;
            }
            else if (sp->encodepfunc == horDiff64)
            {
                sp->encodepfunc = swabHorDiff64;
                tif->tif_postdecode = _TIFFNoPostDecode;
            }
        }
    }

    else if (sp->predictor == 3)
    {
        sp->encodepfunc = fpDiff;
        /*
         * Override default encoding method with one that does the
         * predictor stuff.
         */
        if (tif->tif_encoderow != PredictorEncodeRow)
        {
            sp->encoderow = tif->tif_encoderow;
            tif->tif_encoderow = PredictorEncodeRow;
            sp->encodestrip = tif->tif_encodestrip;
            tif->tif_encodestrip = PredictorEncodeTile;
            sp->encodetile = tif->tif_encodetile;
            tif->tif_encodetile = PredictorEncodeTile;
        }
        /*
         * The data should not be swapped outside of the floating
         * point predictor, the differentiation routine should return
         * bytes in the native order.
         */
        if (tif->tif_flags & TIFF_SWAB)
        {
            tif->tif_postdecode = _TIFFNoPostDecode;
        }
    }

    return 1;
}

#define REPEAT4(n, op)                                                         \
    switch (n)                                                                 \
    {                                                                          \
        default:                                                               \
        {                                                                      \
            tmsize_t i;                                                        \
            for (i = n - 4; i > 0; i--)                                        \
            {                                                                  \
                op;                                                            \
            }                                                                  \
        } /*-fallthrough*/                                                     \
        case 4:                                                                \
            op; /*-fallthrough*/                                               \
        case 3:                                                                \
            op; /*-fallthrough*/                                               \
        case 2:                                                                \
            op; /*-fallthrough*/                                               \
        case 1:                                                                \
            op; /*-fallthrough*/                                               \
        case 0:;                                                               \
    }

/* Remarks related to C standard compliance in all below functions : */
/* - to avoid any undefined behavior, we only operate on unsigned types */
/*   since the behavior of "overflows" is defined (wrap over) */
/* - when storing into the byte stream, we explicitly mask with 0xff so */
/*   as to make icc -check=conversions happy (not necessary by the standard) */

TIFF_NOSANITIZE_UNSIGNED_INT_OVERFLOW
static int horAcc8(TIFF *tif, uint8_t *cp0, tmsize_t cc)
{
    tmsize_t stride = PredictorState(tif)->stride;

    uint8_t *cp = cp0;
    if ((cc % stride) != 0)
    {
        TIFFErrorExtR(tif, "horAcc8", "%s", "(cc%stride)!=0");
        return 0;
    }

    if (cc > stride)
    {
        /*
         * Pipeline the most common cases.
         */
        if (stride == 1)
        {
            uint32_t acc = cp[0];
            tmsize_t i = stride;
            for (; i < cc - 3; i += 4)
            {
                cp[i + 0] = (uint8_t)((acc += cp[i + 0]) & 0xff);
                cp[i + 1] = (uint8_t)((acc += cp[i + 1]) & 0xff);
                cp[i + 2] = (uint8_t)((acc += cp[i + 2]) & 0xff);
                cp[i + 3] = (uint8_t)((acc += cp[i + 3]) & 0xff);
            }
            for (; i < cc; i++)
            {
                cp[i + 0] = (uint8_t)((acc += cp[i + 0]) & 0xff);
            }
        }
        else if (stride == 3)
        {
            uint32_t cr = cp[0];
            uint32_t cg = cp[1];
            uint32_t cb = cp[2];
            tmsize_t i = stride;
            for (; i < cc; i += stride)
            {
                cp[i + 0] = (uint8_t)((cr += cp[i + 0]) & 0xff);
                cp[i + 1] = (uint8_t)((cg += cp[i + 1]) & 0xff);
                cp[i + 2] = (uint8_t)((cb += cp[i + 2]) & 0xff);
            }
        }
        else if (stride == 4)
        {
            uint32_t cr = cp[0];
            uint32_t cg = cp[1];
            uint32_t cb = cp[2];
            uint32_t ca = cp[3];
            tmsize_t i = stride;
            for (; i < cc; i += stride)
            {
                cp[i + 0] = (uint8_t)((cr += cp[i + 0]) & 0xff);
                cp[i + 1] = (uint8_t)((cg += cp[i + 1]) & 0xff);
                cp[i + 2] = (uint8_t)((cb += cp[i + 2]) & 0xff);
                cp[i + 3] = (uint8_t)((ca += cp[i + 3]) & 0xff);
            }
        }
        else
        {
            cc -= stride;
            do
            {
                REPEAT4(stride,
                        cp[stride] = (uint8_t)((cp[stride] + *cp) & 0xff);
                        cp++)
                cc -= stride;
            } while (cc > 0);
        }
    }
    return 1;
}

static int swabHorAcc16(TIFF *tif, uint8_t *cp0, tmsize_t cc)
{
    uint16_t *wp = (uint16_t *)cp0;
    tmsize_t wc = cc / 2;

    TIFFSwabArrayOfShort(wp, wc);
    return horAcc16(tif, cp0, cc);
}

TIFF_NOSANITIZE_UNSIGNED_INT_OVERFLOW
static int horAcc16(TIFF *tif, uint8_t *cp0, tmsize_t cc)
{
    tmsize_t stride = PredictorState(tif)->stride;
    uint16_t *wp = (uint16_t *)cp0;
    tmsize_t wc = cc / 2;

    if ((cc % (2 * stride)) != 0)
    {
        TIFFErrorExtR(tif, "horAcc16", "%s", "cc%(2*stride))!=0");
        return 0;
    }

    if (wc > stride)
    {
        wc -= stride;
        do
        {
            REPEAT4(stride, wp[stride] = (uint16_t)(((unsigned int)wp[stride] +
                                                     (unsigned int)wp[0]) &
                                                    0xffff);
                    wp++)
            wc -= stride;
        } while (wc > 0);
    }
    return 1;
}

static int swabHorAcc32(TIFF *tif, uint8_t *cp0, tmsize_t cc)
{
    uint32_t *wp = (uint32_t *)cp0;
    tmsize_t wc = cc / 4;

    TIFFSwabArrayOfLong(wp, wc);
    return horAcc32(tif, cp0, cc);
}

TIFF_NOSANITIZE_UNSIGNED_INT_OVERFLOW
static int horAcc32(TIFF *tif, uint8_t *cp0, tmsize_t cc)
{
    tmsize_t stride = PredictorState(tif)->stride;
    uint32_t *wp = (uint32_t *)cp0;
    tmsize_t wc = cc / 4;

    if ((cc % (4 * stride)) != 0)
    {
        TIFFErrorExtR(tif, "horAcc32", "%s", "cc%(4*stride))!=0");
        return 0;
    }

    if (wc > stride)
    {
        wc -= stride;
        do
        {
            REPEAT4(stride, wp[stride] += wp[0]; wp++)
            wc -= stride;
        } while (wc > 0);
    }
    return 1;
}

static int swabHorAcc64(TIFF *tif, uint8_t *cp0, tmsize_t cc)
{
    uint64_t *wp = (uint64_t *)cp0;
    tmsize_t wc = cc / 8;

    TIFFSwabArrayOfLong8(wp, wc);
    return horAcc64(tif, cp0, cc);
}

TIFF_NOSANITIZE_UNSIGNED_INT_OVERFLOW
static int horAcc64(TIFF *tif, uint8_t *cp0, tmsize_t cc)
{
    tmsize_t stride = PredictorState(tif)->stride;
    uint64_t *wp = (uint64_t *)cp0;
    tmsize_t wc = cc / 8;

    if ((cc % (8 * stride)) != 0)
    {
        TIFFErrorExtR(tif, "horAcc64", "%s", "cc%(8*stride))!=0");
        return 0;
    }

    if (wc > stride)
    {
        wc -= stride;
        do
        {
            REPEAT4(stride, wp[stride] += wp[0]; wp++)
            wc -= stride;
        } while (wc > 0);
    }
    return 1;
}

/*
 * Floating point predictor accumulation routine.
 */
static int fpAcc(TIFF *tif, uint8_t *cp0, tmsize_t cc)
{
    tmsize_t stride = PredictorState(tif)->stride;
    uint32_t bps = tif->tif_dir.td_bitspersample / 8;
    tmsize_t wc = cc / bps;
    tmsize_t count = cc;
    uint8_t *cp = cp0;
    uint8_t *tmp;

    if (cc % (bps * stride) != 0)
    {
        TIFFErrorExtR(tif, "fpAcc", "%s", "cc%(bps*stride))!=0");
        return 0;
    }

    tmp = (uint8_t *)_TIFFmallocExt(tif, cc);
    if (!tmp)
        return 0;

    if (stride == 1)
    {
        /* Optimization of general case */
#define OP                                                                     \
    do                                                                         \
    {                                                                          \
        cp[1] = (uint8_t)((cp[1] + cp[0]) & 0xff);                             \
        ++cp;                                                                  \
    } while (0)
        for (; count > 8; count -= 8)
        {
            OP;
            OP;
            OP;
            OP;
            OP;
            OP;
            OP;
            OP;
        }
        for (; count > 1; count -= 1)
        {
            OP;
        }
#undef OP
    }
    else
    {
        while (count > stride)
        {
            REPEAT4(stride, cp[stride] = (uint8_t)((cp[stride] + cp[0]) & 0xff);
                    cp++)
            count -= stride;
        }
    }

    _TIFFmemcpy(tmp, cp0, cc);
    cp = (uint8_t *)cp0;
    count = 0;

#if defined(__x86_64__) || defined(_M_X64)
    if (bps == 4)
    {
        /* Optimization of general case */
        for (; count + 15 < wc; count += 16)
        {
            /* Interlace 4*16 byte values */

            __m128i xmm0 =
                _mm_loadu_si128((const __m128i *)(tmp + count + 3 * wc));
            __m128i xmm1 =
                _mm_loadu_si128((const __m128i *)(tmp + count + 2 * wc));
            __m128i xmm2 =
                _mm_loadu_si128((const __m128i *)(tmp + count + 1 * wc));
            __m128i xmm3 =
                _mm_loadu_si128((const __m128i *)(tmp + count + 0 * wc));
            /* (xmm0_0, xmm1_0, xmm0_1, xmm1_1, xmm0_2, xmm1_2, ...) */
            __m128i tmp0 = _mm_unpacklo_epi8(xmm0, xmm1);
            /* (xmm0_8, xmm1_8, xmm0_9, xmm1_9, xmm0_10, xmm1_10, ...) */
            __m128i tmp1 = _mm_unpackhi_epi8(xmm0, xmm1);
            /* (xmm2_0, xmm3_0, xmm2_1, xmm3_1, xmm2_2, xmm3_2, ...) */
            __m128i tmp2 = _mm_unpacklo_epi8(xmm2, xmm3);
            /* (xmm2_8, xmm3_8, xmm2_9, xmm3_9, xmm2_10, xmm3_10, ...) */
            __m128i tmp3 = _mm_unpackhi_epi8(xmm2, xmm3);
            /* (xmm0_0, xmm1_0, xmm2_0, xmm3_0, xmm0_1, xmm1_1, xmm2_1, xmm3_1,
             * ...) */
            __m128i tmp2_0 = _mm_unpacklo_epi16(tmp0, tmp2);
            __m128i tmp2_1 = _mm_unpackhi_epi16(tmp0, tmp2);
            __m128i tmp2_2 = _mm_unpacklo_epi16(tmp1, tmp3);
            __m128i tmp2_3 = _mm_unpackhi_epi16(tmp1, tmp3);
            _mm_storeu_si128((__m128i *)(cp + 4 * count + 0 * 16), tmp2_0);
            _mm_storeu_si128((__m128i *)(cp + 4 * count + 1 * 16), tmp2_1);
            _mm_storeu_si128((__m128i *)(cp + 4 * count + 2 * 16), tmp2_2);
            _mm_storeu_si128((__m128i *)(cp + 4 * count + 3 * 16), tmp2_3);
        }
    }
#endif

    for (; count < wc; count++)
    {
        uint32_t byte;
        for (byte = 0; byte < bps; byte++)
        {
#if WORDS_BIGENDIAN
            cp[bps * count + byte] = tmp[byte * wc + count];
#else
            cp[bps * count + byte] = tmp[(bps - byte - 1) * wc + count];
#endif
        }
    }
    _TIFFfreeExt(tif, tmp);
    return 1;
}

/*
 * Decode a scanline and apply the predictor routine.
 */
static int PredictorDecodeRow(TIFF *tif, uint8_t *op0, tmsize_t occ0,
                              uint16_t s)
{
    TIFFPredictorState *sp = PredictorState(tif);

    assert(sp != NULL);
    assert(sp->decoderow != NULL);
    assert(sp->decodepfunc != NULL);

    if ((*sp->decoderow)(tif, op0, occ0, s))
    {
        return (*sp->decodepfunc)(tif, op0, occ0);
    }
    else
        return 0;
}

/*
 * Decode a tile/strip and apply the predictor routine.
 * Note that horizontal differencing must be done on a
 * row-by-row basis.  The width of a "row" has already
 * been calculated at pre-decode time according to the
 * strip/tile dimensions.
 */
static int PredictorDecodeTile(TIFF *tif, uint8_t *op0, tmsize_t occ0,
                               uint16_t s)
{
    TIFFPredictorState *sp = PredictorState(tif);

    assert(sp != NULL);
    assert(sp->decodetile != NULL);

    if ((*sp->decodetile)(tif, op0, occ0, s))
    {
        tmsize_t rowsize = sp->rowsize;
        assert(rowsize > 0);
        if ((occ0 % rowsize) != 0)
        {
            TIFFErrorExtR(tif, "PredictorDecodeTile", "%s",
                          "occ0%rowsize != 0");
            return 0;
        }
        assert(sp->decodepfunc != NULL);
        while (occ0 > 0)
        {
            if (!(*sp->decodepfunc)(tif, op0, rowsize))
                return 0;
            occ0 -= rowsize;
            op0 += rowsize;
        }
        return 1;
    }
    else
        return 0;
}

TIFF_NOSANITIZE_UNSIGNED_INT_OVERFLOW
static int horDiff8(TIFF *tif, uint8_t *cp0, tmsize_t cc)
{
    TIFFPredictorState *sp = PredictorState(tif);
    tmsize_t stride = sp->stride;
    unsigned char *cp = (unsigned char *)cp0;

    if ((cc % stride) != 0)
    {
        TIFFErrorExtR(tif, "horDiff8", "%s", "(cc%stride)!=0");
        return 0;
    }

    if (cc > stride)
    {
        cc -= stride;
        /*
         * Pipeline the most common cases.
         */
        if (stride == 3)
        {
            unsigned int r1, g1, b1;
            unsigned int r2 = cp[0];
            unsigned int g2 = cp[1];
            unsigned int b2 = cp[2];
            do
            {
                r1 = cp[3];
                cp[3] = (unsigned char)((r1 - r2) & 0xff);
                r2 = r1;
                g1 = cp[4];
                cp[4] = (unsigned char)((g1 - g2) & 0xff);
                g2 = g1;
                b1 = cp[5];
                cp[5] = (unsigned char)((b1 - b2) & 0xff);
                b2 = b1;
                cp += 3;
            } while ((cc -= 3) > 0);
        }
        else if (stride == 4)
        {
            unsigned int r1, g1, b1, a1;
            unsigned int r2 = cp[0];
            unsigned int g2 = cp[1];
            unsigned int b2 = cp[2];
            unsigned int a2 = cp[3];
            do
            {
                r1 = cp[4];
                cp[4] = (unsigned char)((r1 - r2) & 0xff);
                r2 = r1;
                g1 = cp[5];
                cp[5] = (unsigned char)((g1 - g2) & 0xff);
                g2 = g1;
                b1 = cp[6];
                cp[6] = (unsigned char)((b1 - b2) & 0xff);
                b2 = b1;
                a1 = cp[7];
                cp[7] = (unsigned char)((a1 - a2) & 0xff);
                a2 = a1;
                cp += 4;
            } while ((cc -= 4) > 0);
        }
        else
        {
            cp += cc - 1;
            do
            {
                REPEAT4(stride,
                        cp[stride] =
                            (unsigned char)((cp[stride] - cp[0]) & 0xff);
                        cp--)
            } while ((cc -= stride) > 0);
        }
    }
    return 1;
}

TIFF_NOSANITIZE_UNSIGNED_INT_OVERFLOW
static int horDiff16(TIFF *tif, uint8_t *cp0, tmsize_t cc)
{
    TIFFPredictorState *sp = PredictorState(tif);
    tmsize_t stride = sp->stride;
    uint16_t *wp = (uint16_t *)cp0;
    tmsize_t wc = cc / 2;

    if ((cc % (2 * stride)) != 0)
    {
        TIFFErrorExtR(tif, "horDiff8", "%s", "(cc%(2*stride))!=0");
        return 0;
    }

    if (wc > stride)
    {
        wc -= stride;
        wp += wc - 1;
        do
        {
            REPEAT4(stride, wp[stride] = (uint16_t)(((unsigned int)wp[stride] -
                                                     (unsigned int)wp[0]) &
                                                    0xffff);
                    wp--)
            wc -= stride;
        } while (wc > 0);
    }
    return 1;
}

static int swabHorDiff16(TIFF *tif, uint8_t *cp0, tmsize_t cc)
{
    uint16_t *wp = (uint16_t *)cp0;
    tmsize_t wc = cc / 2;

    if (!horDiff16(tif, cp0, cc))
        return 0;

    TIFFSwabArrayOfShort(wp, wc);
    return 1;
}

TIFF_NOSANITIZE_UNSIGNED_INT_OVERFLOW
static int horDiff32(TIFF *tif, uint8_t *cp0, tmsize_t cc)
{
    TIFFPredictorState *sp = PredictorState(tif);
    tmsize_t stride = sp->stride;
    uint32_t *wp = (uint32_t *)cp0;
    tmsize_t wc = cc / 4;

    if ((cc % (4 * stride)) != 0)
    {
        TIFFErrorExtR(tif, "horDiff32", "%s", "(cc%(4*stride))!=0");
        return 0;
    }

    if (wc > stride)
    {
        wc -= stride;
        wp += wc - 1;
        do
        {
            REPEAT4(stride, wp[stride] -= wp[0]; wp--)
            wc -= stride;
        } while (wc > 0);
    }
    return 1;
}

static int swabHorDiff32(TIFF *tif, uint8_t *cp0, tmsize_t cc)
{
    uint32_t *wp = (uint32_t *)cp0;
    tmsize_t wc = cc / 4;

    if (!horDiff32(tif, cp0, cc))
        return 0;

    TIFFSwabArrayOfLong(wp, wc);
    return 1;
}

TIFF_NOSANITIZE_UNSIGNED_INT_OVERFLOW
static int horDiff64(TIFF *tif, uint8_t *cp0, tmsize_t cc)
{
    TIFFPredictorState *sp = PredictorState(tif);
    tmsize_t stride = sp->stride;
    uint64_t *wp = (uint64_t *)cp0;
    tmsize_t wc = cc / 8;

    if ((cc % (8 * stride)) != 0)
    {
        TIFFErrorExtR(tif, "horDiff64", "%s", "(cc%(8*stride))!=0");
        return 0;
    }

    if (wc > stride)
    {
        wc -= stride;
        wp += wc - 1;
        do
        {
            REPEAT4(stride, wp[stride] -= wp[0]; wp--)
            wc -= stride;
        } while (wc > 0);
    }
    return 1;
}

static int swabHorDiff64(TIFF *tif, uint8_t *cp0, tmsize_t cc)
{
    uint64_t *wp = (uint64_t *)cp0;
    tmsize_t wc = cc / 8;

    if (!horDiff64(tif, cp0, cc))
        return 0;

    TIFFSwabArrayOfLong8(wp, wc);
    return 1;
}

/*
 * Floating point predictor differencing routine.
 */
TIFF_NOSANITIZE_UNSIGNED_INT_OVERFLOW
static int fpDiff(TIFF *tif, uint8_t *cp0, tmsize_t cc)
{
    tmsize_t stride = PredictorState(tif)->stride;
    uint32_t bps = tif->tif_dir.td_bitspersample / 8;
    tmsize_t wc = cc / bps;
    tmsize_t count;
    uint8_t *cp = (uint8_t *)cp0;
    uint8_t *tmp;

    if ((cc % (bps * stride)) != 0)
    {
        TIFFErrorExtR(tif, "fpDiff", "%s", "(cc%(bps*stride))!=0");
        return 0;
    }

    tmp = (uint8_t *)_TIFFmallocExt(tif, cc);
    if (!tmp)
        return 0;

    _TIFFmemcpy(tmp, cp0, cc);
    for (count = 0; count < wc; count++)
    {
        uint32_t byte;
        for (byte = 0; byte < bps; byte++)
        {
#if WORDS_BIGENDIAN
            cp[byte * wc + count] = tmp[bps * count + byte];
#else
            cp[(bps - byte - 1) * wc + count] = tmp[bps * count + byte];
#endif
        }
    }
    _TIFFfreeExt(tif, tmp);

    cp = (uint8_t *)cp0;
    cp += cc - stride - 1;
    for (count = cc; count > stride; count -= stride)
        REPEAT4(stride,
                cp[stride] = (unsigned char)((cp[stride] - cp[0]) & 0xff);
                cp--)
    return 1;
}

static int PredictorEncodeRow(TIFF *tif, uint8_t *bp, tmsize_t cc, uint16_t s)
{
    static const char module[] = "PredictorEncodeRow";
    TIFFPredictorState *sp = PredictorState(tif);
    uint8_t *working_copy;
    int result_code;

    assert(sp != NULL);
    assert(sp->encodepfunc != NULL);
    assert(sp->encoderow != NULL);

    /*
     * Do predictor manipulation in a working buffer to avoid altering
     * the callers buffer, like for PredictorEncodeTile().
     * https://gitlab.com/libtiff/libtiff/-/issues/5
     */
    working_copy = (uint8_t *)_TIFFmallocExt(tif, cc);
    if (working_copy == NULL)
    {
        TIFFErrorExtR(tif, module,
                      "Out of memory allocating %" PRId64 " byte temp buffer.",
                      (int64_t)cc);
        return 0;
    }
    memcpy(working_copy, bp, cc);

    if (!(*sp->encodepfunc)(tif, working_copy, cc))
    {
        _TIFFfreeExt(tif, working_copy);
        return 0;
    }
    result_code = (*sp->encoderow)(tif, working_copy, cc, s);
    _TIFFfreeExt(tif, working_copy);
    return result_code;
}

static int PredictorEncodeTile(TIFF *tif, uint8_t *bp0, tmsize_t cc0,
                               uint16_t s)
{
    static const char module[] = "PredictorEncodeTile";
    TIFFPredictorState *sp = PredictorState(tif);
    uint8_t *working_copy;
    tmsize_t cc = cc0, rowsize;
    unsigned char *bp;
    int result_code;

    assert(sp != NULL);
    assert(sp->encodepfunc != NULL);
    assert(sp->encodetile != NULL);

    /*
     * Do predictor manipulation in a working buffer to avoid altering
     * the callers buffer. http://trac.osgeo.org/gdal/ticket/1965
     */
    working_copy = (uint8_t *)_TIFFmallocExt(tif, cc0);
    if (working_copy == NULL)
    {
        TIFFErrorExtR(tif, module,
                      "Out of memory allocating %" PRId64 " byte temp buffer.",
                      (int64_t)cc0);
        return 0;
    }
    memcpy(working_copy, bp0, cc0);
    bp = working_copy;

    rowsize = sp->rowsize;
    assert(rowsize > 0);
    if ((cc0 % rowsize) != 0)
    {
        TIFFErrorExtR(tif, "PredictorEncodeTile", "%s", "(cc0%rowsize)!=0");
        _TIFFfreeExt(tif, working_copy);
        return 0;
    }
    while (cc > 0)
    {
        (*sp->encodepfunc)(tif, bp, rowsize);
        cc -= rowsize;
        bp += rowsize;
    }
    result_code = (*sp->encodetile)(tif, working_copy, cc0, s);

    _TIFFfreeExt(tif, working_copy);

    return result_code;
}

#define FIELD_PREDICTOR (FIELD_CODEC + 0) /* XXX */

static const TIFFField predictFields[] = {
    {TIFFTAG_PREDICTOR, 1, 1, TIFF_SHORT, 0, TIFF_SETGET_UINT16,
     FIELD_PREDICTOR, FALSE, FALSE, "Predictor", NULL},
};

static int PredictorVSetField(TIFF *tif, uint32_t tag, va_list ap)
{
    TIFFPredictorState *sp = PredictorState(tif);

    assert(sp != NULL);
    assert(sp->vsetparent != NULL);

    switch (tag)
    {
        case TIFFTAG_PREDICTOR:
            sp->predictor = (uint16_t)va_arg(ap, uint16_vap);
            TIFFSetFieldBit(tif, FIELD_PREDICTOR);
            break;
        default:
            return (*sp->vsetparent)(tif, tag, ap);
    }
    tif->tif_flags |= TIFF_DIRTYDIRECT;
    return 1;
}

static int PredictorVGetField(TIFF *tif, uint32_t tag, va_list ap)
{
    TIFFPredictorState *sp = PredictorState(tif);

    assert(sp != NULL);
    assert(sp->vgetparent != NULL);

    switch (tag)
    {
        case TIFFTAG_PREDICTOR:
            *va_arg(ap, uint16_t *) = (uint16_t)sp->predictor;
            break;
        default:
            return (*sp->vgetparent)(tif, tag, ap);
    }
    return 1;
}

static void PredictorPrintDir(TIFF *tif, FILE *fd, long flags)
{
    TIFFPredictorState *sp = PredictorState(tif);

    (void)flags;
    if (TIFFFieldSet(tif, FIELD_PREDICTOR))
    {
        fprintf(fd, "  Predictor: ");
        switch (sp->predictor)
        {
            case 1:
                fprintf(fd, "none ");
                break;
            case 2:
                fprintf(fd, "horizontal differencing ");
                break;
            case 3:
                fprintf(fd, "floating point predictor ");
                break;
        }
        fprintf(fd, "%d (0x%x)\n", sp->predictor, sp->predictor);
    }
    if (sp->printdir)
        (*sp->printdir)(tif, fd, flags);
}

int TIFFPredictorInit(TIFF *tif)
{
    TIFFPredictorState *sp = PredictorState(tif);

    assert(sp != 0);

    /*
     * Merge codec-specific tag information.
     */
    if (!_TIFFMergeFields(tif, predictFields, TIFFArrayCount(predictFields)))
    {
        TIFFErrorExtR(tif, "TIFFPredictorInit",
                      "Merging Predictor codec-specific tags failed");
        return 0;
    }

    /*
     * Override parent get/set field methods.
     */
    sp->vgetparent = tif->tif_tagmethods.vgetfield;
    tif->tif_tagmethods.vgetfield =
        PredictorVGetField; /* hook for predictor tag */
    sp->vsetparent = tif->tif_tagmethods.vsetfield;
    tif->tif_tagmethods.vsetfield =
        PredictorVSetField; /* hook for predictor tag */
    sp->printdir = tif->tif_tagmethods.printdir;
    tif->tif_tagmethods.printdir =
        PredictorPrintDir; /* hook for predictor tag */

    sp->setupdecode = tif->tif_setupdecode;
    tif->tif_setupdecode = PredictorSetupDecode;
    sp->setupencode = tif->tif_setupencode;
    tif->tif_setupencode = PredictorSetupEncode;

    sp->predictor = 1;      /* default value */
    sp->encodepfunc = NULL; /* no predictor routine */
    sp->decodepfunc = NULL; /* no predictor routine */
    return 1;
}

int TIFFPredictorCleanup(TIFF *tif)
{
    TIFFPredictorState *sp = PredictorState(tif);

    assert(sp != 0);

    tif->tif_tagmethods.vgetfield = sp->vgetparent;
    tif->tif_tagmethods.vsetfield = sp->vsetparent;
    tif->tif_tagmethods.printdir = sp->printdir;
    tif->tif_setupdecode = sp->setupdecode;
    tif->tif_setupencode = sp->setupencode;

    return 1;
}