| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762
77727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763
27732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763
77737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764
27742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543 |
- /*
- * MIPS DSPr2 optimizations for libjpeg-turbo
- *
- * Copyright (C) 2013-2014, MIPS Technologies, Inc., California.
- * All Rights Reserved.
- * Authors: Teodora Novkovic <teodora.novkovic@imgtec.com>
- * Darko Laus <darko.laus@imgtec.com>
- * Copyright (C) 2015, D. R. Commander. All Rights Reserved.
- *
- * This software is provided 'as-is', without any express or implied
- * warranty. In no event will the authors be held liable for any damages
- * arising from the use of this software.
- *
- * Permission is granted to anyone to use this software for any purpose,
- * including commercial applications, and to alter it and redistribute it
- * freely, subject to the following restrictions:
- *
- * 1. The origin of this software must not be misrepresented; you must not
- * claim that you wrote the original software. If you use this software
- * in a product, an acknowledgment in the product documentation would be
- * appreciated but is not required.
- * 2. Altered source versions must be plainly marked as such, and must not be
- * misrepresented as being the original software.
- * 3. This notice may not be removed or altered from any source distribution.
- */
- #include "jsimd_dspr2_asm.h"
- /*****************************************************************************/
- LEAF_DSPR2(jsimd_c_null_convert_dspr2)
- /*
- * a0 = cinfo->image_width
- * a1 = input_buf
- * a2 = output_buf
- * a3 = output_row
- * 16(sp) = num_rows
- * 20(sp) = cinfo->num_components
- *
- * Null conversion for compression
- *
- * For every row and every component index ci, bytes ci, ci + nc,
- * ci + 2*nc, ... of the interleaved input row (nc = num_components) are
- * copied to consecutive bytes of output_buf[ci][output_row].
- *
- * Two row loops exist:
- * labels 0-3: image_width is not a multiple of 4. The first
- * (image_width & 3) samples are copied one at a time, the remainder
- * four at a time.
- * labels 4-6: image_width is a multiple of 4; only the four-at-a-time
- * loop is used.
- *
- * The four-at-a-time loops test their exit condition only after storing
- * four bytes, so they must not be entered when nothing is left to copy.
- * For image_width 1..3 the byte loop already completes the row, hence the
- * explicit skip to label 8 below. image_width and num_components are
- * assumed to be non-zero.
- *
- * The stack arguments are read at 24(sp)/28(sp) after SAVE_REGS_ON_STACK 8
- * (presumably because that macro lowers sp by 8; see jsimd_dspr2_asm.h).
- */
- SAVE_REGS_ON_STACK 8, s0, s1
- lw t9, 24(sp) /* t9 = num_rows */
- lw s0, 28(sp) /* s0 = cinfo->num_components */
- andi t0, a0, 3 /* t0 = cinfo->image_width & 3 */
- beqz t0, 4f /* no residual */
- nop
- 0: /* row loop (width with residual) */
- addiu t9, t9, -1
- bltz t9, 7f /* all rows done */
- li t1, 0 /* (delay slot) t1 = ci = 0 */
- 1: /* component loop */
- sll t3, t1, 2
- lwx t5, t3(a2) /* t5 = outptr = output_buf[ci] */
- lw t2, 0(a1) /* t2 = inptr = *input_buf */
- sll t4, a3, 2
- lwx t5, t4(t5) /* t5 = outptr = output_buf[ci][output_row] */
- addu t2, t2, t1 /* inptr += ci */
- addu s1, t5, a0 /* s1 = outptr + image_width (row end) */
- addu t6, t5, t0 /* t6 = outptr + residual (byte loop end) */
- 2: /* copy the residual samples one by one */
- lbu t3, 0(t2)
- addiu t5, t5, 1
- sb t3, -1(t5)
- bne t6, t5, 2b
- addu t2, t2, s0 /* (delay slot) inptr += num_components */
- beq s1, t5, 8f /* image_width < 4: row already complete */
- nop
- 3: /* copy four samples per iteration */
- lbu t3, 0(t2)
- addu t4, t2, s0
- addu t7, t4, s0
- addu t8, t7, s0
- addu t2, t8, s0
- lbu t4, 0(t4)
- lbu t7, 0(t7)
- lbu t8, 0(t8)
- addiu t5, t5, 4
- sb t3, -4(t5)
- sb t4, -3(t5)
- sb t7, -2(t5)
- bne s1, t5, 3b
- sb t8, -1(t5) /* (delay slot) */
- 8:
- addiu t1, t1, 1 /* ci++ */
- bne t1, s0, 1b
- nop
- addiu a1, a1, 4 /* input_buf++ */
- bgez t9, 0b /* t9 >= 0 here; the row count is tested at 0: */
- addiu a3, a3, 1 /* (delay slot) output_row++ */
- b 7f
- nop
- 4: /* row loop (width is a multiple of 4) */
- addiu t9, t9, -1
- bltz t9, 7f /* all rows done */
- li t1, 0 /* (delay slot) t1 = ci = 0 */
- 5: /* component loop */
- sll t3, t1, 2
- lwx t5, t3(a2) /* t5 = outptr = output_buf[ci] */
- lw t2, 0(a1) /* t2 = inptr = *input_buf */
- sll t4, a3, 2
- lwx t5, t4(t5) /* t5 = outptr = output_buf[ci][output_row] */
- addu t2, t2, t1 /* inptr += ci */
- addu s1, t5, a0 /* s1 = outptr + image_width (row end) */
- 6: /* copy four samples per iteration */
- lbu t3, 0(t2)
- addu t4, t2, s0
- addu t7, t4, s0
- addu t8, t7, s0
- addu t2, t8, s0
- lbu t4, 0(t4)
- lbu t7, 0(t7)
- lbu t8, 0(t8)
- addiu t5, t5, 4
- sb t3, -4(t5)
- sb t4, -3(t5)
- sb t7, -2(t5)
- bne s1, t5, 6b
- sb t8, -1(t5) /* (delay slot) */
- addiu t1, t1, 1 /* ci++ */
- bne t1, s0, 5b
- nop
- addiu a1, a1, 4 /* input_buf++ */
- bgez t9, 4b /* t9 >= 0 here; the row count is tested at 4: */
- addiu a3, a3, 1 /* (delay slot) output_row++ */
- 7:
- RESTORE_REGS_FROM_STACK 8, s0, s1
- j ra
- nop
- END(jsimd_c_null_convert_dspr2)
- /*****************************************************************************/
- /*
- * jsimd_extrgb_ycc_convert_dspr2
- * jsimd_extbgr_ycc_convert_dspr2
- * jsimd_extrgbx_ycc_convert_dspr2
- * jsimd_extbgrx_ycc_convert_dspr2
- * jsimd_extxbgr_ycc_convert_dspr2
- * jsimd_extxrgb_ycc_convert_dspr2
- *
- * Colorspace conversion RGB -> YCbCr
- */
- .macro GENERATE_JSIMD_RGB_YCC_CONVERT_DSPR2 colorid, pixel_size, \
- r_offs, g_offs, b_offs
- .macro DO_RGB_TO_YCC r, g, b, inptr
- lbu \r, \r_offs(\inptr) /* load the red byte of the current pixel */
- lbu \g, \g_offs(\inptr) /* load the green byte */
- lbu \b, \b_offs(\inptr) /* load the blue byte */
- addiu \inptr, \pixel_size /* step to the next input pixel */
- .endm
- LEAF_DSPR2(jsimd_\colorid\()_ycc_convert_dspr2)
- /*
- * a0 = cinfo->image_width
- * a1 = input_buf
- * a2 = output_buf
- * a3 = output_row
- * 16(sp) = num_rows
- *
- * Generated once per pixel layout; the macro arguments give the pixel
- * size in bytes and the byte offsets of R, G and B inside a pixel.
- *
- * Each pixel is converted with three DSP accumulators (16-bit fixed point):
- * ac0: Y = FIX(0.29900)*r + FIX(0.58700)*g + FIX(0.11400)*b + 0x8000
- * ac1: Cb = -FIX(0.16874)*r - FIX(0.33126)*g + FIX(0.50000)*b + 0x807fff
- * ac2: Cr = FIX(0.50000)*r - FIX(0.41869)*g - FIX(0.08131)*b + 0x807fff
- * Each accumulator is shifted right by 16 and its low byte is stored to
- * output_buf[0..2][output_row] respectively.
- *
- * Both loops test their condition at the bottom, so one row and one pixel
- * per row are always processed.
- */
- SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- lw t7, 48(sp) /* t7 = num_rows */
- li s0, 0x4c8b /* FIX(0.29900) */
- li s1, 0x9646 /* FIX(0.58700) */
- li s2, 0x1d2f /* FIX(0.11400) */
- li s3, 0xffffd4cd /* -FIX(0.16874) */
- li s4, 0xffffab33 /* -FIX(0.33126) */
- li s5, 0x8000 /* FIX(0.50000) */
- li s6, 0xffff94d1 /* -FIX(0.41869) */
- li s7, 0xffffeb2f /* -FIX(0.08131) */
- li t8, 0x807fff /* CBCR_OFFSET + ONE_HALF-1 */
- 0: /* row loop */
- addiu t7, -1 /* --num_rows */
- lw t6, 0(a1) /* t6 = input_buf[0] */
- lw t0, 0(a2) /* t0 = output_buf[0] */
- lw t1, 4(a2) /* t1 = output_buf[1] */
- lw t2, 8(a2) /* t2 = output_buf[2] */
- sll t3, a3, 2 /* t3 = output_row << 2 (index used by lwx) */
- lwx t0, t3(t0) /* t0 = output_buf[0][output_row] */
- lwx t1, t3(t1) /* t1 = output_buf[1][output_row] */
- lwx t2, t3(t2) /* t2 = output_buf[2][output_row] */
- addu t9, t2, a0 /* t9 = end address */
- addiu a3, 1 /* output_row++ */
- 1: /* pixel loop */
- DO_RGB_TO_YCC t3, t4, t5, t6 /* t3 = r, t4 = g, t5 = b; t6 advances one pixel */
- mtlo s5, $ac0 /* ac0.lo = 0x8000 (rounding term for Y) */
- mtlo t8, $ac1 /* ac1.lo = t8 (offset + rounding for Cb) */
- mtlo t8, $ac2 /* ac2.lo = t8 (offset + rounding for Cr) */
- maddu $ac0, s2, t5 /* Y += FIX(0.11400) * b */
- maddu $ac1, s5, t5 /* Cb += FIX(0.50000) * b */
- maddu $ac2, s5, t3 /* Cr += FIX(0.50000) * r */
- maddu $ac0, s0, t3 /* Y += FIX(0.29900) * r */
- maddu $ac1, s3, t3 /* Cb += -FIX(0.16874) * r */
- maddu $ac2, s6, t4 /* Cr += -FIX(0.41869) * g */
- maddu $ac0, s1, t4 /* Y += FIX(0.58700) * g */
- maddu $ac1, s4, t4 /* Cb += -FIX(0.33126) * g */
- maddu $ac2, s7, t5 /* Cr += -FIX(0.08131) * b */
- extr.w t3, $ac0, 16 /* t3 = ac0 >> 16 (Y) */
- extr.w t4, $ac1, 16 /* t4 = ac1 >> 16 (Cb) */
- extr.w t5, $ac2, 16 /* t5 = ac2 >> 16 (Cr) */
- sb t3, 0(t0) /* store Y (low byte only) */
- sb t4, 0(t1) /* store Cb (low byte only) */
- sb t5, 0(t2) /* store Cr (low byte only) */
- addiu t0, 1
- addiu t2, 1
- bne t2, t9, 1b /* until the Cr pointer reaches the row end */
- addiu t1, 1 /* (delay slot) advance the Cb pointer */
- bgtz t7, 0b /* more rows to convert? */
- addiu a1, 4 /* (delay slot) input_buf++ */
- RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- j ra
- nop
- END(jsimd_\colorid\()_ycc_convert_dspr2)
- .purgem DO_RGB_TO_YCC
- .endm
- /*-------------------------------------id -- pix R G B */
- GENERATE_JSIMD_RGB_YCC_CONVERT_DSPR2 extrgb, 3, 0, 1, 2
- GENERATE_JSIMD_RGB_YCC_CONVERT_DSPR2 extbgr, 3, 2, 1, 0
- GENERATE_JSIMD_RGB_YCC_CONVERT_DSPR2 extrgbx, 4, 0, 1, 2
- GENERATE_JSIMD_RGB_YCC_CONVERT_DSPR2 extbgrx, 4, 2, 1, 0
- GENERATE_JSIMD_RGB_YCC_CONVERT_DSPR2 extxbgr, 4, 3, 2, 1
- GENERATE_JSIMD_RGB_YCC_CONVERT_DSPR2 extxrgb, 4, 1, 2, 3
- /*****************************************************************************/
- /*
- * jsimd_ycc_extrgb_convert_dspr2
- * jsimd_ycc_extbgr_convert_dspr2
- * jsimd_ycc_extrgbx_convert_dspr2
- * jsimd_ycc_extbgrx_convert_dspr2
- * jsimd_ycc_extxbgr_convert_dspr2
- * jsimd_ycc_extxrgb_convert_dspr2
- *
- * Colorspace conversion YCbCr -> RGB
- */
- .macro GENERATE_JSIMD_YCC_RGB_CONVERT_DSPR2 colorid, pixel_size, \
- r_offs, g_offs, b_offs, a_offs
- .macro STORE_YCC_TO_RGB scratch0 scratch1 scratch2 outptr
- sb \scratch0, \r_offs(\outptr) /* red byte */
- sb \scratch1, \g_offs(\outptr) /* green byte */
- sb \scratch2, \b_offs(\outptr) /* blue byte */
- .if (\pixel_size == 4)
- li t0, 0xFF /* 4-byte pixels: fourth byte is set to 0xFF (clobbers t0) */
- sb t0, \a_offs(\outptr)
- .endif
- addiu \outptr, \pixel_size /* step to the next output pixel */
- .endm
- LEAF_DSPR2(jsimd_ycc_\colorid\()_convert_dspr2)
- /*
- * a0 = cinfo->image_width
- * a1 = input_buf
- * a2 = input_row
- * a3 = output_buf
- * 16(sp) = num_rows
- *
- * Generated once per pixel layout; the macro arguments give the pixel
- * size in bytes and the byte offsets of R, G, B (and of the filler byte
- * for 4-byte pixels).
- *
- * Per pixel, with cb and cr reduced by 128:
- * r = y + round(FIX(1.40200) * cr >> 16)
- * g = y + ((-FIX(0.34414) * cb - FIX(0.71414) * cr + 0x8000) >> 16)
- * b = y + round(FIX(1.77200) * cb >> 16)
- * The results are clamped to 0..255 by subtracting 128, doing a
- * saturating left shift followed by an arithmetic right shift, and
- * adding 128 back. r and g are clamped together as two halfwords of t2,
- * b is clamped as a word in t0.
- *
- * Both loops test their condition at the bottom, so one row and one pixel
- * per row are always processed.
- */
- SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- lw s1, 48(sp) /* s1 = num_rows */
- li t3, 0x8000 /* rounding term added to the green sum before >> 16 */
- li t4, 0x166e9 /* FIX(1.40200) */
- li t5, 0x1c5a2 /* FIX(1.77200) */
- li t6, 0xffff492e /* -FIX(0.71414) */
- li t7, 0xffffa7e6 /* -FIX(0.34414) */
- repl.ph t8, 128 /* t8 = 128 in both halfwords */
- 0: /* row loop */
- lw s0, 0(a3) /* s0 = outptr = *output_buf */
- lw t0, 0(a1) /* t0 = input_buf[0] */
- lw t1, 4(a1) /* t1 = input_buf[1] */
- lw t2, 8(a1) /* t2 = input_buf[2] */
- sll s5, a2, 2 /* s5 = input_row << 2 (index used by lwx) */
- addiu s1, -1 /* --num_rows */
- lwx s2, s5(t0) /* s2 = input_buf[0][input_row] (y) */
- lwx s3, s5(t1) /* s3 = input_buf[1][input_row] (cb) */
- lwx s4, s5(t2) /* s4 = input_buf[2][input_row] (cr) */
- addu t9, s2, a0 /* t9 = end of the y row */
- addiu a2, 1 /* input_row++ */
- 1: /* pixel loop */
- lbu s7, 0(s4) /* cr */
- lbu s6, 0(s3) /* cb */
- lbu s5, 0(s2) /* y */
- addiu s2, 1
- addiu s4, 1
- addiu s7, -128 /* cr - 128 */
- addiu s6, -128 /* cb - 128 */
- mul t2, t7, s6 /* t2 = -FIX(0.34414) * (cb - 128) */
- mul t0, t6, s7 /* Crgtab[cr] */
- sll s7, 15 /* pre-scale so mulq_rs.w yields a rounded (x * FIX) >> 16 */
- mulq_rs.w t1, t4, s7 /* Crrtab[cr] */
- sll s6, 15 /* same pre-scale for cb */
- addu t2, t3 /* Cbgtab[cb] */
- addu t2, t0 /* t2 = Cbgtab[cb] + Crgtab[cr] */
- mulq_rs.w t0, t5, s6 /* Cbbtab[cb] */
- sra t2, 16 /* green chroma term */
- addu t1, s5 /* r = y + Crrtab[cr] */
- addu t2, s5 /* add y */
- ins t2, t1, 16, 16 /* t2 = r in the upper halfword, g in the lower */
- subu.ph t2, t2, t8 /* r - 128, g - 128 */
- addu t0, s5 /* b = y + Cbbtab[cb] */
- shll_s.ph t2, t2, 8 /* saturating shift: clamps r, g to signed 8 bits */
- subu t0, 128 /* b - 128 */
- shra.ph t2, t2, 8 /* shift back down */
- shll_s.w t0, t0, 24 /* saturating shift: clamps b to signed 8 bits */
- addu.ph t2, t2, t8 /* clip & store */
- sra t0, t0, 24 /* shift back down */
- sra t1, t2, 16 /* t1 = clamped r */
- addiu t0, 128 /* t0 = clamped b */
- STORE_YCC_TO_RGB t1, t2, t0, s0 /* r = t1, g = low byte of t2, b = t0 */
- bne s2, t9, 1b /* until the y pointer reaches the row end */
- addiu s3, 1 /* (delay slot) advance the cb pointer */
- bgtz s1, 0b /* more rows to convert? */
- addiu a3, 4 /* (delay slot) output_buf++ */
- RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- j ra
- nop
- END(jsimd_ycc_\colorid\()_convert_dspr2)
- .purgem STORE_YCC_TO_RGB
- .endm
- /*-------------------------------------id -- pix R G B A */
- GENERATE_JSIMD_YCC_RGB_CONVERT_DSPR2 extrgb, 3, 0, 1, 2, 3
- GENERATE_JSIMD_YCC_RGB_CONVERT_DSPR2 extbgr, 3, 2, 1, 0, 3
- GENERATE_JSIMD_YCC_RGB_CONVERT_DSPR2 extrgbx, 4, 0, 1, 2, 3
- GENERATE_JSIMD_YCC_RGB_CONVERT_DSPR2 extbgrx, 4, 2, 1, 0, 3
- GENERATE_JSIMD_YCC_RGB_CONVERT_DSPR2 extxbgr, 4, 3, 2, 1, 0
- GENERATE_JSIMD_YCC_RGB_CONVERT_DSPR2 extxrgb, 4, 1, 2, 3, 0
- /*****************************************************************************/
- /*
- * jsimd_extrgb_gray_convert_dspr2
- * jsimd_extbgr_gray_convert_dspr2
- * jsimd_extrgbx_gray_convert_dspr2
- * jsimd_extbgrx_gray_convert_dspr2
- * jsimd_extxbgr_gray_convert_dspr2
- * jsimd_extxrgb_gray_convert_dspr2
- *
- * Colorspace conversion RGB -> GRAY
- */
- .macro GENERATE_JSIMD_RGB_GRAY_CONVERT_DSPR2 colorid, pixel_size, \
- r_offs, g_offs, b_offs
- .macro DO_RGB_TO_GRAY r, g, b, inptr
- lbu \r, \r_offs(\inptr) /* load the red byte of the current pixel */
- lbu \g, \g_offs(\inptr) /* load the green byte */
- lbu \b, \b_offs(\inptr) /* load the blue byte */
- addiu \inptr, \pixel_size /* step to the next input pixel */
- .endm
- LEAF_DSPR2(jsimd_\colorid\()_gray_convert_dspr2)
- /*
- * a0 = cinfo->image_width
- * a1 = input_buf
- * a2 = output_buf
- * a3 = output_row
- * 16(sp) = num_rows
- *
- * Generated once per pixel layout; the macro arguments give the pixel
- * size in bytes and the byte offsets of R, G and B inside a pixel.
- *
- * Each output byte is the low byte of
- * (FIX(0.29900)*r + FIX(0.58700)*g + FIX(0.11400)*b + 0x8000) >> 16
- * and is written to output_buf[0][output_row].
- *
- * Loop 1 converts four pixels per iteration, alternating between ac0 and
- * ac1; loop 3 converts the remaining (image_width & 3) pixels one at a
- * time. The row loop tests its condition at the bottom, so one row is
- * always processed.
- */
- SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- li s0, 0x4c8b /* s0 = FIX(0.29900) */
- li s1, 0x9646 /* s1 = FIX(0.58700) */
- li s2, 0x1d2f /* s2 = FIX(0.11400) */
- li s7, 0x8000 /* s7 = FIX(0.50000) */
- lw s6, 48(sp) /* s6 = num_rows */
- andi t7, a0, 3 /* t7 = image_width & 3 (pixels left for loop 3) */
- 0: /* row loop */
- addiu s6, -1 /* --num_rows */
- lw t0, 0(a1) /* t0 = inptr = *input_buf */
- lw t1, 0(a2) /* t1 = output_buf[0] */
- sll t3, a3, 2 /* t3 = output_row << 2 (index used by lwx) */
- lwx t1, t3(t1) /* t1 = outptr = output_buf[0][output_row] */
- addiu a3, 1 /* output_row++ */
- addu t9, t1, a0 /* t9 = end of the output row */
- subu t8, t9, t7 /* t8 = end of the part handled four at a time */
- beq t1, t8, 2f /* fewer than four pixels: skip loop 1 */
- nop
- 1: /* four pixels per iteration */
- DO_RGB_TO_GRAY t3, t4, t5, t0 /* pixel 0 -> t3, t4, t5 */
- DO_RGB_TO_GRAY s3, s4, s5, t0 /* pixel 1 -> s3, s4, s5 */
- mtlo s7, $ac0 /* ac0.lo = 0x8000 (rounding term) */
- maddu $ac0, s2, t5 /* ac0 += FIX(0.11400) * b0 */
- maddu $ac0, s1, t4 /* ac0 += FIX(0.58700) * g0 */
- maddu $ac0, s0, t3 /* ac0 += FIX(0.29900) * r0 */
- mtlo s7, $ac1 /* ac1.lo = 0x8000 (rounding term) */
- maddu $ac1, s2, s5 /* ac1 += FIX(0.11400) * b1 */
- maddu $ac1, s1, s4 /* ac1 += FIX(0.58700) * g1 */
- maddu $ac1, s0, s3 /* ac1 += FIX(0.29900) * r1 */
- extr.w t6, $ac0, 16 /* t6 = gray value of pixel 0 */
- DO_RGB_TO_GRAY t3, t4, t5, t0 /* pixel 2 -> t3, t4, t5 */
- DO_RGB_TO_GRAY s3, s4, s5, t0 /* pixel 3 -> s3, s4, s5 */
- mtlo s7, $ac0 /* restart ac0 for pixel 2 */
- maddu $ac0, s2, t5 /* ac0 += FIX(0.11400) * b2 */
- maddu $ac0, s1, t4 /* ac0 += FIX(0.58700) * g2 */
- extr.w t2, $ac1, 16 /* t2 = gray value of pixel 1 */
- maddu $ac0, s0, t3 /* ac0 += FIX(0.29900) * r2 */
- mtlo s7, $ac1 /* restart ac1 for pixel 3 */
- maddu $ac1, s2, s5 /* ac1 += FIX(0.11400) * b3 */
- maddu $ac1, s1, s4 /* ac1 += FIX(0.58700) * g3 */
- maddu $ac1, s0, s3 /* ac1 += FIX(0.29900) * r3 */
- extr.w t5, $ac0, 16 /* t5 = gray value of pixel 2 */
- sb t6, 0(t1) /* store pixel 0 */
- sb t2, 1(t1) /* store pixel 1 */
- extr.w t3, $ac1, 16 /* t3 = gray value of pixel 3 */
- addiu t1, 4 /* outptr += 4 */
- sb t5, -2(t1) /* store pixel 2 */
- sb t3, -1(t1) /* store pixel 3 */
- bne t1, t8, 1b
- nop
- 2:
- beqz t7, 4f /* no leftover pixels in this row */
- nop
- 3: /* one pixel per iteration */
- DO_RGB_TO_GRAY t3, t4, t5, t0 /* t3 = r, t4 = g, t5 = b */
- mtlo s7, $ac0 /* ac0.lo = 0x8000 (rounding term) */
- maddu $ac0, s2, t5 /* ac0 += FIX(0.11400) * b */
- maddu $ac0, s1, t4 /* ac0 += FIX(0.58700) * g */
- maddu $ac0, s0, t3 /* ac0 += FIX(0.29900) * r */
- extr.w t6, $ac0, 16 /* t6 = gray value */
- sb t6, 0(t1)
- addiu t1, 1
- bne t1, t9, 3b /* until the end of the output row */
- nop
- 4:
- bgtz s6, 0b /* more rows to convert? */
- addiu a1, 4 /* (delay slot) input_buf++ */
- RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- j ra
- nop
- END(jsimd_\colorid\()_gray_convert_dspr2)
- .purgem DO_RGB_TO_GRAY
- .endm
- /*-------------------------------------id -- pix R G B */
- GENERATE_JSIMD_RGB_GRAY_CONVERT_DSPR2 extrgb, 3, 0, 1, 2
- GENERATE_JSIMD_RGB_GRAY_CONVERT_DSPR2 extbgr, 3, 2, 1, 0
- GENERATE_JSIMD_RGB_GRAY_CONVERT_DSPR2 extrgbx, 4, 0, 1, 2
- GENERATE_JSIMD_RGB_GRAY_CONVERT_DSPR2 extbgrx, 4, 2, 1, 0
- GENERATE_JSIMD_RGB_GRAY_CONVERT_DSPR2 extxbgr, 4, 3, 2, 1
- GENERATE_JSIMD_RGB_GRAY_CONVERT_DSPR2 extxrgb, 4, 1, 2, 3
- /*****************************************************************************/
- /*
- * jsimd_h2v2_merged_upsample_dspr2
- * jsimd_h2v2_extrgb_merged_upsample_dspr2
- * jsimd_h2v2_extrgbx_merged_upsample_dspr2
- * jsimd_h2v2_extbgr_merged_upsample_dspr2
- * jsimd_h2v2_extbgrx_merged_upsample_dspr2
- * jsimd_h2v2_extxbgr_merged_upsample_dspr2
- * jsimd_h2v2_extxrgb_merged_upsample_dspr2
- *
- * Merged h2v2 upsample routines
- */
- .macro GENERATE_H2V2_MERGED_UPSAMPLE_DSPR2 colorid, pixel_size, \
- r1_offs, g1_offs, \
- b1_offs, a1_offs, \
- r2_offs, g2_offs, \
- b2_offs, a2_offs
- .macro STORE_H2V2_2_PIXELS scratch0 scratch1 scratch2 scratch3 scratch4 \
- scratch5 outptr
- sb \scratch0, \r1_offs(\outptr)
- sb \scratch1, \g1_offs(\outptr)
- sb \scratch2, \b1_offs(\outptr)
- sb \scratch3, \r2_offs(\outptr)
- sb \scratch4, \g2_offs(\outptr)
- sb \scratch5, \b2_offs(\outptr)
- .if (\pixel_size == 8)
- li \scratch0, 0xFF
- sb \scratch0, \a1_offs(\outptr)
- sb \scratch0, \a2_offs(\outptr)
- .endif
- addiu \outptr, \pixel_size
- .endm
- .macro STORE_H2V2_1_PIXEL scratch0 scratch1 scratch2 outptr
- sb \scratch0, \r1_offs(\outptr)
- sb \scratch1, \g1_offs(\outptr)
- sb \scratch2, \b1_offs(\outptr)
- .if (\pixel_size == 8)
- li t0, 0xFF
- sb t0, \a1_offs(\outptr)
- .endif
- .endm
- LEAF_DSPR2(jsimd_h2v2_\colorid\()_merged_upsample_dspr2)
- /*
- * a0 = cinfo->output_width
- * a1 = input_buf
- * a2 = in_row_group_ctr
- * a3 = output_buf
- * 16(sp) = cinfo->sample_range_limit
- */
- SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra
- lw t9, 56(sp) /* cinfo->sample_range_limit */
- lw v0, 0(a1)
- lw v1, 4(a1)
- lw t0, 8(a1)
- sll t1, a2, 3
- addiu t2, t1, 4
- sll t3, a2, 2
- lw t4, 0(a3) /* t4 = output_buf[0] */
- lwx t1, t1(v0) /* t1 = input_buf[0][in_row_group_ctr*2] */
- lwx t2, t2(v0) /* t2 = input_buf[0][in_row_group_ctr*2 + 1] */
- lwx t5, t3(v1) /* t5 = input_buf[1][in_row_group_ctr] */
- lwx t6, t3(t0) /* t6 = input_buf[2][in_row_group_ctr] */
- lw t7, 4(a3) /* t7 = output_buf[1] */
- li s1, 0xe6ea
- addiu t8, s1, 0x7fff /* t8 = 0x166e9 [FIX(1.40200)] */
- addiu s0, t8, 0x5eb9 /* s0 = 0x1c5a2 [FIX(1.77200)] */
- addiu s1, zero, 0xa7e6 /* s4 = 0xffffa7e6 [-FIX(0.34414)] */
- xori s2, s1, 0xeec8 /* s3 = 0xffff492e [-FIX(0.71414)] */
- srl t3, a0, 1
- blez t3, 2f
- addu t0, t5, t3 /* t0 = end address */
- 1:
- lbu t3, 0(t5)
- lbu s3, 0(t6)
- addiu t5, t5, 1
- addiu t3, t3, -128 /* (cb - 128) */
- addiu s3, s3, -128 /* (cr - 128) */
- mult $ac1, s1, t3
- madd $ac1, s2, s3
- sll s3, s3, 15
- sll t3, t3, 15
- mulq_rs.w s4, t8, s3 /* s4 = (C1 * cr + ONE_HALF)>> SCALEBITS */
- extr_r.w s5, $ac1, 16
- mulq_rs.w s6, s0, t3 /* s6 = (C2 * cb + ONE_HALF)>> SCALEBITS */
- lbu v0, 0(t1)
- addiu t6, t6, 1
- addiu t1, t1, 2
- addu t3, v0, s4 /* y+cred */
- addu s3, v0, s5 /* y+cgreen */
- addu v1, v0, s6 /* y+cblue */
- addu t3, t9, t3 /* y+cred */
- addu s3, t9, s3 /* y+cgreen */
- addu v1, t9, v1 /* y+cblue */
- lbu AT, 0(t3)
- lbu s7, 0(s3)
- lbu ra, 0(v1)
- lbu v0, -1(t1)
- addu t3, v0, s4 /* y+cred */
- addu s3, v0, s5 /* y+cgreen */
- addu v1, v0, s6 /* y+cblue */
- addu t3, t9, t3 /* y+cred */
- addu s3, t9, s3 /* y+cgreen */
- addu v1, t9, v1 /* y+cblue */
- lbu t3, 0(t3)
- lbu s3, 0(s3)
- lbu v1, 0(v1)
- lbu v0, 0(t2)
- STORE_H2V2_2_PIXELS AT, s7, ra, t3, s3, v1, t4
- addu t3, v0, s4 /* y+cred */
- addu s3, v0, s5 /* y+cgreen */
- addu v1, v0, s6 /* y+cblue */
- addu t3, t9, t3 /* y+cred */
- addu s3, t9, s3 /* y+cgreen */
- addu v1, t9, v1 /* y+cblue */
- lbu AT, 0(t3)
- lbu s7, 0(s3)
- lbu ra, 0(v1)
- lbu v0, 1(t2)
- addiu t2, t2, 2
- addu t3, v0, s4 /* y+cred */
- addu s3, v0, s5 /* y+cgreen */
- addu v1, v0, s6 /* y+cblue */
- addu t3, t9, t3 /* y+cred */
- addu s3, t9, s3 /* y+cgreen */
- addu v1, t9, v1 /* y+cblue */
- lbu t3, 0(t3)
- lbu s3, 0(s3)
- lbu v1, 0(v1)
- STORE_H2V2_2_PIXELS AT, s7, ra, t3, s3, v1, t7
- bne t0, t5, 1b
- nop
- 2:
- andi t0, a0, 1
- beqz t0, 4f
- lbu t3, 0(t5)
- lbu s3, 0(t6)
- addiu t3, t3, -128 /* (cb - 128) */
- addiu s3, s3, -128 /* (cr - 128) */
- mult $ac1, s1, t3
- madd $ac1, s2, s3
- sll s3, s3, 15
- sll t3, t3, 15
- lbu v0, 0(t1)
- extr_r.w s5, $ac1, 16
- mulq_rs.w s4, t8, s3 /* s4 = (C1 * cr + ONE_HALF)>> SCALEBITS */
- mulq_rs.w s6, s0, t3 /* s6 = (C2 * cb + ONE_HALF)>> SCALEBITS */
- addu t3, v0, s4 /* y+cred */
- addu s3, v0, s5 /* y+cgreen */
- addu v1, v0, s6 /* y+cblue */
- addu t3, t9, t3 /* y+cred */
- addu s3, t9, s3 /* y+cgreen */
- addu v1, t9, v1 /* y+cblue */
- lbu t3, 0(t3)
- lbu s3, 0(s3)
- lbu v1, 0(v1)
- lbu v0, 0(t2)
- STORE_H2V2_1_PIXEL t3, s3, v1, t4
- addu t3, v0, s4 /* y+cred */
- addu s3, v0, s5 /* y+cgreen */
- addu v1, v0, s6 /* y+cblue */
- addu t3, t9, t3 /* y+cred */
- addu s3, t9, s3 /* y+cgreen */
- addu v1, t9, v1 /* y+cblue */
- lbu t3, 0(t3)
- lbu s3, 0(s3)
- lbu v1, 0(v1)
- STORE_H2V2_1_PIXEL t3, s3, v1, t7
- 4:
- RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra
- j ra
- nop
- END(jsimd_h2v2_\colorid\()_merged_upsample_dspr2)
- .purgem STORE_H2V2_1_PIXEL
- .purgem STORE_H2V2_2_PIXELS
- .endm
- /*------------------------------------id -- pix R1 G1 B1 A1 R2 G2 B2 A2 */
- GENERATE_H2V2_MERGED_UPSAMPLE_DSPR2 extrgb, 6, 0, 1, 2, 6, 3, 4, 5, 6 /* defines jsimd_h2v2_extrgb_merged_upsample_dspr2 */
- GENERATE_H2V2_MERGED_UPSAMPLE_DSPR2 extbgr, 6, 2, 1, 0, 3, 5, 4, 3, 6 /* defines jsimd_h2v2_extbgr_merged_upsample_dspr2; NOTE(review): A1/A2 presumably unused when pix == 6 - confirm against the STORE_H2V2_* macros */
- GENERATE_H2V2_MERGED_UPSAMPLE_DSPR2 extrgbx, 8, 0, 1, 2, 3, 4, 5, 6, 7 /* defines jsimd_h2v2_extrgbx_merged_upsample_dspr2 */
- GENERATE_H2V2_MERGED_UPSAMPLE_DSPR2 extbgrx, 8, 2, 1, 0, 3, 6, 5, 4, 7 /* defines jsimd_h2v2_extbgrx_merged_upsample_dspr2 */
- GENERATE_H2V2_MERGED_UPSAMPLE_DSPR2 extxbgr, 8, 3, 2, 1, 0, 7, 6, 5, 4 /* defines jsimd_h2v2_extxbgr_merged_upsample_dspr2 */
- GENERATE_H2V2_MERGED_UPSAMPLE_DSPR2 extxrgb, 8, 1, 2, 3, 0, 5, 6, 7, 4 /* defines jsimd_h2v2_extxrgb_merged_upsample_dspr2 */
- /*****************************************************************************/
- /*
- * jsimd_h2v1_merged_upsample_dspr2
- * jsimd_h2v1_extrgb_merged_upsample_dspr2
- * jsimd_h2v1_extrgbx_merged_upsample_dspr2
- * jsimd_h2v1_extbgr_merged_upsample_dspr2
- * jsimd_h2v1_extbgrx_merged_upsample_dspr2
- * jsimd_h2v1_extxbgr_merged_upsample_dspr2
- * jsimd_h2v1_extxrgb_merged_upsample_dspr2
- *
- * Merged h2v1 upsample routines
- *
- * Each expansion of GENERATE_H2V1_MERGED_UPSAMPLE_DSPR2 defines one function,
- * jsimd_h2v1_<colorid>_merged_upsample_dspr2. The function walks one row of
- * Y, Cb and Cr samples. For every Cb/Cr sample it computes three chroma
- * terms (red, green, blue) once, adds them to two consecutive Y samples and
- * stores, for each colour byte, the value found at range_limit[y + term].
- * When output_width is odd, one final pixel is produced from the last
- * Y/Cb/Cr samples.
- *
- * Macro arguments:
- *   colorid            - name fragment pasted into the function name
- *   pixel_size         - bytes written per pixel pair; the output pointer
- *                        advances by this amount after each pair, and the
- *                        value 8 additionally enables the 0xFF stores at
- *                        a1_offs / a2_offs
- *   r1_offs .. a1_offs - byte offsets used for the first pixel of a pair
- *   r2_offs .. a2_offs - byte offsets used for the second pixel of a pair
- */
- .macro GENERATE_H2V1_MERGED_UPSAMPLE_DSPR2 colorid, pixel_size, \
- r1_offs, g1_offs, \
- b1_offs, a1_offs, \
- r2_offs, g2_offs, \
- b2_offs, a2_offs
- .macro STORE_H2V1_2_PIXELS scratch0 scratch1 scratch2 scratch3 scratch4 \
- scratch5 outptr
- sb \scratch0, \r1_offs(\outptr) /* first pixel, red */
- sb \scratch1, \g1_offs(\outptr) /* first pixel, green */
- sb \scratch2, \b1_offs(\outptr) /* first pixel, blue */
- sb \scratch3, \r2_offs(\outptr) /* second pixel, red */
- sb \scratch4, \g2_offs(\outptr) /* second pixel, green */
- sb \scratch5, \b2_offs(\outptr) /* second pixel, blue */
- .if (\pixel_size == 8) /* only the 8-byte-per-pair layouts get the extra byte */
- li t0, 0xFF /* note: overwrites t0 */
- sb t0, \a1_offs(\outptr)
- sb t0, \a2_offs(\outptr)
- .endif
- addiu \outptr, \pixel_size /* step the output pointer past both pixels */
- .endm
- .macro STORE_H2V1_1_PIXEL scratch0 scratch1 scratch2 outptr
- sb \scratch0, \r1_offs(\outptr) /* red */
- sb \scratch1, \g1_offs(\outptr) /* green */
- sb \scratch2, \b1_offs(\outptr) /* blue */
- .if (\pixel_size == 8)
- li t0, 0xFF /* note: overwrites t0 */
- sb t0, \a1_offs(\outptr)
- .endif
- .endm /* the output pointer is not advanced by this macro */
- LEAF_DSPR2(jsimd_h2v1_\colorid\()_merged_upsample_dspr2)
- /*
- * a0 = cinfo->output_width
- * a1 = input_buf
- * a2 = in_row_group_ctr
- * a3 = output_buf
- * 16(sp) = range_limit
- */
- SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra
- li t0, 0xe6ea /* base value the constants below are derived from */
- lw t1, 0(a1) /* t1 = input_buf[0] */
- lw t2, 4(a1) /* t2 = input_buf[1] */
- lw t3, 8(a1) /* t3 = input_buf[2] */
- lw t8, 56(sp) /* t8 = range_limit */
- addiu s1, t0, 0x7fff /* s1 = 0x166e9 [FIX(1.40200)] */
- addiu s2, s1, 0x5eb9 /* s2 = 0x1c5a2 [FIX(1.77200)] */
- addiu s0, t0, 0x9916 /* s0 = 0x8000 */
- addiu s4, zero, 0xa7e6 /* s4 = 0xffffa7e6 [-FIX(0.34414)] */
- xori s3, s4, 0xeec8 /* s3 = 0xffff492e [-FIX(0.71414)] */
- srl t0, a0, 1 /* t0 = output_width / 2 = number of pixel pairs */
- sll t4, a2, 2 /* t4 = in_row_group_ctr * 4 (byte offset of the row pointer) */
- lwx s5, t4(t1) /* s5 = inptr0 */
- lwx s6, t4(t2) /* s6 = inptr1 */
- lwx s7, t4(t3) /* s7 = inptr2 */
- lw t7, 0(a3) /* t7 = outptr */
- blez t0, 2f /* no full pair: go straight to the odd-pixel check */
- addu t9, s6, t0 /* t9 = end address */
- 1:
- lbu t2, 0(s6) /* t2 = cb */
- lbu t0, 0(s7) /* t0 = cr */
- lbu t1, 0(s5) /* t1 = y */
- addiu t2, t2, -128 /* t2 = cb - 128 */
- addiu t0, t0, -128 /* t0 = cr - 128 */
- mult $ac1, s4, t2 /* ac1 = -FIX(0.34414) * cb */
- madd $ac1, s3, t0 /* ac1 += -FIX(0.71414) * cr */
- sll t0, t0, 15 /* pre-shift cr for mulq_rs.w */
- sll t2, t2, 15 /* pre-shift cb for mulq_rs.w */
- mulq_rs.w t0, s1, t0 /* t0 = (C1*cr + ONE_HALF)>> SCALEBITS */
- extr_r.w t5, $ac1, 16 /* t5 = green term, rounded ac1 >> 16 */
- mulq_rs.w t6, s2, t2 /* t6 = (C2*cb + ONE_HALF)>> SCALEBITS */
- addiu s7, s7, 1
- addiu s6, s6, 1
- addu t2, t1, t0 /* t2 = y + cred */
- addu t3, t1, t5 /* t3 = y + cgreen */
- addu t4, t1, t6 /* t4 = y + cblue */
- addu t2, t8, t2 /* address inside range_limit */
- addu t3, t8, t3
- addu t4, t8, t4
- lbu t1, 1(s5) /* t1 = second y of the pair */
- lbu v0, 0(t2) /* v0 = range_limit[y + cred] */
- lbu v1, 0(t3) /* v1 = range_limit[y + cgreen] */
- lbu ra, 0(t4) /* ra = range_limit[y + cblue]; ra is restored from the stack on exit */
- addu t2, t1, t0 /* same chroma terms, second y */
- addu t3, t1, t5
- addu t4, t1, t6
- addu t2, t8, t2
- addu t3, t8, t3
- addu t4, t8, t4
- lbu t2, 0(t2)
- lbu t3, 0(t3)
- lbu t4, 0(t4)
- STORE_H2V1_2_PIXELS v0, v1, ra, t2, t3, t4, t7
- bne t9, s6, 1b /* loop until inptr1 reaches the end address */
- addiu s5, s5, 2 /* (delay slot) step inptr0 past both y samples */
- 2:
- andi t0, a0, 1 /* odd output_width leaves one more pixel */
- beqz t0, 4f
- nop
- 3:
- lbu t2, 0(s6)
- lbu t0, 0(s7)
- lbu t1, 0(s5)
- addiu t2, t2, -128 /* (cb - 128) */
- addiu t0, t0, -128 /* (cr - 128) */
- mul t3, s4, t2 /* t3 = -FIX(0.34414) * cb */
- mul t4, s3, t0 /* t4 = -FIX(0.71414) * cr */
- sll t0, t0, 15
- sll t2, t2, 15
- mulq_rs.w t0, s1, t0 /* (C1*cr + ONE_HALF)>> SCALEBITS */
- mulq_rs.w t6, s2, t2 /* (C2*cb + ONE_HALF)>> SCALEBITS */
- addu t3, t3, s0 /* add the 0x8000 rounding constant */
- addu t3, t4, t3
- sra t5, t3, 16 /* (C4*cb + ONE_HALF + C3*cr)>> SCALEBITS */
- addu t2, t1, t0 /* y + cred */
- addu t3, t1, t5 /* y + cgreen */
- addu t4, t1, t6 /* y + cblue */
- addu t2, t8, t2
- addu t3, t8, t3
- addu t4, t8, t4
- lbu t2, 0(t2)
- lbu t3, 0(t3)
- lbu t4, 0(t4)
- STORE_H2V1_1_PIXEL t2, t3, t4, t7
- 4:
- RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra
- j ra
- nop
- END(jsimd_h2v1_\colorid\()_merged_upsample_dspr2)
- .purgem STORE_H2V1_1_PIXEL
- .purgem STORE_H2V1_2_PIXELS
- .endm
- /*------------------------------------id -- pix R1 G1 B1 A1 R2 G2 B2 A2 */
- GENERATE_H2V1_MERGED_UPSAMPLE_DSPR2 extrgb, 6, 0, 1, 2, 6, 3, 4, 5, 6 /* pix = 6: A1/A2 are only referenced when pix == 8, so they are unused here */
- GENERATE_H2V1_MERGED_UPSAMPLE_DSPR2 extbgr, 6, 2, 1, 0, 3, 5, 4, 3, 6 /* pix = 6: A1/A2 unused, as above */
- GENERATE_H2V1_MERGED_UPSAMPLE_DSPR2 extrgbx, 8, 0, 1, 2, 3, 4, 5, 6, 7 /* pix = 8: bytes at offsets 3 and 7 are set to 0xFF */
- GENERATE_H2V1_MERGED_UPSAMPLE_DSPR2 extbgrx, 8, 2, 1, 0, 3, 6, 5, 4, 7 /* pix = 8: bytes at offsets 3 and 7 are set to 0xFF */
- GENERATE_H2V1_MERGED_UPSAMPLE_DSPR2 extxbgr, 8, 3, 2, 1, 0, 7, 6, 5, 4 /* pix = 8: bytes at offsets 0 and 4 are set to 0xFF */
- GENERATE_H2V1_MERGED_UPSAMPLE_DSPR2 extxrgb, 8, 1, 2, 3, 0, 5, 6, 7, 4 /* pix = 8: bytes at offsets 0 and 4 are set to 0xFF */
- /*****************************************************************************/
- /*
- * jsimd_h2v2_fancy_upsample_dspr2
- *
- * Fancy processing for the common case of 2:1 horizontal and 2:1 vertical.
- *
- * For every row pointer read from a2, two output rows are written. The
- * first pass pairs the row (inptr0) with the row pointer stored at -4(a2),
- * the second pass pairs it with the row pointer stored at 4(a2) (inptr1).
- * A column sum is 3 * inptr0 sample + inptr1 sample. Each input column
- * yields two output bytes:
- *   (3 * this + previous + 8) >> 4   and   (3 * this + next + 7) >> 4
- * The first column writes (4 * this + 8) >> 4 as its left byte and the
- * last column writes (4 * this + 7) >> 4 as its right byte.
- * The outer loop runs while max_v_samp_factor exceeds the number of output
- * rows written so far (s4 / 4).
- */
- LEAF_DSPR2(jsimd_h2v2_fancy_upsample_dspr2)
- /*
- * a0 = cinfo->max_v_samp_factor
- * a1 = downsampled_width
- * a2 = input_data
- * a3 = output_data_ptr
- */
- SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, s5
- li s4, 0 /* s4 = byte offset of the current row pointer in the output array */
- lw s2, 0(a3) /* s2 = *output_data_ptr */
- 0:
- li t9, 2 /* t9 = output rows still to write for this input row */
- lw s1, -4(a2) /* s1 = inptr1 */
- 1:
- lw s0, 0(a2) /* s0 = inptr0 */
- lwx s3, s4(s2) /* s3 = outptr for this output row */
- addiu s5, a1, -2 /* s5 = downsampled_width - 2 */
- srl t4, s5, 1
- sll t4, t4, 1 /* t4 = s5 rounded down to an even count */
- lbu t0, 0(s0)
- lbu t1, 1(s0)
- lbu t2, 0(s1)
- lbu t3, 1(s1)
- addiu s0, 2
- addiu s1, 2
- addu t8, s0, t4 /* t8 = end address */
- andi s5, s5, 1 /* s5 = residual */
- sll t4, t0, 1
- sll t6, t1, 1
- addu t0, t0, t4 /* t0 = (*inptr0++) * 3 */
- addu t1, t1, t6 /* t1 = (*inptr0++) * 3 */
- addu t7, t0, t2 /* t7 = thiscolsum */
- addu t6, t1, t3 /* t6 = nextcolsum */
- sll t0, t7, 2 /* t0 = thiscolsum * 4 */
- subu t1, t0, t7 /* t1 = thiscolsum * 3 */
- shra_r.w t0, t0, 4 /* t0 = (thiscolsum * 4 + 8) >> 4 */
- addiu t1, 7
- addu t1, t1, t6
- srl t1, t1, 4 /* t1 = (thiscolsum * 3 + nextcolsum + 7) >> 4 */
- sb t0, 0(s3)
- sb t1, 1(s3)
- beq t8, s0, 22f /* skip to final iteration if width == 3 */
- addiu s3, 2
- 2: /* two input columns (four output bytes) per iteration; t7 = last, t6 = this */
- lh t0, 0(s0) /* t0 = A3|A2 */
- lh t2, 0(s1) /* t2 = B3|B2 */
- addiu s0, 2
- addiu s1, 2
- preceu.ph.qbr t0, t0 /* t0 = 0|A3|0|A2 */
- preceu.ph.qbr t2, t2 /* t2 = 0|B3|0|B2 */
- shll.ph t1, t0, 1
- sll t3, t6, 1
- addu.ph t0, t1, t0 /* t0 = A3*3|A2*3 */
- addu t3, t3, t6 /* t3 = this * 3 */
- addu.ph t0, t0, t2 /* t0 = next2|next1 */
- addu t1, t3, t7 /* t1 = this*3 + last (t7 is overwritten just below) */
- andi t7, t0, 0xFFFF /* t7 = next1 */
- sll t2, t7, 1
- addu t2, t7, t2 /* t2 = next1*3 */
- addu t4, t2, t6 /* t4 = next1*3 + this (t6 is overwritten just below) */
- srl t6, t0, 16 /* t6 = next2 */
- shra_r.w t1, t1, 4 /* t1 = (this*3 + last + 8) >> 4 */
- addu t0, t3, t7
- addiu t0, 7
- srl t0, t0, 4 /* t0 = (this*3 + next1 + 7) >> 4 */
- shra_r.w t4, t4, 4 /* t4 = (next1*3 + this + 8) >> 4 */
- addu t2, t2, t6
- addiu t2, 7
- srl t2, t2, 4 /* t2 = (next1*3 + next2 + 7) >> 4 */
- sb t1, 0(s3)
- sb t0, 1(s3)
- sb t4, 2(s3)
- sb t2, 3(s3)
- bne t8, s0, 2b
- addiu s3, 4
- 22:
- beqz s5, 4f /* no residual column */
- addu t8, s0, s5 /* (delay slot) t8 = end address of the residual loop */
- 3: /* one input column (two output bytes) per iteration */
- lbu t0, 0(s0)
- lbu t2, 0(s1)
- addiu s0, 1
- addiu s1, 1
- sll t3, t6, 1
- sll t1, t0, 1
- addu t1, t0, t1 /* t1 = inptr0 * 3 */
- addu t3, t3, t6 /* t3 = thiscolsum * 3 */
- addu t5, t1, t2 /* t5 = nextcolsum */
- addu t1, t3, t7
- shra_r.w t1, t1, 4 /* t1 = (thiscolsum * 3 + lastcolsum + 8) >> 4 */
- addu t0, t3, t5
- addiu t0, 7
- srl t0, t0, 4 /* t0 = (thiscolsum * 3 + nextcolsum + 7) >> 4 */
- sb t1, 0(s3)
- sb t0, 1(s3)
- addiu s3, 2
- move t7, t6 /* lastcolsum = thiscolsum */
- bne t8, s0, 3b
- move t6, t5 /* (delay slot) thiscolsum = nextcolsum */
- 4: /* last column */
- sll t0, t6, 2 /* t0 = thiscolsum * 4 */
- subu t1, t0, t6 /* t1 = thiscolsum * 3 */
- addu t1, t1, t7
- addiu s4, 4 /* advance to the next output row pointer */
- shra_r.w t1, t1, 4 /* t1 = (thiscolsum * 3 + lastcolsum + 8) >> 4 */
- addiu t0, 7
- srl t0, t0, 4 /* t0 = (thiscolsum * 4 + 7) >> 4 */
- sb t1, 0(s3)
- sb t0, 1(s3)
- addiu t9, -1
- addiu s3, 2
- bnez t9, 1b /* second pass for the same input row */
- lw s1, 4(a2) /* (delay slot) inptr1 = row pointer at 4(a2) */
- srl t0, s4, 2 /* t0 = output rows written so far */
- subu t0, a0, t0
- bgtz t0, 0b
- addiu a2, 4 /* (delay slot) next input row pointer */
- RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, s5
- j ra
- nop
- END(jsimd_h2v2_fancy_upsample_dspr2)
- /*****************************************************************************/
- LEAF_DSPR2(jsimd_h2v1_fancy_upsample_dspr2)
- /*
- * a0 = cinfo->max_v_samp_factor
- * a1 = downsampled_width
- * a2 = input_data
- * a3 = output_data_ptr
- *
- * Horizontal 2:1 "fancy" upsampling of max_v_samp_factor rows. Each input
- * sample yields two output bytes:
- *   (3 * in + left neighbour + 1) >> 2   and   (3 * in + right neighbour + 2) >> 2
- * The very first output byte of a row is a copy of the first input sample
- * and the very last output byte is a copy of the last input sample.
- * Loop 1 handles four input samples per iteration with packed-halfword
- * arithmetic, loop 2 handles the (downsampled_width - 2) & 3 leftover
- * samples one at a time, and label 22 writes the last column.
- */
- SAVE_REGS_ON_STACK 16, s0, s1, s2, s3
- .set at
- beqz a0, 3f /* nothing to do for zero rows */
- sll t0, a0, 2 /* (delay slot) t0 = max_v_samp_factor * 4 */
- lw s1, 0(a3) /* s1 = output row-pointer array */
- li s3, 0x10001 /* s3 = +1 in each halfword (rounding bias for the left blend) */
- addu s0, s1, t0 /* s0 = end of the output row-pointer array */
- 0:
- addiu t8, a1, -2 /* t8 = downsampled_width - 2 */
- srl t9, t8, 2 /* t9 = number of 4-sample groups */
- lw t7, 0(a2) /* t7 = inptr */
- lw s2, 0(s1) /* s2 = outptr */
- lbu t0, 0(t7)
- lbu t1, 1(t7) /* t1 = inptr[1] */
- sll t2, t0, 1
- addu t2, t2, t0 /* t2 = invalue*3 */
- addu t2, t2, t1
- shra_r.w t2, t2, 2 /* t2 = (invalue*3 + inptr[1] + 2) >> 2 */
- sb t0, 0(s2) /* first output byte is the first input sample unchanged */
- sb t2, 1(s2)
- beqz t9, 11f
- addiu s2, 2
- 1:
- ulw t0, 0(t7) /* t0 = |P3|P2|P1|P0| */
- ulw t1, 1(t7)
- ulh t2, 4(t7) /* t2 = |0|0|P5|P4| */
- preceu.ph.qbl t3, t0 /* t3 = |0|P3|0|P2| */
- preceu.ph.qbr t0, t0 /* t0 = |0|P1|0|P0| */
- preceu.ph.qbr t2, t2 /* t2 = |0|P5|0|P4| */
- preceu.ph.qbl t4, t1 /* t4 = |0|P4|0|P3| */
- preceu.ph.qbr t1, t1 /* t1 = |0|P2|0|P1| */
- shll.ph t5, t4, 1
- shll.ph t6, t1, 1
- addu.ph t5, t5, t4 /* t5 = |P4*3|P3*3| */
- addu.ph t6, t6, t1 /* t6 = |P2*3|P1*3| */
- addu.ph t4, t3, s3 /* left neighbours + 1 */
- addu.ph t0, t0, s3 /* left neighbours + 1 */
- addu.ph t4, t4, t5
- addu.ph t0, t0, t6
- shrl.ph t4, t4, 2 /* t4 = |0|P3|0|P2| */
- shrl.ph t0, t0, 2 /* t0 = |0|P1|0|P0| */
- addu.ph t2, t2, t5 /* right neighbours + 3 * sample */
- addu.ph t3, t3, t6 /* right neighbours + 3 * sample */
- shra_r.ph t2, t2, 2 /* t2 = |0|P5|0|P4| */
- shra_r.ph t3, t3, 2 /* t3 = |0|P3|0|P2| */
- shll.ph t2, t2, 8 /* move right-blend results into the odd bytes */
- shll.ph t3, t3, 8
- or t2, t4, t2 /* interleave left-blend and right-blend bytes */
- or t3, t3, t0
- addiu t9, -1
- usw t3, 0(s2)
- usw t2, 4(s2)
- addiu s2, 8
- bgtz t9, 1b
- addiu t7, 4
- 11:
- andi t8, 3 /* t8 = leftover samples after the 4-sample groups */
- beqz t8, 22f
- addiu t7, 1 /* (delay slot) step to the next unprocessed sample */
- 2:
- lbu t0, 0(t7)
- addiu t7, 1
- sll t1, t0, 1
- addu t2, t0, t1 /* t2 = invalue * 3 */
- lbu t3, -2(t7) /* t3 = left neighbour (t7 was already incremented) */
- lbu t4, 0(t7) /* t4 = right neighbour */
- addiu t3, 1
- addiu t4, 2
- addu t3, t3, t2
- addu t4, t4, t2
- srl t3, 2
- srl t4, 2
- sb t3, 0(s2)
- sb t4, 1(s2)
- addiu t8, -1
- bgtz t8, 2b
- addiu s2, 2
- 22: /* last column */
- lbu t0, 0(t7)
- lbu t2, -1(t7)
- sll t1, t0, 1
- addu t1, t1, t0 /* t1 = invalue * 3 */
- addu t1, t1, t2
- addiu t1, 1
- srl t1, t1, 2 /* t1 = (invalue * 3 + left neighbour + 1) >> 2 */
- sb t1, 0(s2)
- sb t0, 1(s2) /* last output byte is the last input sample unchanged */
- addiu s1, 4
- bne s1, s0, 0b
- addiu a2, 4 /* (delay slot) next input row pointer */
- 3:
- RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3
- j ra
- nop
- END(jsimd_h2v1_fancy_upsample_dspr2)
- /*****************************************************************************/
- LEAF_DSPR2(jsimd_h2v1_downsample_dspr2)
- /*
- * a0 = cinfo->image_width
- * a1 = cinfo->max_v_samp_factor
- * a2 = compptr->v_samp_factor
- * a3 = compptr->width_in_blocks
- * 16(sp) = input_data
- * 20(sp) = output_data
- *
- * Horizontal 2:1 downsampling of v_samp_factor rows. Each output byte is
- * (in[0] + in[1] + bias) >> 1 with bias alternating 0, 1, 0, 1, ...
- * After the image_width / 2 outputs computed from real pixels, the rest of
- * the output row (width_in_blocks * 8 bytes in total) is filled with values
- * derived from the last input pixel, using the same alternating bias.
- *
- * All per-row counters (s2, s4, t8) are recomputed at label 0, because the
- * loops below count them down. The 4-output main loop is skipped when
- * image_width / 2 < 4 so that it never reads columns that do not exist.
- */
- .set at
- SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4
- beqz a2, 7f
- lw s1, 44(sp) /* s1 = output_data */
- lw s0, 40(sp) /* s0 = input_data */
- 0: /* per-row setup: everything below depends only on a0 and a3 */
- srl s2, a0, 2
- andi t9, a0, 2 /* t9 != 0 when image_width / 2 is odd */
- srl t7, t9, 1
- addu s2, t7, s2 /* s2 = ceil((image_width / 2) / 2) */
- sll t0, a3, 3 /* t0 = width_in_blocks*DCT */
- srl t7, t0, 1
- subu s2, t7, s2 /* s2 = number of 2-byte padding stores */
- andi t6, a0, 1 /* t6 = temp_index */
- addiu t6, -1 /* t6 = 0 for odd width, -1 for even width */
- lw t4, 0(s1) /* t4 = outptr */
- lw t5, 0(s0) /* t5 = inptr0 */
- li s3, 0 /* s3 = bias */
- srl t7, a0, 1 /* t7 = image_width1 */
- srl s4, t7, 2 /* s4 = number of 4-output groups */
- blez s4, 11f /* fewer than 4 outputs: skip the main loop */
- andi t8, t7, 3 /* (delay slot) t8 = leftover outputs */
- 1:
- ulhu t0, 0(t5)
- ulhu t1, 2(t5)
- ulhu t2, 4(t5)
- ulhu t3, 6(t5)
- raddu.w.qb t0, t0 /* sum of the two pixels */
- raddu.w.qb t1, t1
- raddu.w.qb t2, t2
- raddu.w.qb t3, t3
- shra.ph t0, t0, 1 /* bias 0 */
- shra_r.ph t1, t1, 1 /* bias 1 (rounding shift) */
- shra.ph t2, t2, 1 /* bias 0 */
- shra_r.ph t3, t3, 1 /* bias 1 (rounding shift) */
- sb t0, 0(t4)
- sb t1, 1(t4)
- sb t2, 2(t4)
- sb t3, 3(t4)
- addiu s4, -1
- addiu t4, 4
- bgtz s4, 1b
- addiu t5, 8
- 11:
- beqz t8, 3f
- addu s4, t4, t8 /* (delay slot) s4 = end address of the leftover loop */
- 2:
- ulhu t0, 0(t5)
- raddu.w.qb t0, t0
- addqh.w t0, t0, s3 /* (sum + bias) >> 1 */
- xori s3, s3, 1 /* toggle bias */
- sb t0, 0(t4)
- addiu t4, 1
- bne t4, s4, 2b
- addiu t5, 2
- 3:
- lbux t1, t6(t5) /* t1 = last pixel of the row */
- sll t1, 1
- addqh.w t2, t1, s3 /* t2 = pixval1 */
- xori s3, s3, 1
- addqh.w t3, t1, s3 /* t3 = pixval2 */
- blez s2, 5f
- append t3, t2, 8 /* (delay slot) t3 = pixval2:pixval1 as one halfword */
- 4:
- ush t3, 0(t4)
- addiu s2, -1
- bgtz s2, 4b
- addiu t4, 2
- 5:
- beqz t9, 6f
- nop
- sb t2, 0(t4) /* one more byte when image_width / 2 is odd */
- 6:
- addiu s1, 4
- addiu a2, -1
- bnez a2, 0b
- addiu s0, 4
- 7:
- RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4
- j ra
- nop
- END(jsimd_h2v1_downsample_dspr2)
- /*****************************************************************************/
- LEAF_DSPR2(jsimd_h2v2_downsample_dspr2)
- /*
- * a0 = cinfo->image_width
- * a1 = cinfo->max_v_samp_factor
- * a2 = compptr->v_samp_factor
- * a3 = compptr->width_in_blocks
- * 16(sp) = input_data
- * 20(sp) = output_data
- *
- * 2:1 horizontal and 2:1 vertical downsampling. Each output row is built
- * from two input rows; each output byte is the sum of a 2x2 pixel group
- * plus bias, shifted right by 2, with bias alternating 1, 2, 1, 2, ...
- * After the image_width / 2 outputs computed from real pixels, the rest of
- * the output row (width_in_blocks * 8 bytes in total) is filled with values
- * derived from the last pixel of both input rows.
- *
- * All per-row counters (s2, s4, t8) are recomputed at label 0, because the
- * loops below count them down or turn them into pointers. The 4-output
- * main loop is skipped when image_width / 2 < 4 so that it never reads
- * columns that do not exist.
- */
- .set at
- SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- beqz a2, 8f
- lw s1, 52(sp) /* s1 = output_data */
- lw s0, 48(sp) /* s0 = input_data */
- 0: /* per-row setup: everything below depends only on a0 and a3 */
- andi t6, a0, 1 /* t6 = temp_index */
- addiu t6, -1 /* t6 = 0 for odd width, -1 for even width */
- srl t7, a0, 1 /* t7 = image_width1 */
- srl s4, t7, 2 /* s4 = number of 4-output groups */
- andi t8, t7, 3 /* t8 = leftover outputs */
- andi t9, a0, 2 /* t9 != 0 when image_width / 2 is odd */
- srl s2, a0, 2
- srl t7, t9, 1
- addu s2, t7, s2 /* s2 = ceil((image_width / 2) / 2) */
- sll t0, a3, 3 /* t0 = width_in_blocks*DCT */
- srl t7, t0, 1
- subu s2, t7, s2 /* s2 = number of 2-byte padding stores */
- lw t4, 0(s1) /* t4 = outptr */
- lw t5, 0(s0) /* t5 = inptr0 */
- lw s7, 4(s0) /* s7 = inptr1 */
- blez s4, 1f /* fewer than 4 outputs: skip the main loop */
- li s6, 1 /* s6 = bias */
- 2:
- ulw t0, 0(t5) /* t0 = |P3|P2|P1|P0| */
- ulw t1, 0(s7) /* t1 = |Q3|Q2|Q1|Q0| */
- ulw t2, 4(t5)
- ulw t3, 4(s7)
- precrq.ph.w t7, t0, t1 /* t7 = |P3|P2|Q3|Q2| */
- ins t0, t1, 16, 16 /* t0 = |Q1|Q0|P1|P0| */
- raddu.w.qb t1, t7
- raddu.w.qb t0, t0
- shra_r.w t1, t1, 2 /* bias 2 (rounding shift) */
- addiu t0, 1 /* bias 1 */
- srl t0, 2
- precrq.ph.w t7, t2, t3
- ins t2, t3, 16, 16
- raddu.w.qb t7, t7
- raddu.w.qb t2, t2
- shra_r.w t7, t7, 2 /* bias 2 (rounding shift) */
- addiu t2, 1 /* bias 1 */
- srl t2, 2
- sb t0, 0(t4)
- sb t1, 1(t4)
- sb t2, 2(t4)
- sb t7, 3(t4)
- addiu t4, 4
- addiu t5, 8
- addiu s4, s4, -1
- bgtz s4, 2b
- addiu s7, 8
- 1:
- beqz t8, 4f
- addu t8, t4, t8 /* (delay slot) t8 = end address of the leftover loop */
- 3:
- ulhu t0, 0(t5)
- ulhu t1, 0(s7)
- ins t0, t1, 16, 16
- raddu.w.qb t0, t0
- addu t0, t0, s6
- srl t0, 2
- xori s6, s6, 3 /* toggle bias between 1 and 2 */
- sb t0, 0(t4)
- addiu t5, 2
- addiu t4, 1
- bne t8, t4, 3b
- addiu s7, 2
- 4:
- lbux t1, t6(t5) /* last pixel of the first input row */
- sll t1, 1
- lbux t0, t6(s7) /* last pixel of the second input row */
- sll t0, 1
- addu t1, t1, t0
- addu t3, t1, s6
- srl t0, t3, 2 /* t0 = pixval1 */
- xori s6, s6, 3
- addu t2, t1, s6
- srl t1, t2, 2 /* t1 = pixval2 */
- blez s2, 6f
- append t1, t0, 8 /* (delay slot) t1 = pixval2:pixval1 as one halfword */
- 5:
- ush t1, 0(t4)
- addiu s2, -1
- bgtz s2, 5b
- addiu t4, 2
- 6:
- beqz t9, 7f
- nop
- sb t0, 0(t4) /* one more byte when image_width / 2 is odd */
- 7:
- addiu s1, 4
- addiu a2, -1
- bnez a2, 0b
- addiu s0, 8
- 8:
- RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- j ra
- nop
- END(jsimd_h2v2_downsample_dspr2)
- /*****************************************************************************/
- LEAF_DSPR2(jsimd_h2v2_smooth_downsample_dspr2)
- /*
- * a0 = input_data
- * a1 = output_data
- * a2 = compptr->v_samp_factor
- * a3 = cinfo->max_v_samp_factor
- * 16(sp) = cinfo->smoothing_factor
- * 20(sp) = compptr->width_in_blocks
- * 24(sp) = cinfo->image_width
- *
- * 2:1 x 2:1 downsampling with smoothing.
- *
- * Step 1 (labels 0/1): when output_cols * 2 exceeds image_width, the last
- * pixel of each of the max_v_samp_factor + 2 rows starting at the row
- * pointer stored at -4(a0) is replicated to the right to fill the gap.
- *
- * Step 2 (label 3 onwards): for each output row, every output byte is
- *   (membersum * t6 + neighsum * t7 + 32768) >> 16
- * where membersum is the sum of the 2x2 pixel group, t6 is
- * 16384 - smoothing_factor * 80 and t7 is smoothing_factor * 16.
- * neighsum is built from the pixels bordering the group in the rows
- * inptr0, inptr1, above_ptr and below_ptr. The first and last columns are
- * handled separately because they have no left / right neighbour column.
- * Loop 4 produces single columns, loop 5 produces four columns per pass.
- *
- * NOTE(review): t5 (inrow) is advanced by 2 twice per output row, i.e. by 4
- * in total, while four consecutive row pointers starting at inrow - 1 are
- * used per output row - confirm the intended stride when v_samp_factor > 1.
- */
- .set at
- SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- lw s7, 52(sp) /* compptr->width_in_blocks */
- lw s0, 56(sp) /* cinfo->image_width */
- lw s6, 48(sp) /* cinfo->smoothing_factor */
- sll s7, 3 /* output_cols = width_in_blocks * DCTSIZE */
- sll v0, s7, 1 /* v0 = output_cols * 2 */
- subu v0, v0, s0 /* v0 = number of columns to pad */
- blez v0, 2f /* no padding needed */
- move v1, zero /* (delay slot) v1 = row counter */
- addiu t0, a3, 2 /* t0 = cinfo->max_v_samp_factor + 2 */
- 0:
- addiu t1, a0, -4 /* start from the row pointer before input_data[0] */
- sll t2, v1, 2
- lwx t1, t2(t1) /* t1 = row pointer number v1, counted from -4(a0) */
- move t3, v0
- addu t1, t1, s0 /* t1 = first column past image_width */
- lbu t2, -1(t1) /* t2 = last real pixel of the row */
- 1:
- addiu t3, t3, -1
- sb t2, 0(t1)
- bgtz t3, 1b
- addiu t1, t1, 1
- addiu v1, v1, 1
- bne v1, t0, 0b
- nop
- 2:
- li v0, 80
- mul v0, s6, v0
- li v1, 16384
- move t4, zero /* t4 = output row counter */
- move t5, zero /* t5 = input row index */
- subu t6, v1, v0 /* t6 = 16384 - tmp_smoot_f * 80 */
- sll t7, s6, 4 /* t7 = tmp_smoot_f * 16 */
- 3:
- /* Special case for first column: pretend column -1 is same as column 0 */
- sll v0, t4, 2
- lwx t8, v0(a1) /* outptr = output_data[outrow] */
- sll v1, t5, 2
- addiu t9, v1, 4
- addiu s0, v1, -4
- addiu s1, v1, 8
- lwx s2, v1(a0) /* inptr0 = input_data[inrow] */
- lwx t9, t9(a0) /* inptr1 = input_data[inrow+1] */
- lwx s0, s0(a0) /* above_ptr = input_data[inrow-1] */
- lwx s1, s1(a0) /* below_ptr = input_data[inrow+2] */
- lh v0, 0(s2)
- lh v1, 0(t9)
- lh t0, 0(s0)
- lh t1, 0(s1)
- ins v0, v1, 16, 16
- ins t0, t1, 16, 16
- raddu.w.qb t2, v0 /* t2 = membersum (2x2 group) */
- raddu.w.qb s3, t0 /* s3 = two pixels above + two pixels below */
- lbu v0, 0(s2)
- lbu v1, 2(s2)
- lbu t0, 0(t9)
- lbu t1, 2(t9)
- addu v0, v0, v1
- mult $ac1, t2, t6 /* ac1 = membersum * t6 */
- addu t0, t0, t1
- lbu t2, 2(s0)
- addu t0, t0, v0
- lbu t3, 2(s1)
- addu s3, t0, s3
- lbu v0, 0(s0)
- lbu t0, 0(s1)
- sll s3, s3, 1 /* edge neighbours are counted twice */
- addu v0, v0, t2
- addu t0, t0, t3
- addu t0, t0, v0
- addu s3, t0, s3 /* s3 = neighsum */
- madd $ac1, s3, t7 /* ac1 += neighsum * t7 */
- extr_r.w v0, $ac1, 16 /* v0 = rounded ac1 >> 16 */
- addiu t8, t8, 1
- addiu s2, s2, 2
- addiu t9, t9, 2
- addiu s0, s0, 2
- addiu s1, s1, 2
- sb v0, -1(t8)
- addiu s4, s7, -2
- and s4, s4, 3 /* s4 = (output_cols - 2) & 3 */
- addu s5, s4, t8 /* end address */
- 4: /* one output column per iteration */
- lh v0, 0(s2)
- lh v1, 0(t9)
- lh t0, 0(s0)
- lh t1, 0(s1)
- ins v0, v1, 16, 16
- ins t0, t1, 16, 16
- raddu.w.qb t2, v0
- raddu.w.qb s3, t0
- lbu v0, -1(s2)
- lbu v1, 2(s2)
- lbu t0, -1(t9)
- lbu t1, 2(t9)
- addu v0, v0, v1
- mult $ac1, t2, t6
- addu t0, t0, t1
- lbu t2, 2(s0)
- addu t0, t0, v0
- lbu t3, 2(s1)
- addu s3, t0, s3
- lbu v0, -1(s0)
- lbu t0, -1(s1)
- sll s3, s3, 1
- addu v0, v0, t2
- addu t0, t0, t3
- addu t0, t0, v0
- addu s3, t0, s3
- madd $ac1, s3, t7
- extr_r.w t2, $ac1, 16
- addiu t8, t8, 1
- addiu s2, s2, 2
- addiu t9, t9, 2
- addiu s0, s0, 2
- sb t2, -1(t8)
- bne s5, t8, 4b
- addiu s1, s1, 2
- addiu s5, s7, -2
- subu s5, s5, s4 /* s5 = output_cols - 2 - s4 = columns left for loop 5 */
- addu s5, s5, t8 /* end address */
- 5: /* four output columns per iteration */
- lh v0, 0(s2)
- lh v1, 0(t9)
- lh t0, 0(s0)
- lh t1, 0(s1)
- ins v0, v1, 16, 16
- ins t0, t1, 16, 16
- raddu.w.qb t2, v0
- raddu.w.qb s3, t0
- lbu v0, -1(s2)
- lbu v1, 2(s2)
- lbu t0, -1(t9)
- lbu t1, 2(t9)
- addu v0, v0, v1
- mult $ac1, t2, t6
- addu t0, t0, t1
- lbu t2, 2(s0)
- addu t0, t0, v0
- lbu t3, 2(s1)
- addu s3, t0, s3
- lbu v0, -1(s0)
- lbu t0, -1(s1)
- sll s3, s3, 1
- addu v0, v0, t2
- addu t0, t0, t3
- lh v1, 2(t9) /* loads for the second column start here */
- addu t0, t0, v0
- lh v0, 2(s2)
- addu s3, t0, s3
- lh t0, 2(s0)
- lh t1, 2(s1)
- madd $ac1, s3, t7
- extr_r.w t2, $ac1, 16 /* t2 = first column result */
- ins t0, t1, 16, 16
- ins v0, v1, 16, 16
- raddu.w.qb s3, t0
- lbu v1, 4(s2)
- lbu t0, 1(t9)
- lbu t1, 4(t9)
- sb t2, 0(t8)
- raddu.w.qb t3, v0
- lbu v0, 1(s2)
- addu t0, t0, t1
- mult $ac1, t3, t6
- addu v0, v0, v1
- lbu t2, 4(s0)
- addu t0, t0, v0
- lbu v0, 1(s0)
- addu s3, t0, s3
- lbu t0, 1(s1)
- lbu t3, 4(s1)
- addu v0, v0, t2
- sll s3, s3, 1
- addu t0, t0, t3
- lh v1, 4(t9) /* loads for the third column start here */
- addu t0, t0, v0
- lh v0, 4(s2)
- addu s3, t0, s3
- lh t0, 4(s0)
- lh t1, 4(s1)
- madd $ac1, s3, t7
- extr_r.w t2, $ac1, 16 /* t2 = second column result */
- ins t0, t1, 16, 16
- ins v0, v1, 16, 16
- raddu.w.qb s3, t0
- lbu v1, 6(s2)
- lbu t0, 3(t9)
- lbu t1, 6(t9)
- sb t2, 1(t8)
- raddu.w.qb t3, v0
- lbu v0, 3(s2)
- addu t0, t0, t1
- mult $ac1, t3, t6
- addu v0, v0, v1
- lbu t2, 6(s0)
- addu t0, t0, v0
- lbu v0, 3(s0)
- addu s3, t0, s3
- lbu t0, 3(s1)
- lbu t3, 6(s1)
- addu v0, v0, t2
- sll s3, s3, 1
- addu t0, t0, t3
- lh v1, 6(t9) /* loads for the fourth column start here */
- addu t0, t0, v0
- lh v0, 6(s2)
- addu s3, t0, s3
- lh t0, 6(s0)
- lh t1, 6(s1)
- madd $ac1, s3, t7
- extr_r.w t3, $ac1, 16 /* t3 = third column result */
- ins t0, t1, 16, 16
- ins v0, v1, 16, 16
- raddu.w.qb s3, t0
- lbu v1, 8(s2)
- lbu t0, 5(t9)
- lbu t1, 8(t9)
- sb t3, 2(t8)
- raddu.w.qb t2, v0
- lbu v0, 5(s2)
- addu t0, t0, t1
- mult $ac1, t2, t6
- addu v0, v0, v1
- lbu t2, 8(s0)
- addu t0, t0, v0
- lbu v0, 5(s0)
- addu s3, t0, s3
- lbu t0, 5(s1)
- lbu t3, 8(s1)
- addu v0, v0, t2
- sll s3, s3, 1
- addu t0, t0, t3
- addiu t8, t8, 4
- addu t0, t0, v0
- addiu s2, s2, 8
- addu s3, t0, s3
- addiu t9, t9, 8
- madd $ac1, s3, t7
- extr_r.w t1, $ac1, 16 /* t1 = fourth column result */
- addiu s0, s0, 8
- addiu s1, s1, 8
- bne s5, t8, 5b
- sb t1, -1(t8) /* (delay slot) store the fourth column */
- /* Special case for last column */
- lh v0, 0(s2)
- lh v1, 0(t9)
- lh t0, 0(s0)
- lh t1, 0(s1)
- ins v0, v1, 16, 16
- ins t0, t1, 16, 16
- raddu.w.qb t2, v0
- raddu.w.qb s3, t0
- lbu v0, -1(s2)
- lbu v1, 1(s2) /* offset 1 instead of 2: there is no column to the right */
- lbu t0, -1(t9)
- lbu t1, 1(t9)
- addu v0, v0, v1
- mult $ac1, t2, t6
- addu t0, t0, t1
- lbu t2, 1(s0)
- addu t0, t0, v0
- lbu t3, 1(s1)
- addu s3, t0, s3
- lbu v0, -1(s0)
- lbu t0, -1(s1)
- sll s3, s3, 1
- addu v0, v0, t2
- addu t0, t0, t3
- addu t0, t0, v0
- addu s3, t0, s3
- madd $ac1, s3, t7
- extr_r.w t0, $ac1, 16
- addiu t5, t5, 2 /* first of two increments of the input row index */
- sb t0, 0(t8)
- addiu t4, t4, 1
- bne t4, a2, 3b /* loop over v_samp_factor output rows */
- addiu t5, t5, 2 /* (delay slot) second increment of the input row index */
- RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- j ra
- nop
- END(jsimd_h2v2_smooth_downsample_dspr2)
- /*****************************************************************************/
- LEAF_DSPR2(jsimd_int_upsample_dspr2)
- /*
- * a0 = upsample->h_expand[compptr->component_index]
- * a1 = upsample->v_expand[compptr->component_index]
- * a2 = input_data
- * a3 = output_data_ptr
- * 16(sp) = cinfo->output_width
- * 20(sp) = cinfo->max_v_samp_factor
- *
- * Integer-factor upsampling by pixel replication.
- * Horizontal step (labels 1-3): every input sample is stored h_expand
- * times into the output row until outptr reaches outptr + output_width.
- * Vertical step (labels 5-8): when v_expand > 1, output_width bytes are
- * copied from the row pointer at 0(s0) to the row pointer at 4(s0),
- * v_expand - 1 times; the copy uses 16-byte chunks plus a byte loop.
- * The outer loop ends when outrow (s3) equals max_v_samp_factor.
- *
- * NOTE(review): t6 (inrow) and s3 (outrow) are added to the row-pointer
- * arrays as raw byte offsets but are stepped by 1 and by v_expand
- * respectively, while the pointers themselves are read with lw - confirm
- * whether a scale by 4 is missing.
- * NOTE(review): s0 is advanced by 8 after every vertical copy and is also
- * the base used for the next outptr lookup - confirm this is intended.
- */
- .set at
- SAVE_REGS_ON_STACK 16, s0, s1, s2, s3
- lw s0, 0(a3) /* s0 = output_data */
- lw s1, 32(sp) /* s1 = cinfo->output_width */
- lw s2, 36(sp) /* s2 = cinfo->max_v_samp_factor */
- li t6, 0 /* t6 = inrow */
- beqz s2, 10f
- li s3, 0 /* s3 = outrow */
- 0:
- addu t0, a2, t6
- addu t7, s0, s3
- lw t3, 0(t0) /* t3 = inptr */
- lw t8, 0(t7) /* t8 = outptr */
- beqz s1, 4f /* zero-width row: skip the horizontal step */
- addu t5, t8, s1 /* t5 = outend */
- 1:
- lb t2, 0(t3) /* t2 = invalue = *inptr++ */
- addiu t3, 1
- beqz a0, 3f
- move t0, a0 /* t0 = h_expand */
- 2:
- sb t2, 0(t8) /* replicate invalue h_expand times */
- addiu t0, -1
- bgtz t0, 2b
- addiu t8, 1
- 3:
- bgt t5, t8, 1b /* continue while outptr < outend */
- nop
- 4:
- addiu t9, a1, -1 /* t9 = v_expand - 1 */
- blez t9, 9f
- nop
- 5:
- lw t3, 0(s0) /* t3 = source row */
- lw t4, 4(s0) /* t4 = destination row */
- subu t0, s1, 0xF
- blez t0, 7f /* output_width <= 15: byte loop only */
- addu t5, t3, s1 /* t5 = end address */
- andi t7, s1, 0xF /* t7 = residual */
- subu t8, t5, t7 /* t8 = end address of the 16-byte loop */
- 6:
- ulw t0, 0(t3)
- ulw t1, 4(t3)
- ulw t2, 8(t3)
- usw t0, 0(t4)
- ulw t0, 12(t3)
- usw t1, 4(t4)
- usw t2, 8(t4)
- usw t0, 12(t4)
- addiu t3, 16
- bne t3, t8, 6b
- addiu t4, 16
- beqz t7, 8f /* no residual bytes */
- nop
- 7:
- lbu t0, 0(t3)
- sb t0, 0(t4)
- addiu t3, 1
- bne t3, t5, 7b
- addiu t4, 1
- 8:
- addiu t9, -1
- bgtz t9, 5b
- addiu s0, 8
- 9:
- addu s3, s3, a1 /* outrow += v_expand */
- bne s3, s2, 0b
- addiu t6, 1 /* (delay slot) inrow += 1 */
- 10:
- RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3
- j ra
- nop
- END(jsimd_int_upsample_dspr2)
- /*****************************************************************************/
- LEAF_DSPR2(jsimd_h2v1_upsample_dspr2)
- /*
- * a0 = cinfo->max_v_samp_factor
- * a1 = cinfo->output_width
- * a2 = input_data
- * a3 = output_data_ptr
- *
- * Horizontal 2:1 upsampling by replication: every input byte is written
- * twice to the output row. Loop 1 turns 8 input bytes into 16 output
- * bytes per iteration; loop 2 handles the output_width & 0xf leftover
- * output bytes, two per input byte. One row is processed for each of the
- * max_v_samp_factor row pointers.
- */
- lw t7, 0(a3) /* t7 = output_data */
- andi t8, a1, 0xf /* t8 = residual */
- sll t0, a0, 2
- blez a0, 4f
- addu t9, t7, t0 /* t9 = output_data end address */
- 0:
- lw t5, 0(t7) /* t5 = outptr */
- lw t6, 0(a2) /* t6 = inptr */
- addu t3, t5, a1 /* t3 = outptr + output_width (end address) */
- subu t3, t8 /* t3 = end address - residual */
- beq t5, t3, 2f /* fewer than 16 output bytes: leftover loop only */
- move t4, t8 /* (delay slot) t4 = leftover output byte count */
- 1:
- ulw t0, 0(t6) /* t0 = |P3|P2|P1|P0| */
- ulw t2, 4(t6) /* t2 = |P7|P6|P5|P4| */
- srl t1, t0, 16 /* t1 = |X|X|P3|P2| */
- ins t0, t0, 16, 16 /* t0 = |P1|P0|P1|P0| */
- ins t1, t1, 16, 16 /* t1 = |P3|P2|P3|P2| */
- ins t0, t0, 8, 16 /* t0 = |P1|P1|P0|P0| */
- ins t1, t1, 8, 16 /* t1 = |P3|P3|P2|P2| */
- usw t0, 0(t5)
- usw t1, 4(t5)
- srl t0, t2, 16 /* t0 = |X|X|P7|P6| */
- ins t2, t2, 16, 16 /* t2 = |P5|P4|P5|P4| */
- ins t0, t0, 16, 16 /* t0 = |P7|P6|P7|P6| */
- ins t2, t2, 8, 16 /* t2 = |P5|P5|P4|P4| */
- ins t0, t0, 8, 16 /* t0 = |P7|P7|P6|P6| */
- usw t2, 8(t5)
- usw t0, 12(t5)
- addiu t5, 16
- bne t5, t3, 1b
- addiu t6, 8 /* (delay slot) 8 input bytes consumed */
- beqz t8, 3f /* no leftover bytes */
- move t4, t8
- 2:
- lbu t1, 0(t6)
- sb t1, 0(t5) /* write the sample twice */
- sb t1, 1(t5)
- addiu t4, -2
- addiu t6, 1
- bgtz t4, 2b
- addiu t5, 2
- 3:
- addiu t7, 4 /* next output row pointer */
- bne t9, t7, 0b
- addiu a2, 4 /* (delay slot) next input row pointer */
- 4:
- j ra
- nop
- END(jsimd_h2v1_upsample_dspr2)
- /*****************************************************************************/
- LEAF_DSPR2(jsimd_h2v2_upsample_dspr2)
- /*
- * a0 = cinfo->max_v_samp_factor
- * a1 = cinfo->output_width
- * a2 = input_data
- * a3 = output_data_ptr
- *
- * 2:1 horizontal and 2:1 vertical upsampling by replication. For each
- * input row, every input byte is written twice into the output row at
- * 0(t7) (loops 1 and 2), then output_width bytes of that row are copied to
- * the output row at 4(t7) (loops 4 and 5). Each pass consumes one input
- * row pointer, two output row pointers and subtracts 2 from a0.
- */
- lw t7, 0(a3) /* t7 = output row-pointer array */
- blez a0, 7f
- andi t9, a1, 0xf /* t9 = residual */
- 0:
- lw t6, 0(a2) /* t6 = inptr */
- lw t5, 0(t7) /* t5 = outptr */
- addu t8, t5, a1 /* t8 = outptr end address */
- subu t8, t9 /* t8 = end address - residual */
- beq t5, t8, 2f /* fewer than 16 output bytes: leftover loop only */
- move t4, t9 /* (delay slot) t4 = leftover output byte count */
- 1: /* 8 input bytes -> 16 output bytes */
- ulw t0, 0(t6)
- srl t1, t0, 16
- ins t0, t0, 16, 16
- ins t0, t0, 8, 16 /* t0 = first two input bytes, each doubled */
- ins t1, t1, 16, 16
- ins t1, t1, 8, 16 /* t1 = next two input bytes, each doubled */
- ulw t2, 4(t6)
- usw t0, 0(t5)
- usw t1, 4(t5)
- srl t3, t2, 16
- ins t2, t2, 16, 16
- ins t2, t2, 8, 16
- ins t3, t3, 16, 16
- ins t3, t3, 8, 16
- usw t2, 8(t5)
- usw t3, 12(t5)
- addiu t5, 16
- bne t5, t8, 1b
- addiu t6, 8
- beqz t9, 3f /* no leftover bytes */
- move t4, t9
- 2:
- lbu t0, 0(t6)
- sb t0, 0(t5) /* write the sample twice */
- sb t0, 1(t5)
- addiu t4, -2
- addiu t6, 1
- bgtz t4, 2b
- addiu t5, 2
- 3: /* duplicate the finished row into the following output row */
- lw t6, 0(t7) /* t6 = outptr[0] */
- lw t5, 4(t7) /* t5 = outptr[1] */
- addu t4, t6, a1 /* t4 = new end address */
- beq a1, t9, 5f /* output_width < 16: byte loop only */
- subu t8, t4, t9 /* (delay slot) t8 = end address of the 16-byte loop */
- 4:
- ulw t0, 0(t6)
- ulw t1, 4(t6)
- ulw t2, 8(t6)
- usw t0, 0(t5)
- ulw t0, 12(t6)
- usw t1, 4(t5)
- usw t2, 8(t5)
- usw t0, 12(t5)
- addiu t6, 16
- bne t6, t8, 4b
- addiu t5, 16
- beqz t9, 6f /* no residual bytes */
- nop
- 5:
- lbu t0, 0(t6)
- sb t0, 0(t5)
- addiu t6, 1
- bne t6, t4, 5b
- addiu t5, 1
- 6:
- addiu t7, 8 /* skip both output row pointers */
- addiu a0, -2
- bgtz a0, 0b
- addiu a2, 4 /* (delay slot) next input row pointer */
- 7:
- j ra
- nop
- END(jsimd_h2v2_upsample_dspr2)
- /*****************************************************************************/
- LEAF_DSPR2(jsimd_idct_islow_dspr2)
- /*
- * a0 = coef_block
- * a1 = compptr->dcttable
- * a2 = output
- * a3 = range_limit
- */
- SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- addiu sp, sp, -256
- move v0, sp
- addiu v1, zero, 8 /* v1 = DCTSIZE = 8 */
- 1:
- lh s4, 32(a0) /* s4 = inptr[16] */
- lh s5, 64(a0) /* s5 = inptr[32] */
- lh s6, 96(a0) /* s6 = inptr[48] */
- lh t1, 112(a0) /* t1 = inptr[56] */
- lh t7, 16(a0) /* t7 = inptr[8] */
- lh t5, 80(a0) /* t5 = inptr[40] */
- lh t3, 48(a0) /* t3 = inptr[24] */
- or s4, s4, t1
- or s4, s4, t3
- or s4, s4, t5
- or s4, s4, t7
- or s4, s4, s5
- or s4, s4, s6
- bnez s4, 2f
- addiu v1, v1, -1
- lh s5, 0(a1) /* quantptr[DCTSIZE*0] */
- lh s6, 0(a0) /* inptr[DCTSIZE*0] */
- mul s5, s5, s6 /* DEQUANTIZE(inptr[0], quantptr[0]) */
- sll s5, s5, 2
- sw s5, 0(v0)
- sw s5, 32(v0)
- sw s5, 64(v0)
- sw s5, 96(v0)
- sw s5, 128(v0)
- sw s5, 160(v0)
- sw s5, 192(v0)
- b 3f
- sw s5, 224(v0)
- 2:
- lh t0, 112(a1)
- lh t2, 48(a1)
- lh t4, 80(a1)
- lh t6, 16(a1)
- mul t0, t0, t1 /* DEQUANTIZE(inptr[DCTSIZE*7],
- quantptr[DCTSIZE*7]) */
- mul t1, t2, t3 /* DEQUANTIZE(inptr[DCTSIZE*3],
- quantptr[DCTSIZE*3]) */
- mul t2, t4, t5 /* DEQUANTIZE(inptr[DCTSIZE*5],
- quantptr[DCTSIZE*5]) */
- mul t3, t6, t7 /* DEQUANTIZE(inptr[DCTSIZE*1],
- quantptr[DCTSIZE*1]) */
- lh t4, 32(a1)
- lh t5, 32(a0)
- lh t6, 96(a1)
- lh t7, 96(a0)
- addu s0, t0, t1 /* z3 = tmp0 + tmp2 */
- addu s1, t1, t2 /* z2 = tmp1 + tmp2 */
- addu s2, t2, t3 /* z4 = tmp1 + tmp3 */
- addu s3, s0, s2 /* z3 + z4 */
- addiu t9, zero, 9633 /* FIX_1_175875602 */
- mul s3, s3, t9 /* z5 = MULTIPLY(z3 + z4, FIX_1_175875602) */
- addu t8, t0, t3 /* z1 = tmp0 + tmp3 */
- addiu t9, zero, 2446 /* FIX_0_298631336 */
- mul t0, t0, t9 /* tmp0 = MULTIPLY(tmp0, FIX_0_298631336) */
- addiu t9, zero, 16819 /* FIX_2_053119869 */
- mul t2, t2, t9 /* tmp1 = MULTIPLY(tmp1, FIX_2_053119869) */
- addiu t9, zero, 25172 /* FIX_3_072711026 */
- mul t1, t1, t9 /* tmp2 = MULTIPLY(tmp2, FIX_3_072711026) */
- addiu t9, zero, 12299 /* FIX_1_501321110 */
- mul t3, t3, t9 /* tmp3 = MULTIPLY(tmp3, FIX_1_501321110) */
- addiu t9, zero, 16069 /* FIX_1_961570560 */
- mul s0, s0, t9 /* -z3 = MULTIPLY(z3, FIX_1_961570560) */
- addiu t9, zero, 3196 /* FIX_0_390180644 */
- mul s2, s2, t9 /* -z4 = MULTIPLY(z4, FIX_0_390180644) */
- addiu t9, zero, 7373 /* FIX_0_899976223 */
- mul t8, t8, t9 /* -z1 = MULTIPLY(z1, FIX_0_899976223) */
- addiu t9, zero, 20995 /* FIX_2_562915447 */
- mul s1, s1, t9 /* -z2 = MULTIPLY(z2, FIX_2_562915447) */
- subu s0, s3, s0 /* z3 += z5 */
- addu t0, t0, s0 /* tmp0 += z3 */
- addu t1, t1, s0 /* tmp2 += z3 */
- subu s2, s3, s2 /* z4 += z5 */
- addu t2, t2, s2 /* tmp1 += z4 */
- addu t3, t3, s2 /* tmp3 += z4 */
- subu t0, t0, t8 /* tmp0 += z1 */
- subu t1, t1, s1 /* tmp2 += z2 */
- subu t2, t2, s1 /* tmp1 += z2 */
- subu t3, t3, t8 /* tmp3 += z1 */
- mul s0, t4, t5 /* DEQUANTIZE(inptr[DCTSIZE*2],
- quantptr[DCTSIZE*2]) */
- addiu t9, zero, 6270 /* FIX_0_765366865 */
- mul s1, t6, t7 /* DEQUANTIZE(inptr[DCTSIZE*6],
- quantptr[DCTSIZE*6]) */
- lh t4, 0(a1)
- lh t5, 0(a0)
- lh t6, 64(a1)
- lh t7, 64(a0)
- mul s2, t9, s0 /* MULTIPLY(z2, FIX_0_765366865) */
- mul t5, t4, t5 /* DEQUANTIZE(inptr[DCTSIZE*0],
- quantptr[DCTSIZE*0]) */
- mul t6, t6, t7 /* DEQUANTIZE(inptr[DCTSIZE*4],
- quantptr[DCTSIZE*4]) */
- addiu t9, zero, 4433 /* FIX_0_541196100 */
- addu s3, s0, s1 /* z2 + z3 */
- mul s3, s3, t9 /* z1 = MULTIPLY(z2 + z3, FIX_0_541196100) */
- addiu t9, zero, 15137 /* FIX_1_847759065 */
- mul t8, s1, t9 /* MULTIPLY(z3, FIX_1_847759065) */
- addu t4, t5, t6
- subu t5, t5, t6
- sll t4, t4, 13 /* tmp0 = (z2 + z3) << CONST_BITS */
- sll t5, t5, 13 /* tmp1 = (z2 - z3) << CONST_BITS */
- addu t7, s3, s2 /* tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865) */
- subu t6, s3, t8 /* tmp2 =
- z1 + MULTIPLY(z3, -FIX_1_847759065) */
- addu s0, t4, t7
- subu s1, t4, t7
- addu s2, t5, t6
- subu s3, t5, t6
- addu t4, s0, t3
- subu s0, s0, t3
- addu t3, s2, t1
- subu s2, s2, t1
- addu t1, s3, t2
- subu s3, s3, t2
- addu t2, s1, t0
- subu s1, s1, t0
- shra_r.w t4, t4, 11
- shra_r.w t3, t3, 11
- shra_r.w t1, t1, 11
- shra_r.w t2, t2, 11
- shra_r.w s1, s1, 11
- shra_r.w s3, s3, 11
- shra_r.w s2, s2, 11
- shra_r.w s0, s0, 11
- sw t4, 0(v0)
- sw t3, 32(v0)
- sw t1, 64(v0)
- sw t2, 96(v0)
- sw s1, 128(v0)
- sw s3, 160(v0)
- sw s2, 192(v0)
- sw s0, 224(v0)
- 3:
- addiu a1, a1, 2
- addiu a0, a0, 2
- bgtz v1, 1b
- addiu v0, v0, 4
- move v0, sp
- addiu v1, zero, 8
- 4:
- lw t0, 8(v0) /* z2 = (JLONG)wsptr[2] */
- lw t1, 24(v0) /* z3 = (JLONG)wsptr[6] */
- lw t2, 0(v0) /* (JLONG)wsptr[0] */
- lw t3, 16(v0) /* (JLONG)wsptr[4] */
- lw s4, 4(v0) /* (JLONG)wsptr[1] */
- lw s5, 12(v0) /* (JLONG)wsptr[3] */
- lw s6, 20(v0) /* (JLONG)wsptr[5] */
- lw s7, 28(v0) /* (JLONG)wsptr[7] */
- or s4, s4, t0
- or s4, s4, t1
- or s4, s4, t3
- or s4, s4, s7
- or s4, s4, s5
- or s4, s4, s6
- bnez s4, 5f
- addiu v1, v1, -1
- shra_r.w s5, t2, 5
- andi s5, s5, 0x3ff
- lbux s5, s5(a3)
- lw s1, 0(a2)
- replv.qb s5, s5
- usw s5, 0(s1)
- usw s5, 4(s1)
- b 6f
- nop
- 5:
- addu t4, t0, t1 /* z2 + z3 */
- addiu t8, zero, 4433 /* FIX_0_541196100 */
- mul t5, t4, t8 /* z1 = MULTIPLY(z2 + z3, FIX_0_541196100) */
- addiu t8, zero, 15137 /* FIX_1_847759065 */
- mul t1, t1, t8 /* MULTIPLY(z3, FIX_1_847759065) */
- addiu t8, zero, 6270 /* FIX_0_765366865 */
- mul t0, t0, t8 /* MULTIPLY(z2, FIX_0_765366865) */
- addu t4, t2, t3 /* (JLONG)wsptr[0] + (JLONG)wsptr[4] */
- subu t2, t2, t3 /* (JLONG)wsptr[0] - (JLONG)wsptr[4] */
- sll t4, t4, 13 /* tmp0 =
- (wsptr[0] + wsptr[4]) << CONST_BITS */
- sll t2, t2, 13 /* tmp1 =
- (wsptr[0] - wsptr[4]) << CONST_BITS */
- subu t1, t5, t1 /* tmp2 =
- z1 + MULTIPLY(z3, -FIX_1_847759065) */
- subu t3, t2, t1 /* tmp12 = tmp1 - tmp2 */
- addu t2, t2, t1 /* tmp11 = tmp1 + tmp2 */
- addu t5, t5, t0 /* tmp3 =
- z1 + MULTIPLY(z2, FIX_0_765366865) */
- subu t1, t4, t5 /* tmp13 = tmp0 - tmp3 */
- addu t0, t4, t5 /* tmp10 = tmp0 + tmp3 */
- lw t4, 28(v0) /* tmp0 = (JLONG)wsptr[7] */
- lw t6, 12(v0) /* tmp2 = (JLONG)wsptr[3] */
- lw t5, 20(v0) /* tmp1 = (JLONG)wsptr[5] */
- lw t7, 4(v0) /* tmp3 = (JLONG)wsptr[1] */
- addu s0, t4, t6 /* z3 = tmp0 + tmp2 */
- addiu t8, zero, 9633 /* FIX_1_175875602 */
- addu s1, t5, t7 /* z4 = tmp1 + tmp3 */
- addu s2, s0, s1 /* z3 + z4 */
- mul s2, s2, t8 /* z5 = MULTIPLY(z3 + z4, FIX_1_175875602) */
- addu s3, t4, t7 /* z1 = tmp0 + tmp3 */
- addu t9, t5, t6 /* z2 = tmp1 + tmp2 */
- addiu t8, zero, 16069 /* FIX_1_961570560 */
- mul s0, s0, t8 /* -z3 = MULTIPLY(z3, FIX_1_961570560) */
- addiu t8, zero, 3196 /* FIX_0_390180644 */
- mul s1, s1, t8 /* -z4 = MULTIPLY(z4, FIX_0_390180644) */
- addiu t8, zero, 2446 /* FIX_0_298631336 */
- mul t4, t4, t8 /* tmp0 = MULTIPLY(tmp0, FIX_0_298631336) */
- addiu t8, zero, 7373 /* FIX_0_899976223 */
- mul s3, s3, t8 /* -z1 = MULTIPLY(z1, FIX_0_899976223) */
- addiu t8, zero, 16819 /* FIX_2_053119869 */
- mul t5, t5, t8 /* tmp1 = MULTIPLY(tmp1, FIX_2_053119869) */
- addiu t8, zero, 20995 /* FIX_2_562915447 */
- mul t9, t9, t8 /* -z2 = MULTIPLY(z2, FIX_2_562915447) */
- addiu t8, zero, 25172 /* FIX_3_072711026 */
- mul t6, t6, t8 /* tmp2 = MULTIPLY(tmp2, FIX_3_072711026) */
- addiu t8, zero, 12299 /* FIX_1_501321110 */
- mul t7, t7, t8 /* tmp3 = MULTIPLY(tmp3, FIX_1_501321110) */
- subu s0, s2, s0 /* z3 += z5 */
- subu s1, s2, s1 /* z4 += z5 */
- addu t4, t4, s0
- subu t4, t4, s3 /* tmp0 */
- addu t5, t5, s1
- subu t5, t5, t9 /* tmp1 */
- addu t6, t6, s0
- subu t6, t6, t9 /* tmp2 */
- addu t7, t7, s1
- subu t7, t7, s3 /* tmp3 */
- addu s0, t0, t7
- subu t0, t0, t7
- addu t7, t2, t6
- subu t2, t2, t6
- addu t6, t3, t5
- subu t3, t3, t5
- addu t5, t1, t4
- subu t1, t1, t4
- shra_r.w s0, s0, 18
- shra_r.w t7, t7, 18
- shra_r.w t6, t6, 18
- shra_r.w t5, t5, 18
- shra_r.w t1, t1, 18
- shra_r.w t3, t3, 18
- shra_r.w t2, t2, 18
- shra_r.w t0, t0, 18
- andi s0, s0, 0x3ff
- andi t7, t7, 0x3ff
- andi t6, t6, 0x3ff
- andi t5, t5, 0x3ff
- andi t1, t1, 0x3ff
- andi t3, t3, 0x3ff
- andi t2, t2, 0x3ff
- andi t0, t0, 0x3ff
- lw s1, 0(a2)
- lbux s0, s0(a3)
- lbux t7, t7(a3)
- lbux t6, t6(a3)
- lbux t5, t5(a3)
- lbux t1, t1(a3)
- lbux t3, t3(a3)
- lbux t2, t2(a3)
- lbux t0, t0(a3)
- sb s0, 0(s1)
- sb t7, 1(s1)
- sb t6, 2(s1)
- sb t5, 3(s1)
- sb t1, 4(s1)
- sb t3, 5(s1)
- sb t2, 6(s1)
- sb t0, 7(s1)
- 6:
- addiu v0, v0, 32
- bgtz v1, 4b
- addiu a2, a2, 4
- addiu sp, sp, 256
- RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- j ra
- nop
- END(jsimd_idct_islow_dspr2)
- /*****************************************************************************/
- LEAF_DSPR2(jsimd_idct_ifast_cols_dspr2)
- /*
- * Column pass of the "ifast" inverse DCT.  Every 32-bit word loaded from
- * inptr/quantptr is treated as two packed 16-bit lanes, so one loop
- * iteration handles two adjacent columns; the loop ends once a0 has
- * advanced by 16 bytes (four iterations).  Each input lane is multiplied
- * by the matching quantptr lane (muleq_s.w.phl / muleq_s.w.phr) and the
- * two products are re-packed into one word with ins.  Results are stored
- * to wsptr rows that are 16 bytes apart.
- *
- * When rows 1..7 of both columns are all zero, the dequantized row-0
- * value is simply copied to all eight workspace rows.
- *
- * a0 = inptr
- * a1 = quantptr
- * a2 = wsptr
- * a3 = mips_idct_ifast_coefs
- */
- SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- addiu t9, a0, 16 /* end address */
- or AT, a3, zero /* AT = a3: base of the constant table read below */
- 0:
- lw s0, 0(a1) /* quantptr[DCTSIZE*0] */
- lw t0, 0(a0) /* inptr[DCTSIZE*0] */
- lw t1, 16(a0) /* inptr[DCTSIZE*1] */
- muleq_s.w.phl v0, t0, s0 /* tmp0 ... */
- lw t2, 32(a0) /* inptr[DCTSIZE*2] */
- lw t3, 48(a0) /* inptr[DCTSIZE*3] */
- lw t4, 64(a0) /* inptr[DCTSIZE*4] */
- lw t5, 80(a0) /* inptr[DCTSIZE*5] */
- muleq_s.w.phr t0, t0, s0 /* ... tmp0 ... */
- lw t6, 96(a0) /* inptr[DCTSIZE*6] */
- lw t7, 112(a0) /* inptr[DCTSIZE*7] */
- or s4, t1, t2 /* s4 = rows 1 | 2 */
- or s5, t3, t4 /* s5 = rows 3 | 4 */
- bnez s4, 1f /* row 1 or 2 nonzero -> full IDCT */
- ins t0, v0, 16, 16 /* ... tmp0 */
- bnez s5, 1f /* row 3 or 4 nonzero -> full IDCT */
- or s6, t5, t6 /* s6 = rows 5 | 6 ... */
- or s6, s6, t7 /* ... | 7 */
- bnez s6, 1f /* row 5, 6 or 7 nonzero -> full IDCT */
- sw t0, 0(a2) /* wsptr[DCTSIZE*0] */
- sw t0, 16(a2) /* wsptr[DCTSIZE*1]: shortcut, copy tmp0 to every row */
- sw t0, 32(a2) /* wsptr[DCTSIZE*2] */
- sw t0, 48(a2) /* wsptr[DCTSIZE*3] */
- sw t0, 64(a2) /* wsptr[DCTSIZE*4] */
- sw t0, 80(a2) /* wsptr[DCTSIZE*5] */
- sw t0, 96(a2) /* wsptr[DCTSIZE*6] */
- sw t0, 112(a2) /* wsptr[DCTSIZE*7] */
- addiu a0, a0, 4 /* next pair of input columns */
- b 2f
- addiu a1, a1, 4 /* (delay slot) next pair of quantptr columns */
- 1:
- lw s1, 32(a1) /* quantptr[DCTSIZE*2] */
- lw s2, 64(a1) /* quantptr[DCTSIZE*4] */
- muleq_s.w.phl v0, t2, s1 /* tmp1 ... */
- muleq_s.w.phr t2, t2, s1 /* ... tmp1 ... */
- lw s0, 16(a1) /* quantptr[DCTSIZE*1] */
- lw s1, 48(a1) /* quantptr[DCTSIZE*3] */
- lw s3, 96(a1) /* quantptr[DCTSIZE*6] */
- muleq_s.w.phl v1, t4, s2 /* tmp2 ... */
- muleq_s.w.phr t4, t4, s2 /* ... tmp2 ... */
- lw s2, 80(a1) /* quantptr[DCTSIZE*5] */
- lw t8, 4(AT) /* FIX(1.414213562) */
- ins t2, v0, 16, 16 /* ... tmp1 */
- muleq_s.w.phl v0, t6, s3 /* tmp3 ... */
- muleq_s.w.phr t6, t6, s3 /* ... tmp3 ... */
- ins t4, v1, 16, 16 /* ... tmp2 */
- addq.ph s4, t0, t4 /* tmp10 */
- subq.ph s5, t0, t4 /* tmp11 */
- ins t6, v0, 16, 16 /* ... tmp3 */
- subq.ph s6, t2, t6 /* tmp12 ... */
- addq.ph s7, t2, t6 /* tmp13 */
- mulq_s.ph s6, s6, t8 /* ... tmp12 ... */
- addq.ph t0, s4, s7 /* tmp0 */
- subq.ph t6, s4, s7 /* tmp3 */
- muleq_s.w.phl v0, t1, s0 /* tmp4 ... */
- muleq_s.w.phr t1, t1, s0 /* ... tmp4 ... */
- shll_s.ph s6, s6, 1 /* x2 */
- lw s3, 112(a1) /* quantptr[DCTSIZE*7] */
- subq.ph s6, s6, s7 /* ... tmp12 */
- muleq_s.w.phl v1, t7, s3 /* tmp7 ... */
- muleq_s.w.phr t7, t7, s3 /* ... tmp7 ... */
- ins t1, v0, 16, 16 /* ... tmp4 */
- addq.ph t2, s5, s6 /* tmp1 */
- subq.ph t4, s5, s6 /* tmp2 */
- muleq_s.w.phl v0, t5, s2 /* tmp6 ... */
- muleq_s.w.phr t5, t5, s2 /* ... tmp6 ... */
- ins t7, v1, 16, 16 /* ... tmp7 */
- addq.ph s5, t1, t7 /* z11 */
- subq.ph s6, t1, t7 /* z12 */
- muleq_s.w.phl v1, t3, s1 /* tmp5 ... */
- muleq_s.w.phr t3, t3, s1 /* ... tmp5 ... */
- ins t5, v0, 16, 16 /* ... tmp6 */
- ins t3, v1, 16, 16 /* ... tmp5 */
- addq.ph s7, t5, t3 /* z13 */
- subq.ph v0, t5, t3 /* z10 */
- addq.ph t7, s5, s7 /* tmp7 */
- subq.ph s5, s5, s7 /* tmp11 ... */
- addq.ph v1, v0, s6 /* z5 ... */
- mulq_s.ph s5, s5, t8 /* ... tmp11 */
- lw t8, 8(AT) /* FIX(1.847759065) */
- lw s4, 0(AT) /* FIX(1.082392200) */
- addq.ph s0, t0, t7 /* tmp0 + tmp7 */
- subq.ph s1, t0, t7 /* tmp0 - tmp7 */
- mulq_s.ph v1, v1, t8 /* ... z5 */
- shll_s.ph s5, s5, 1 /* x2 */
- lw t8, 12(AT) /* FIX(-2.613125930) */
- sw s0, 0(a2) /* wsptr[DCTSIZE*0] */
- shll_s.ph v0, v0, 1 /* x4 */
- mulq_s.ph v0, v0, t8 /* tmp12 ... */
- mulq_s.ph s4, s6, s4 /* tmp10 ... */
- shll_s.ph v1, v1, 1 /* x2 */
- addiu a0, a0, 4 /* next pair of input columns */
- addiu a1, a1, 4 /* next pair of quantptr columns */
- sw s1, 112(a2) /* wsptr[DCTSIZE*7] */
- shll_s.ph s6, v0, 1 /* x4 */
- shll_s.ph s4, s4, 1 /* x2 */
- addq.ph s6, s6, v1 /* ... tmp12 */
- subq.ph t5, s6, t7 /* tmp6 */
- subq.ph s4, s4, v1 /* ... tmp10 */
- subq.ph t3, s5, t5 /* tmp5 */
- addq.ph s2, t2, t5 /* tmp1 + tmp6 */
- addq.ph t1, s4, t3 /* tmp4 */
- subq.ph s3, t2, t5 /* tmp1 - tmp6 */
- sw s2, 16(a2) /* wsptr[DCTSIZE*1] */
- sw s3, 96(a2) /* wsptr[DCTSIZE*6] */
- addq.ph v0, t4, t3 /* tmp2 + tmp5 */
- subq.ph v1, t4, t3 /* tmp2 - tmp5 */
- sw v0, 32(a2) /* wsptr[DCTSIZE*2] */
- sw v1, 80(a2) /* wsptr[DCTSIZE*5] */
- addq.ph v0, t6, t1 /* tmp3 + tmp4 */
- subq.ph v1, t6, t1 /* tmp3 - tmp4 */
- sw v0, 64(a2) /* wsptr[DCTSIZE*4] */
- sw v1, 48(a2) /* wsptr[DCTSIZE*3] */
- 2:
- bne a0, t9, 0b /* loop until a0 reaches the end address */
- addiu a2, a2, 4 /* (delay slot) next pair of workspace columns */
- RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- j ra
- nop
- END(jsimd_idct_ifast_cols_dspr2)
- /*****************************************************************************/
- LEAF_DSPR2(jsimd_idct_ifast_rows_dspr2)
- /*
- * Row pass of the "ifast" inverse DCT.  Each iteration reads two
- * consecutive 16-byte workspace rows (lower-case letters a..h = first
- * row, upper-case A..H = second row), interleaves them so that every
- * packed-halfword operation works on the same element of both rows, and
- * stores eight bytes per row at output_buf[row] + output_col after
- * adding 0x80 to every byte.  The loop ends once a0 has advanced by
- * 128 bytes (four iterations).
- *
- * When elements 1..7 of both rows are zero, only element 0 is scaled and
- * replicated across the eight output bytes of each row.
- *
- * a0 = wsptr
- * a1 = output_buf
- * a2 = output_col
- * a3 = mips_idct_ifast_coefs
- */
- SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8, a3
- addiu t9, a0, 128 /* end address */
- lui s8, 0x8080
- ori s8, s8, 0x8080 /* s8 = 0x80808080, added to every output byte */
- 0:
- lw AT, 36(sp) /* restore $a3 (mips_idct_ifast_coefs); a3 itself is
- reused as an output pointer below */
- lw t0, 0(a0) /* wsptr[DCTSIZE*0+0/1] b a */
- lw s0, 16(a0) /* wsptr[DCTSIZE*1+0/1] B A */
- lw t2, 4(a0) /* wsptr[DCTSIZE*0+2/3] d c */
- lw s2, 20(a0) /* wsptr[DCTSIZE*1+2/3] D C */
- lw t4, 8(a0) /* wsptr[DCTSIZE*0+4/5] f e */
- lw s4, 24(a0) /* wsptr[DCTSIZE*1+4/5] F E */
- lw t6, 12(a0) /* wsptr[DCTSIZE*0+6/7] h g */
- lw s6, 28(a0) /* wsptr[DCTSIZE*1+6/7] H G */
- precrq.ph.w t1, s0, t0 /* B b */
- ins t0, s0, 16, 16 /* A a */
- bnez t1, 1f /* element 1 of either row nonzero -> full IDCT */
- or s0, t2, s2 /* elements 2/3 of both rows */
- bnez s0, 1f
- or s0, t4, s4 /* elements 4/5 of both rows */
- bnez s0, 1f
- or s0, t6, s6 /* elements 6/7 of both rows */
- bnez s0, 1f
- shll_s.ph s0, t0, 2 /* A a */
- lw a3, 0(a1) /* output_buf[row] */
- lw AT, 4(a1) /* output_buf[row + 1] */
- precrq.ph.w t0, s0, s0 /* A A */
- ins s0, s0, 16, 16 /* a a */
- addu a3, a3, a2 /* + output_col */
- addu AT, AT, a2 /* + output_col */
- precrq.qb.ph t0, t0, t0 /* A A A A */
- precrq.qb.ph s0, s0, s0 /* a a a a */
- addu.qb s0, s0, s8 /* add 0x80 to each byte */
- addu.qb t0, t0, s8 /* add 0x80 to each byte */
- sw s0, 0(a3) /* first row, bytes 0..3 */
- sw s0, 4(a3) /* first row, bytes 4..7 */
- sw t0, 0(AT) /* second row, bytes 0..3 */
- sw t0, 4(AT) /* second row, bytes 4..7 */
- addiu a0, a0, 32 /* two workspace rows consumed */
- bne a0, t9, 0b
- addiu a1, a1, 8 /* (delay slot) two row pointers consumed */
- b 2f /* final pair took the shortcut: leave the loop */
- nop
- 1:
- precrq.ph.w t3, s2, t2 /* D d */
- ins t2, s2, 16, 16 /* C c */
- precrq.ph.w t5, s4, t4 /* F f */
- ins t4, s4, 16, 16 /* E e */
- precrq.ph.w t7, s6, t6 /* H h */
- ins t6, s6, 16, 16 /* G g */
- lw t8, 4(AT) /* FIX(1.414213562) */
- addq.ph s4, t0, t4 /* tmp10 */
- subq.ph s5, t0, t4 /* tmp11 */
- subq.ph s6, t2, t6 /* tmp12 ... */
- addq.ph s7, t2, t6 /* tmp13 */
- mulq_s.ph s6, s6, t8 /* ... tmp12 ... */
- addq.ph t0, s4, s7 /* tmp0 */
- subq.ph t6, s4, s7 /* tmp3 */
- shll_s.ph s6, s6, 1 /* x2 */
- subq.ph s6, s6, s7 /* ... tmp12 */
- addq.ph t2, s5, s6 /* tmp1 */
- subq.ph t4, s5, s6 /* tmp2 */
- addq.ph s5, t1, t7 /* z11 */
- subq.ph s6, t1, t7 /* z12 */
- addq.ph s7, t5, t3 /* z13 */
- subq.ph v0, t5, t3 /* z10 */
- addq.ph t7, s5, s7 /* tmp7 */
- subq.ph s5, s5, s7 /* tmp11 ... */
- addq.ph v1, v0, s6 /* z5 ... */
- mulq_s.ph s5, s5, t8 /* ... tmp11 */
- lw t8, 8(AT) /* FIX(1.847759065) */
- lw s4, 0(AT) /* FIX(1.082392200) */
- addq.ph s0, t0, t7 /* tmp0 + tmp7 */
- subq.ph s7, t0, t7 /* tmp0 - tmp7 */
- mulq_s.ph v1, v1, t8 /* ... z5 */
- lw a3, 0(a1) /* output_buf[row] */
- lw t8, 12(AT) /* FIX(-2.613125930) */
- shll_s.ph s5, s5, 1 /* x2 */
- addu a3, a3, a2 /* + output_col */
- shll_s.ph v0, v0, 1 /* x4 */
- mulq_s.ph v0, v0, t8 /* tmp12 ... */
- mulq_s.ph s4, s6, s4 /* tmp10 ... */
- shll_s.ph v1, v1, 1 /* x2 */
- addiu a0, a0, 32 /* two workspace rows consumed */
- addiu a1, a1, 8 /* two row pointers consumed */
- shll_s.ph s6, v0, 1 /* x4 */
- shll_s.ph s4, s4, 1 /* x2 */
- addq.ph s6, s6, v1 /* ... tmp12 */
- shll_s.ph s0, s0, 2 /* saturating << 2 before the high byte is taken */
- subq.ph t5, s6, t7 /* tmp6 */
- subq.ph s4, s4, v1 /* ... tmp10 */
- subq.ph t3, s5, t5 /* tmp5 */
- shll_s.ph s7, s7, 2
- addq.ph t1, s4, t3 /* tmp4 */
- addq.ph s1, t2, t5 /* tmp1 + tmp6 */
- subq.ph s6, t2, t5 /* tmp1 - tmp6 */
- addq.ph s2, t4, t3 /* tmp2 + tmp5 */
- subq.ph s5, t4, t3 /* tmp2 - tmp5 */
- addq.ph s4, t6, t1 /* tmp3 + tmp4 */
- subq.ph s3, t6, t1 /* tmp3 - tmp4 */
- shll_s.ph s1, s1, 2
- shll_s.ph s2, s2, 2
- shll_s.ph s3, s3, 2
- shll_s.ph s4, s4, 2
- shll_s.ph s5, s5, 2
- shll_s.ph s6, s6, 2
- precrq.ph.w t0, s1, s0 /* B A */
- ins s0, s1, 16, 16 /* b a */
- precrq.ph.w t2, s3, s2 /* D C */
- ins s2, s3, 16, 16 /* d c */
- precrq.ph.w t4, s5, s4 /* F E */
- ins s4, s5, 16, 16 /* f e */
- precrq.ph.w t6, s7, s6 /* H G */
- ins s6, s7, 16, 16 /* h g */
- precrq.qb.ph t0, t2, t0 /* D C B A */
- precrq.qb.ph s0, s2, s0 /* d c b a */
- precrq.qb.ph t4, t6, t4 /* H G F E */
- precrq.qb.ph s4, s6, s4 /* h g f e */
- addu.qb s0, s0, s8 /* add 0x80 to each byte */
- addu.qb s4, s4, s8 /* add 0x80 to each byte */
- sw s0, 0(a3) /* outptr[0/1/2/3] d c b a */
- sw s4, 4(a3) /* outptr[4/5/6/7] h g f e */
- lw a3, -4(a1) /* output_buf[row + 1] (a1 was already advanced by 8) */
- addu.qb t0, t0, s8 /* add 0x80 to each byte */
- addu a3, a3, a2 /* + output_col */
- addu.qb t4, t4, s8 /* add 0x80 to each byte */
- sw t0, 0(a3) /* outptr[0/1/2/3] D C B A */
- bne a0, t9, 0b
- sw t4, 4(a3) /* outptr[4/5/6/7] H G F E */
- 2:
- RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8, a3
- j ra
- nop
- END(jsimd_idct_ifast_rows_dspr2)
- /*****************************************************************************/
- LEAF_DSPR2(jsimd_fdct_islow_dspr2)
- /*
- * In-place forward DCT ("islow" variant) on the 16-bit samples at a0.
- *
- * Loop 1 walks eight 16-byte rows.  Odd outputs are produced with
- * packed-halfword dot products (dpa.w.ph) against the constant pairs held
- * in t0-t7, outputs 2 and 6 against t8/t9, and outputs 0 and 4 are
- * (t10 +/- t11) << 2.
- *
- * Loop 2 walks eight columns (row stride 16 bytes) using scalar
- * mult/madd/msub against the constants c0-c9 reloaded into t0-t9 and c10
- * in a1, descaling with a rounded right shift by 15 (or by 2 for rows 0
- * and 4).
- *
- * a0 = data
- */
- SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8
- lui t0, 6437
- ori t0, 2260 /* t0 = 6437 | 2260 (high | low halfword) */
- lui t1, 9633
- ori t1, 11363 /* t1 = 9633 | 11363 */
- lui t2, 0xd39e
- ori t2, 0xe6dc /* t2 = -11362 | -6436 */
- lui t3, 0xf72d
- ori t3, 9633 /* t3 = -2259 | 9633 */
- lui t4, 2261
- ori t4, 9633 /* t4 = 2261 | 9633 */
- lui t5, 0xd39e
- ori t5, 6437 /* t5 = -11362 | 6437 */
- lui t6, 9633
- ori t6, 0xd39d /* t6 = 9633 | -11363 */
- lui t7, 0xe6dc
- ori t7, 2260 /* t7 = -6436 | 2260 */
- lui t8, 4433
- ori t8, 10703 /* t8 = 4433 | 10703 */
- lui t9, 0xd630
- ori t9, 4433 /* t9 = -10704 | 4433 */
- li s8, 8 /* row counter */
- move a1, a0 /* a1 = row pointer; a0 is kept for loop 2 */
- 1:
- lw s0, 0(a1) /* tmp0 = 1|0 */
- lw s1, 4(a1) /* tmp1 = 3|2 */
- lw s2, 8(a1) /* tmp2 = 5|4 */
- lw s3, 12(a1) /* tmp3 = 7|6 */
- packrl.ph s1, s1, s1 /* tmp1 = 2|3 */
- packrl.ph s3, s3, s3 /* tmp3 = 6|7 */
- subq.ph s7, s1, s2 /* tmp7 = 2-5|3-4 = t5|t4 */
- subq.ph s5, s0, s3 /* tmp5 = 1-6|0-7 = t6|t7 */
- mult $0, $0 /* ac0 = 0 */
- dpa.w.ph $ac0, s7, t0 /* ac0 += t5* 6437 + t4* 2260 */
- dpa.w.ph $ac0, s5, t1 /* ac0 += t6* 9633 + t7* 11363 */
- mult $ac1, $0, $0 /* ac1 = 0 */
- dpa.w.ph $ac1, s7, t2 /* ac1 += t5*-11362 + t4* -6436 */
- dpa.w.ph $ac1, s5, t3 /* ac1 += t6* -2259 + t7* 9633 */
- mult $ac2, $0, $0 /* ac2 = 0 */
- dpa.w.ph $ac2, s7, t4 /* ac2 += t5* 2261 + t4* 9633 */
- dpa.w.ph $ac2, s5, t5 /* ac2 += t6*-11362 + t7* 6437 */
- mult $ac3, $0, $0 /* ac3 = 0 */
- dpa.w.ph $ac3, s7, t6 /* ac3 += t5* 9633 + t4*-11363 */
- dpa.w.ph $ac3, s5, t7 /* ac3 += t6* -6436 + t7* 2260 */
- addq.ph s6, s1, s2 /* tmp6 = 2+5|3+4 = t2|t3 */
- addq.ph s4, s0, s3 /* tmp4 = 1+6|0+7 = t1|t0 */
- extr_r.w s0, $ac0, 11 /* tmp0 = (ac0 + 1024) >> 11 */
- extr_r.w s1, $ac1, 11 /* tmp1 = (ac1 + 1024) >> 11 */
- extr_r.w s2, $ac2, 11 /* tmp2 = (ac2 + 1024) >> 11 */
- extr_r.w s3, $ac3, 11 /* tmp3 = (ac3 + 1024) >> 11 */
- addq.ph s5, s4, s6 /* tmp5 = t1+t2|t0+t3 = t11|t10 */
- subq.ph s7, s4, s6 /* tmp7 = t1-t2|t0-t3 = t12|t13 */
- sh s0, 2(a1) /* row element 1 */
- sh s1, 6(a1) /* row element 3 */
- sh s2, 10(a1) /* row element 5 */
- sh s3, 14(a1) /* row element 7 */
- mult $0, $0 /* ac0 = 0 */
- dpa.w.ph $ac0, s7, t8 /* ac0 += t12* 4433 + t13* 10703 */
- mult $ac1, $0, $0 /* ac1 = 0 */
- dpa.w.ph $ac1, s7, t9 /* ac1 += t12*-10704 + t13* 4433 */
- sra s4, s5, 16 /* tmp4 = t11 */
- addiu a1, a1, 16 /* next row; stores below use negative offsets */
- addiu s8, s8, -1
- extr_r.w s0, $ac0, 11 /* tmp0 = (ac0 + 1024) >> 11 */
- extr_r.w s1, $ac1, 11 /* tmp1 = (ac1 + 1024) >> 11 */
- addu s2, s5, s4 /* tmp2 = t10 + t11 */
- subu s3, s5, s4 /* tmp3 = t10 - t11 */
- sll s2, s2, 2 /* tmp2 = (t10 + t11) << 2 */
- sll s3, s3, 2 /* tmp3 = (t10 - t11) << 2 */
- sh s2, -16(a1) /* row element 0 (only the low halfword is stored) */
- sh s3, -8(a1) /* row element 4 */
- sh s0, -12(a1) /* row element 2 */
- bgtz s8, 1b
- sh s1, -4(a1) /* (delay slot) row element 6 */
- li t0, 2260 /* c0 */
- li t1, 11363 /* c1 */
- li t2, 9633 /* c2 */
- li t3, 6436 /* c3 */
- li t4, 6437 /* c4 */
- li t5, 2261 /* c5 */
- li t6, 11362 /* c6 */
- li t7, 2259 /* c7 */
- li t8, 4433 /* c8 */
- li t9, 10703 /* c9 */
- li a1, 10704 /* c10 */
- li s8, 8 /* column counter */
- 2:
- lh a2, 0(a0) /* 0 */
- lh a3, 16(a0) /* 8 */
- lh v0, 32(a0) /* 16 */
- lh v1, 48(a0) /* 24 */
- lh s4, 64(a0) /* 32 */
- lh s5, 80(a0) /* 40 */
- lh s6, 96(a0) /* 48 */
- lh s7, 112(a0) /* 56 */
- addu s2, v0, s5 /* tmp2 = 16 + 40 */
- subu s5, v0, s5 /* tmp5 = 16 - 40 */
- addu s3, v1, s4 /* tmp3 = 24 + 32 */
- subu s4, v1, s4 /* tmp4 = 24 - 32 */
- addu s0, a2, s7 /* tmp0 = 0 + 56 */
- subu s7, a2, s7 /* tmp7 = 0 - 56 */
- addu s1, a3, s6 /* tmp1 = 8 + 48 */
- subu s6, a3, s6 /* tmp6 = 8 - 48 */
- addu a2, s0, s3 /* tmp10 = tmp0 + tmp3 */
- subu v1, s0, s3 /* tmp13 = tmp0 - tmp3 */
- addu a3, s1, s2 /* tmp11 = tmp1 + tmp2 */
- subu v0, s1, s2 /* tmp12 = tmp1 - tmp2 */
- mult s7, t1 /* ac0 = tmp7 * c1 */
- madd s4, t0 /* ac0 += tmp4 * c0 */
- madd s5, t4 /* ac0 += tmp5 * c4 */
- madd s6, t2 /* ac0 += tmp6 * c2 */
- mult $ac1, s7, t2 /* ac1 = tmp7 * c2 */
- msub $ac1, s4, t3 /* ac1 -= tmp4 * c3 */
- msub $ac1, s5, t6 /* ac1 -= tmp5 * c6 */
- msub $ac1, s6, t7 /* ac1 -= tmp6 * c7 */
- mult $ac2, s7, t4 /* ac2 = tmp7 * c4 */
- madd $ac2, s4, t2 /* ac2 += tmp4 * c2 */
- madd $ac2, s5, t5 /* ac2 += tmp5 * c5 */
- msub $ac2, s6, t6 /* ac2 -= tmp6 * c6 */
- mult $ac3, s7, t0 /* ac3 = tmp7 * c0 */
- msub $ac3, s4, t1 /* ac3 -= tmp4 * c1 */
- madd $ac3, s5, t2 /* ac3 += tmp5 * c2 */
- msub $ac3, s6, t3 /* ac3 -= tmp6 * c3 */
- extr_r.w s0, $ac0, 15 /* tmp0 = (ac0 + 16384) >> 15 */
- extr_r.w s1, $ac1, 15 /* tmp1 = (ac1 + 16384) >> 15 */
- extr_r.w s2, $ac2, 15 /* tmp2 = (ac2 + 16384) >> 15 */
- extr_r.w s3, $ac3, 15 /* tmp3 = (ac3 + 16384) >> 15 */
- addiu s8, s8, -1
- addu s4, a2, a3 /* tmp4 = tmp10 + tmp11 */
- subu s5, a2, a3 /* tmp5 = tmp10 - tmp11 */
- sh s0, 16(a0) /* column element in row 1 */
- sh s1, 48(a0) /* row 3 */
- sh s2, 80(a0) /* row 5 */
- sh s3, 112(a0) /* row 7 */
- mult v0, t8 /* ac0 = tmp12 * c8 */
- madd v1, t9 /* ac0 += tmp13 * c9 */
- mult $ac1, v1, t8 /* ac1 = tmp13 * c8 */
- msub $ac1, v0, a1 /* ac1 -= tmp12 * c10 */
- addiu a0, a0, 2 /* next column; stores below compensate by -2 */
- extr_r.w s6, $ac0, 15 /* tmp6 = (ac0 + 16384) >> 15 */
- extr_r.w s7, $ac1, 15 /* tmp7 = (ac1 + 16384) >> 15 */
- shra_r.w s4, s4, 2 /* tmp4 = (tmp4 + 2) >> 2 */
- shra_r.w s5, s5, 2 /* tmp5 = (tmp5 + 2) >> 2 */
- sh s4, -2(a0) /* row 0 */
- sh s5, 62(a0) /* row 4 */
- sh s6, 30(a0) /* row 2 */
- bgtz s8, 2b
- sh s7, 94(a0) /* (delay slot) row 6 */
- RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8
- jr ra
- nop
- END(jsimd_fdct_islow_dspr2)
- /**************************************************************************/
- LEAF_DSPR2(jsimd_fdct_ifast_dspr2)
- /*
- * In-place forward DCT ("ifast" variant) on the 16-bit samples at a0.
- * The multipliers are 8-bit fixed-point constants (products are shifted
- * right by 8), each replicated into both halfwords of a register so that
- * packed dot products can be used.
- *
- * Loop 0 walks the rows (16 bytes each) until v0 reaches a0 + 128.
- * Loop 1 walks the columns (2 bytes apart, 16-byte row stride) until v0
- * reaches a0 + 16.
- *
- * In loop 0 several values are computed with packed-halfword adds whose
- * upper halfword is not meaningful; only the low halfword is stored (sh).
- *
- * a0 = data
- */
- .set at
- SAVE_REGS_ON_STACK 8, s0, s1
- li a1, 0x014e014e /* FIX_1_306562965 (334 << 16) |
- (334 & 0xffff) */
- li a2, 0x008b008b /* FIX_0_541196100 (139 << 16) |
- (139 & 0xffff) */
- li a3, 0x00620062 /* FIX_0_382683433 (98 << 16) |
- (98 & 0xffff) */
- li s1, 0x00b500b5 /* FIX_0_707106781 (181 << 16) |
- (181 & 0xffff) */
- move v0, a0
- addiu v1, v0, 128 /* end address */
- 0:
- lw t0, 0(v0) /* tmp0 = 1|0 */
- lw t1, 4(v0) /* tmp1 = 3|2 */
- lw t2, 8(v0) /* tmp2 = 5|4 */
- lw t3, 12(v0) /* tmp3 = 7|6 */
- packrl.ph t1, t1, t1 /* tmp1 = 2|3 */
- packrl.ph t3, t3, t3 /* tmp3 = 6|7 */
- subq.ph t7, t1, t2 /* tmp7 = 2-5|3-4 = t5|t4 */
- subq.ph t5, t0, t3 /* tmp5 = 1-6|0-7 = t6|t7 */
- addq.ph t6, t1, t2 /* tmp6 = 2+5|3+4 = t2|t3 */
- addq.ph t4, t0, t3 /* tmp4 = 1+6|0+7 = t1|t0 */
- addq.ph t8, t4, t6 /* tmp5 = t1+t2|t0+t3 = t11|t10 */
- subq.ph t9, t4, t6 /* tmp7 = t1-t2|t0-t3 = t12|t13 */
- sra t4, t8, 16 /* tmp4 = t11 */
- mult $0, $0 /* ac0 = 0 */
- dpa.w.ph $ac0, t9, s1 /* ac0 += t12*181 + t13*181 */
- mult $ac1, $0, $0 /* ac1 = 0 */
- dpa.w.ph $ac1, t7, a3 /* ac1 += t4*98 + t5*98 */
- dpsx.w.ph $ac1, t5, a3 /* ac1 -= t6*98 + t7*98 */
- mult $ac2, $0, $0 /* ac2 = 0 */
- dpa.w.ph $ac2, t7, a2 /* ac2 += t4*139 + t5*139 */
- mult $ac3, $0, $0 /* ac3 = 0 */
- dpa.w.ph $ac3, t5, a1 /* ac3 += t6*334 + t7*334 */
- precrq.ph.w t0, t5, t7 /* t0 = t6|t5 (upper halfwords of both) */
- addq.ph t2, t8, t4 /* tmp2 = t10 + t11 */
- subq.ph t3, t8, t4 /* tmp3 = t10 - t11 */
- extr.w t4, $ac0, 8 /* t4 = z1 = MULTIPLY(t12 + t13, 181) */
- mult $0, $0 /* ac0 = 0 */
- dpa.w.ph $ac0, t0, s1 /* ac0 += t5*181 + t6*181 */
- extr.w t0, $ac1, 8 /* t0 = z5 */
- extr.w t1, $ac2, 8 /* t1 = MULTIPLY(tmp10, 139) */
- extr.w t7, $ac3, 8 /* t7 = MULTIPLY(tmp12, 334) */
- extr.w t8, $ac0, 8 /* t8 = z3 = MULTIPLY(tmp11, 181) */
- add t6, t1, t0 /* t6 = z2 */
- add t7, t7, t0 /* t7 = z4 */
- subq.ph t0, t5, t8 /* t0 = z13 = tmp7 - z3 */
- addq.ph t8, t5, t8 /* t8 = z11 = tmp7 + z3 */
- addq.ph t1, t0, t6 /* t1 = z13 + z2 */
- subq.ph t6, t0, t6 /* t6 = z13 - z2 */
- addq.ph t0, t8, t7 /* t0 = z11 + z4 */
- subq.ph t7, t8, t7 /* t7 = z11 - z4 */
- addq.ph t5, t4, t9 /* t5 = t13 + z1 (low halfword) */
- subq.ph t4, t9, t4 /* t4 = t13 - z1 (low halfword) */
- sh t2, 0(v0) /* row element 0 */
- sh t5, 4(v0) /* row element 2 */
- sh t3, 8(v0) /* row element 4 */
- sh t4, 12(v0) /* row element 6 */
- sh t1, 10(v0) /* row element 5 */
- sh t6, 6(v0) /* row element 3 */
- sh t0, 2(v0) /* row element 1 */
- sh t7, 14(v0) /* row element 7 */
- addiu v0, 16 /* next row */
- bne v1, v0, 0b
- nop
- move v0, a0
- addiu v1, v0, 16 /* end address for the column loop */
- 1:
- lh t0, 0(v0) /* 0 */
- lh t1, 16(v0) /* 8 */
- lh t2, 32(v0) /* 16 */
- lh t3, 48(v0) /* 24 */
- lh t4, 64(v0) /* 32 */
- lh t5, 80(v0) /* 40 */
- lh t6, 96(v0) /* 48 */
- lh t7, 112(v0) /* 56 */
- add t8, t0, t7 /* t8 = tmp0 */
- sub t7, t0, t7 /* t7 = tmp7 */
- add t0, t1, t6 /* t0 = tmp1 */
- sub t1, t1, t6 /* t1 = tmp6 */
- add t6, t2, t5 /* t6 = tmp2 */
- sub t5, t2, t5 /* t5 = tmp5 */
- add t2, t3, t4 /* t2 = tmp3 */
- sub t3, t3, t4 /* t3 = tmp4 */
- add t4, t8, t2 /* t4 = tmp10 = tmp0 + tmp3 */
- sub t8, t8, t2 /* t8 = tmp13 = tmp0 - tmp3 */
- sub s0, t0, t6 /* s0 = tmp12 = tmp1 - tmp2 */
- ins t8, s0, 16, 16 /* t8 = tmp12|tmp13 */
- add t2, t0, t6 /* t2 = tmp11 = tmp1 + tmp2 */
- mult $0, $0 /* ac0 = 0 */
- dpa.w.ph $ac0, t8, s1 /* ac0 += t12*181 + t13*181 */
- add s0, t4, t2 /* s0 = tmp10+tmp11 */
- sub t4, t4, t2 /* t4 = tmp10-tmp11 */
- sh s0, 0(v0) /* row 0 */
- sh t4, 64(v0) /* row 4 */
- extr.w t2, $ac0, 8 /* z1 = MULTIPLY(tmp12+tmp13,
- FIX_0_707106781) */
- addq.ph t4, t8, t2 /* t4 = tmp13 + z1 (low halfword) */
- subq.ph t8, t8, t2 /* t8 = tmp13 - z1 (low halfword) */
- sh t4, 32(v0) /* row 2 */
- sh t8, 96(v0) /* row 6 */
- add t3, t3, t5 /* t3 = tmp10 = tmp4 + tmp5 */
- add t0, t5, t1 /* t0 = tmp11 = tmp5 + tmp6 */
- add t1, t1, t7 /* t1 = tmp12 = tmp6 + tmp7 */
- andi t4, a1, 0xffff /* t4 = 334 (low halfword of a1) */
- mul s0, t1, t4
- sra s0, s0, 8 /* s0 = z4 =
- MULTIPLY(tmp12, FIX_1_306562965) */
- ins t1, t3, 16, 16 /* t1 = tmp10|tmp12 */
- mult $0, $0 /* ac0 = 0 */
- mulsa.w.ph $ac0, t1, a3 /* ac0 += t10*98 - t12*98 */
- extr.w t8, $ac0, 8 /* z5 = MULTIPLY(tmp10-tmp12,
- FIX_0_382683433) */
- add t2, t7, t8 /* t2 = tmp7 + z5 */
- sub t7, t7, t8 /* t7 = tmp7 - z5 */
- andi t4, a2, 0xffff /* t4 = 139 (low halfword of a2) */
- mul t8, t3, t4
- sra t8, t8, 8 /* t8 = z2 =
- MULTIPLY(tmp10, FIX_0_541196100) */
- andi t4, s1, 0xffff /* t4 = 181 (low halfword of s1) */
- mul t6, t0, t4
- sra t6, t6, 8 /* t6 = z3 =
- MULTIPLY(tmp11, FIX_0_707106781) */
- add t0, t6, t8 /* t0 = z3 + z2 */
- sub t1, t6, t8 /* t1 = z3 - z2 */
- add t3, t6, s0 /* t3 = z3 + z4 */
- sub t4, t6, s0 /* t4 = z3 - z4 */
- sub t5, t2, t1 /* t5 = dataptr[5] */
- sub t6, t7, t0 /* t6 = dataptr[3] */
- add t3, t2, t3 /* t3 = dataptr[1] */
- add t4, t7, t4 /* t4 = dataptr[7] */
- sh t5, 80(v0) /* row 5 */
- sh t6, 48(v0) /* row 3 */
- sh t3, 16(v0) /* row 1 */
- sh t4, 112(v0) /* row 7 */
- addiu v0, 2 /* next column */
- bne v0, v1, 1b
- nop
- RESTORE_REGS_FROM_STACK 8, s0, s1
- j ra
- nop
- END(jsimd_fdct_ifast_dspr2)
- /*****************************************************************************/
- LEAF_DSPR2(jsimd_quantize_dspr2)
- /*
- * Quantizes the 16-bit values in workspace into coef_block, two values
- * per loop iteration.  For each value x at index i the code computes
- *   sign   = (x < 0) ? -1 : +1
- *   tmp    = (|x| + d128[i]) & 0xffff
- *   prod   = tmp * (d0[i] & 0xffff)
- *   result = (prod >> (d384[i] + 16)) * sign
- * where d0, d128 and d384 are the halfword arrays found at byte offsets
- * 0, 128 and 384 from the divisors pointer (presumably the reciprocal,
- * correction and shift tables - confirm against the C caller).
- *
- * The loop is software-pipelined: the loads and sign/abs computation for
- * the next pair happen inside the current iteration, and the final pair
- * is finished after the loop exits (a2 == workspace + 124).
- *
- * a0 = coef_block
- * a1 = divisors
- * a2 = workspace
- */
- .set at
- SAVE_REGS_ON_STACK 16, s0, s1, s2
- addiu v0, a2, 124 /* v0 = workspace_end */
- lh t0, 0(a2) /* x0 = workspace[0] */
- lh t1, 0(a1) /* d0 for x0 */
- lh t2, 128(a1) /* d128 for x0 */
- sra t3, t0, 15 /* t3 = -1 if x0 < 0, else 0 */
- sll t3, t3, 1
- addiu t3, t3, 1 /* t3 = sign of x0 (-1 or +1) */
- mul t0, t0, t3 /* t0 = |x0| */
- lh t4, 384(a1) /* d384 for x0 */
- lh t5, 130(a1) /* d128 for x1 */
- lh t6, 2(a2) /* x1 = workspace[1] */
- lh t7, 2(a1) /* d0 for x1 */
- lh t8, 386(a1) /* d384 for x1 */
- 1:
- andi t1, 0xffff /* use d0 as an unsigned 16-bit value */
- add t9, t0, t2 /* |x0| + d128 */
- andi t9, 0xffff
- mul v1, t9, t1 /* prod0 */
- sra s0, t6, 15
- sll s0, s0, 1
- addiu s0, s0, 1 /* s0 = sign of x1 (-1 or +1) */
- addiu t9, t4, 16 /* shift count = d384 + 16 */
- srav v1, v1, t9
- mul v1, v1, t3 /* reapply the sign of x0 */
- mul t6, t6, s0 /* t6 = |x1| */
- andi t7, 0xffff
- addiu a2, a2, 4 /* advance workspace by two values */
- addiu a1, a1, 4 /* advance divisors by two values */
- add s1, t6, t5 /* |x1| + d128 */
- andi s1, 0xffff
- sh v1, 0(a0) /* store first result of the pair */
- mul s2, s1, t7 /* prod1 */
- addiu s1, t8, 16 /* shift count = d384 + 16 */
- srav s2, s2, s1
- mul s2, s2, s0 /* reapply the sign of x1 */
- lh t0, 0(a2) /* start of next pair: x0 */
- lh t1, 0(a1)
- sra t3, t0, 15
- sll t3, t3, 1
- addiu t3, t3, 1
- mul t0, t0, t3
- lh t2, 128(a1)
- lh t4, 384(a1)
- lh t5, 130(a1)
- lh t8, 386(a1)
- lh t6, 2(a2)
- lh t7, 2(a1)
- sh s2, 2(a0) /* store second result of the pair */
- lh t0, 0(a2) /* NOTE(review): this load and the four instructions
- after it repeat the x0 sign/abs computation done
- just above; looks redundant unless coef_block can
- overlap workspace - confirm before removing */
- sra t3, t0, 15
- sll t3, t3, 1
- addiu t3, t3, 1
- mul t0, t0, t3
- bne a2, v0, 1b
- addiu a0, a0, 4 /* (delay slot) advance coef_block by two values */
- andi t1, 0xffff /* epilogue: finish the last pair */
- add t9, t0, t2
- andi t9, 0xffff
- mul v1, t9, t1
- sra s0, t6, 15
- sll s0, s0, 1
- addiu s0, s0, 1
- addiu t9, t4, 16
- srav v1, v1, t9
- mul v1, v1, t3
- mul t6, t6, s0
- andi t7, 0xffff
- sh v1, 0(a0)
- add s1, t6, t5
- andi s1, 0xffff
- mul s2, s1, t7
- addiu s1, t8, 16
- addiu a2, a2, 4
- addiu a1, a1, 4
- srav s2, s2, s1
- mul s2, s2, s0
- sh s2, 2(a0)
- RESTORE_REGS_FROM_STACK 16, s0, s1, s2
- j ra
- nop
- END(jsimd_quantize_dspr2)
- #ifndef __mips_soft_float
- /*****************************************************************************/
- LEAF_DSPR2(jsimd_quantize_float_dspr2)
- /*
- * Converts 64 single-precision values to 16-bit coefficients, eight per
- * loop iteration.  For each index i:
- *   coef_block[i] = trunc(workspace[i] * divisors[i] + 16384.5) - 16384
- * with only the low 16 bits of the result stored (sh).  The second group
- * of four is multiplied while the first group is being converted and
- * stored.
- *
- * a0 = coef_block
- * a1 = divisors
- * a2 = workspace
- */
- .set at
- li t1, 0x46800100 /* integer representation 16384.5 */
- mtc1 t1, f0 /* f0 = 16384.5f */
- li t0, 63 /* loop counter: 63, 55, ..., 7 -> eight iterations */
- 0:
- lwc1 f2, 0(a2) /* workspace[0..3] -> f2, f4, f6, f8 */
- lwc1 f10, 0(a1) /* divisors[0..3] -> f10, f12, f14, f16 */
- lwc1 f4, 4(a2)
- lwc1 f12, 4(a1)
- lwc1 f6, 8(a2)
- lwc1 f14, 8(a1)
- lwc1 f8, 12(a2)
- lwc1 f16, 12(a1)
- madd.s f2, f0, f2, f10 /* f2 = f2 * f10 + 16384.5 */
- madd.s f4, f0, f4, f12
- madd.s f6, f0, f6, f14
- madd.s f8, f0, f8, f16
- lwc1 f10, 16(a1) /* divisors[4..7] */
- lwc1 f12, 20(a1)
- trunc.w.s f2, f2 /* convert to integer, rounding toward zero */
- trunc.w.s f4, f4
- trunc.w.s f6, f6
- trunc.w.s f8, f8
- lwc1 f14, 24(a1)
- lwc1 f16, 28(a1)
- mfc1 t1, f2
- mfc1 t2, f4
- mfc1 t3, f6
- mfc1 t4, f8
- lwc1 f2, 16(a2) /* workspace[4..7] */
- lwc1 f4, 20(a2)
- lwc1 f6, 24(a2)
- lwc1 f8, 28(a2)
- madd.s f2, f0, f2, f10
- madd.s f4, f0, f4, f12
- madd.s f6, f0, f6, f14
- madd.s f8, f0, f8, f16
- addiu t1, t1, -16384 /* remove the 16384 bias */
- addiu t2, t2, -16384
- addiu t3, t3, -16384
- addiu t4, t4, -16384
- trunc.w.s f2, f2
- trunc.w.s f4, f4
- trunc.w.s f6, f6
- trunc.w.s f8, f8
- sh t1, 0(a0) /* coef_block[0..3] */
- sh t2, 2(a0)
- sh t3, 4(a0)
- sh t4, 6(a0)
- mfc1 t1, f2
- mfc1 t2, f4
- mfc1 t3, f6
- mfc1 t4, f8
- addiu t0, t0, -8
- addiu a2, a2, 32 /* eight floats consumed */
- addiu a1, a1, 32
- addiu t1, t1, -16384 /* remove the 16384 bias */
- addiu t2, t2, -16384
- addiu t3, t3, -16384
- addiu t4, t4, -16384
- sh t1, 8(a0) /* coef_block[4..7] */
- sh t2, 10(a0)
- sh t3, 12(a0)
- sh t4, 14(a0)
- bgez t0, 0b
- addiu a0, a0, 16 /* (delay slot) eight coefficients written */
- j ra
- nop
- END(jsimd_quantize_float_dspr2)
- #endif
- /*****************************************************************************/
- LEAF_DSPR2(jsimd_idct_2x2_dspr2)
- /*
- * Reduced-size inverse DCT producing a 2x2 block of output bytes.
- *
- * Pass 1 processes the input columns at halfword offsets 0, 1, 3, 5 and 7
- * (byte offsets 0, 2, 6, 10, 14); the other columns are not read.  For
- * each column it computes
- *   tmp10 = (in[row 0] * quant[row 0]) << 15
- *   tmp0  = row1 * 29692 + row3 * -10426 + row5 * 6967 + row7 * -5906
- * (each rowN being the dequantized product) and stores
- * (tmp10 + tmp0) and (tmp10 - tmp0), both rounded >> 13, into a 40-byte
- * stack scratch area: words at 0..16 hold the first output row, words at
- * 20..36 the second.
- *
- * Pass 2 applies the same formula across each scratch row, descales with
- * a rounded >> 20, clamps to signed 8 bits, adds 128 and stores two bytes
- * at output_buf[row] + output_col.
- *
- * a0 = compptr->dct_table
- * a1 = coef_block
- * a2 = output_buf
- * a3 = output_col
- */
- .set at
- SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, s5
- addiu sp, sp, -40 /* 10-word scratch area for the pass-1 results */
- move v0, sp
- addiu s2, zero, 29692 /* scalar copies of the four multipliers ... */
- addiu s3, zero, -10426
- addiu s4, zero, 6967
- addiu s5, zero, -5906 /* ... used by pass 2 */
- lh t0, 0(a1) /* t0 = inptr[DCTSIZE*0] */
- lh t5, 0(a0) /* t5 = quantptr[DCTSIZE*0] */
- lh t1, 48(a1) /* t1 = inptr[DCTSIZE*3] */
- lh t6, 48(a0) /* t6 = quantptr[DCTSIZE*3] */
- mul t4, t5, t0 /* t4 = dequantized row 0 of column 0 */
- lh t0, 16(a1) /* t0 = inptr[DCTSIZE*1] */
- lh t5, 16(a0) /* t5 = quantptr[DCTSIZE*1] */
- mul t6, t6, t1 /* t6 = dequantized row 3 */
- mul t5, t5, t0 /* t5 = dequantized row 1 */
- lh t2, 80(a1) /* t2 = inptr[DCTSIZE*5] */
- lh t7, 80(a0) /* t7 = quantptr[DCTSIZE*5] */
- lh t3, 112(a1) /* t3 = inptr[DCTSIZE*7] */
- lh t8, 112(a0) /* t8 = quantptr[DCTSIZE*7] */
- mul t7, t7, t2 /* t7 = dequantized row 5 */
- mul t8, t8, t3 /* t8 = dequantized row 7 */
- mult zero, zero /* ac0 = 0; issued after the last mul because pre-R6
- MIPS32 leaves HI/LO UNPREDICTABLE after mul */
- li s0, 0x73FCD746 /* s0 = (29692 << 16) | (-10426 & 0xffff) */
- li s1, 0x1B37E8EE /* s1 = (6967 << 16) | (-5906 & 0xffff) */
- ins t6, t5, 16, 16 /* t6 = t5|t6 */
- sll t4, t4, 15 /* tmp10 = row 0 << 15 */
- dpa.w.ph $ac0, t6, s0 /* ac0 += row1*29692 + row3*-10426 */
- lh t1, 2(a1) /* column 1: input row 0 */
- lh t6, 2(a0) /* column 1: quant row 0 */
- ins t8, t7, 16, 16 /* t8 = t7|t8 */
- dpa.w.ph $ac0, t8, s1 /* ac0 += row5*6967 + row7*-5906 */
- mflo t0, $ac0 /* t0 = tmp0 of column 0 */
- mul t5, t6, t1 /* column 1: dequantized row 0 */
- lh t1, 18(a1)
- lh t6, 18(a0)
- lh t2, 50(a1)
- lh t7, 50(a0)
- mul t6, t6, t1 /* column 1: row 1 */
- subu t8, t4, t0 /* tmp10 - tmp0 */
- mul t7, t7, t2 /* column 1: row 3 */
- addu t0, t4, t0 /* tmp10 + tmp0 */
- shra_r.w t0, t0, 13 /* rounded >> 13 */
- lh t1, 82(a1)
- lh t2, 82(a0)
- lh t3, 114(a1)
- lh t4, 114(a0)
- shra_r.w t8, t8, 13 /* rounded >> 13 */
- mul t1, t1, t2 /* column 1: row 5 */
- mul t3, t3, t4 /* column 1: row 7 */
- sw t0, 0(v0) /* column 0, first output row */
- sw t8, 20(v0) /* column 0, second output row */
- sll t4, t5, 15 /* column 1: tmp10 */
- ins t7, t6, 16, 16 /* t7 = row1|row3 */
- mult zero, zero /* ac0 = 0 */
- dpa.w.ph $ac0, t7, s0
- ins t3, t1, 16, 16 /* t3 = row5|row7 */
- lh t1, 6(a1) /* column 3: input row 0 */
- lh t6, 6(a0) /* column 3: quant row 0 */
- dpa.w.ph $ac0, t3, s1
- mflo t0, $ac0 /* t0 = tmp0 of column 1 */
- mul t5, t6, t1 /* column 3: dequantized row 0 */
- lh t1, 22(a1)
- lh t6, 22(a0)
- lh t2, 54(a1)
- lh t7, 54(a0)
- mul t6, t6, t1 /* column 3: row 1 */
- subu t8, t4, t0
- mul t7, t7, t2 /* column 3: row 3 */
- addu t0, t4, t0
- shra_r.w t0, t0, 13
- lh t1, 86(a1)
- lh t2, 86(a0)
- lh t3, 118(a1)
- lh t4, 118(a0)
- shra_r.w t8, t8, 13
- mul t1, t1, t2 /* column 3: row 5 */
- mul t3, t3, t4 /* column 3: row 7 */
- sw t0, 4(v0) /* column 1, first output row */
- sw t8, 24(v0) /* column 1, second output row */
- sll t4, t5, 15 /* column 3: tmp10 */
- ins t7, t6, 16, 16
- mult zero, zero /* ac0 = 0 */
- dpa.w.ph $ac0, t7, s0
- ins t3, t1, 16, 16
- lh t1, 10(a1) /* column 5: input row 0 */
- lh t6, 10(a0) /* column 5: quant row 0 */
- dpa.w.ph $ac0, t3, s1
- mflo t0, $ac0 /* t0 = tmp0 of column 3 */
- mul t5, t6, t1 /* column 5: dequantized row 0 */
- lh t1, 26(a1)
- lh t6, 26(a0)
- lh t2, 58(a1)
- lh t7, 58(a0)
- mul t6, t6, t1 /* column 5: row 1 */
- subu t8, t4, t0
- mul t7, t7, t2 /* column 5: row 3 */
- addu t0, t4, t0
- shra_r.w t0, t0, 13
- lh t1, 90(a1)
- lh t2, 90(a0)
- lh t3, 122(a1)
- lh t4, 122(a0)
- shra_r.w t8, t8, 13
- mul t1, t1, t2 /* column 5: row 5 */
- mul t3, t3, t4 /* column 5: row 7 */
- sw t0, 8(v0) /* column 3, first output row */
- sw t8, 28(v0) /* column 3, second output row */
- sll t4, t5, 15 /* column 5: tmp10 */
- ins t7, t6, 16, 16
- mult zero, zero /* ac0 = 0 */
- dpa.w.ph $ac0, t7, s0
- ins t3, t1, 16, 16
- lh t1, 14(a1) /* column 7: input row 0 */
- lh t6, 14(a0) /* column 7: quant row 0 */
- dpa.w.ph $ac0, t3, s1
- mflo t0, $ac0 /* t0 = tmp0 of column 5 */
- mul t5, t6, t1 /* column 7: dequantized row 0 */
- lh t1, 30(a1)
- lh t6, 30(a0)
- lh t2, 62(a1)
- lh t7, 62(a0)
- mul t6, t6, t1 /* column 7: row 1 */
- subu t8, t4, t0
- mul t7, t7, t2 /* column 7: row 3 */
- addu t0, t4, t0
- shra_r.w t0, t0, 13
- lh t1, 94(a1)
- lh t2, 94(a0)
- lh t3, 126(a1)
- lh t4, 126(a0)
- shra_r.w t8, t8, 13
- mul t1, t1, t2 /* column 7: row 5 */
- mul t3, t3, t4 /* column 7: row 7 */
- sw t0, 12(v0) /* column 5, first output row */
- sw t8, 32(v0) /* column 5, second output row */
- sll t4, t5, 15 /* column 7: tmp10 */
- ins t7, t6, 16, 16
- mult zero, zero /* ac0 = 0 */
- dpa.w.ph $ac0, t7, s0
- ins t3, t1, 16, 16
- dpa.w.ph $ac0, t3, s1
- mflo t0, $ac0 /* t0 = tmp0 of column 7 */
- lw t9, 0(a2) /* output_buf[0] */
- lw t3, 0(v0) /* pass 2, first row: scratch word 0 */
- lw t7, 4(v0) /* scratch word 1 */
- lw t1, 8(v0) /* scratch word 2 */
- addu t9, t9, a3 /* + output_col */
- sll t3, t3, 15 /* tmp10 = word 0 << 15 */
- subu t8, t4, t0
- addu t0, t4, t0
- shra_r.w t0, t0, 13
- shra_r.w t8, t8, 13
- sw t0, 16(v0) /* column 7, first output row */
- sw t8, 36(v0) /* column 7, second output row */
- lw t5, 12(v0) /* scratch word 3 */
- lw t6, 16(v0) /* scratch word 4 (just stored above) */
- mult t7, s2 /* ac0 = word1 * 29692 */
- madd t1, s3 /* ac0 += word2 * -10426 */
- madd t5, s4 /* ac0 += word3 * 6967 */
- madd t6, s5 /* ac0 += word4 * -5906 */
- lw t5, 24(v0) /* second row: scratch word 6 */
- lw t7, 28(v0) /* scratch word 7 */
- mflo t0, $ac0 /* t0 = tmp0 of the first row */
- lw t8, 32(v0) /* scratch word 8 */
- lw t2, 36(v0) /* scratch word 9 */
- mult $ac1, t5, s2 /* ac1 = word6 * 29692 */
- madd $ac1, t7, s3 /* ac1 += word7 * -10426 */
- madd $ac1, t8, s4 /* ac1 += word8 * 6967 */
- madd $ac1, t2, s5 /* ac1 += word9 * -5906 */
- addu t1, t3, t0 /* tmp10 + tmp0 */
- subu t6, t3, t0 /* tmp10 - tmp0 */
- shra_r.w t1, t1, 20 /* rounded >> 20 */
- shra_r.w t6, t6, 20
- mflo t4, $ac1 /* t4 = tmp0 of the second row */
- shll_s.w t1, t1, 24 /* saturating << 24 followed by >> 24 ... */
- shll_s.w t6, t6, 24
- sra t1, t1, 24 /* ... clamps the value to signed 8 bits */
- sra t6, t6, 24
- addiu t1, t1, 128 /* shift into the 0..255 range */
- addiu t6, t6, 128
- lw t0, 20(v0) /* second row: scratch word 5 */
- sb t1, 0(t9) /* first row, byte 0 */
- sb t6, 1(t9) /* first row, byte 1 */
- sll t0, t0, 15 /* tmp10 = word 5 << 15 */
- lw t9, 4(a2) /* output_buf[1] */
- addu t1, t0, t4 /* tmp10 + tmp0 */
- subu t6, t0, t4 /* tmp10 - tmp0 */
- addu t9, t9, a3 /* + output_col */
- shra_r.w t1, t1, 20
- shra_r.w t6, t6, 20
- shll_s.w t1, t1, 24
- shll_s.w t6, t6, 24
- sra t1, t1, 24
- sra t6, t6, 24
- addiu t1, t1, 128
- addiu t6, t6, 128
- sb t1, 0(t9) /* second row, byte 0 */
- sb t6, 1(t9) /* second row, byte 1 */
- addiu sp, sp, 40 /* release the scratch area */
- RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, s5
- j ra
- nop
- END(jsimd_idct_2x2_dspr2)
- /*****************************************************************************/
- LEAF_DSPR2(jsimd_idct_4x4_dspr2)
- /*
- * a0 = compptr->dct_table
- * a1 = coef_block
- * a2 = output_buf
- * a3 = output_col
- * 16(sp) = workspace[DCTSIZE*4] (buffers data between passes)
- */
- .set at
- SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- lw v1, 48(sp)
- move t0, a1
- move t1, v1
- li t9, 4
- li s0, 0x2e75f93e
- li s1, 0x21f9ba79
- li s2, 0xecc2efb0
- li s3, 0x52031ccd
- 0:
- lh s6, 32(t0) /* inptr[DCTSIZE*2] */
- lh t6, 32(a0) /* quantptr[DCTSIZE*2] */
- lh s7, 96(t0) /* inptr[DCTSIZE*6] */
- lh t7, 96(a0) /* quantptr[DCTSIZE*6] */
- mul t6, s6, t6 /* z2 = (inptr[DCTSIZE*2] *
- quantptr[DCTSIZE*2]) */
- lh s4, 0(t0) /* inptr[DCTSIZE*0] */
- mul t7, s7, t7 /* z3 = (inptr[DCTSIZE*6] *
- quantptr[DCTSIZE*6]) */
- lh s5, 0(a0) /* quantptr[0] */
- li s6, 15137
- li s7, 6270
- mul t2, s4, s5 /* tmp0 = (inptr[0] * quantptr[0]) */
- mul t6, s6, t6 /* z2 = (inptr[DCTSIZE*2] *
- quantptr[DCTSIZE*2]) */
- lh t5, 112(t0) /* inptr[DCTSIZE*7] */
- mul t7, s7, t7 /* z3 = (inptr[DCTSIZE*6] *
- quantptr[DCTSIZE*6]) */
- lh s4, 112(a0) /* quantptr[DCTSIZE*7] */
- lh v0, 80(t0) /* inptr[DCTSIZE*5] */
- lh s5, 80(a0) /* quantptr[DCTSIZE*5] */
- lh s6, 48(a0) /* quantptr[DCTSIZE*3] */
- sll t2, t2, 14 /* tmp0 <<= (CONST_BITS+1) */
- lh s7, 16(a0) /* quantptr[DCTSIZE*1] */
- lh t8, 16(t0) /* inptr[DCTSIZE*1] */
- subu t6, t6, t7 /* tmp2 =
- MULTIPLY(z2, t5) - MULTIPLY(z3, t6) */
- lh t7, 48(t0) /* inptr[DCTSIZE*3] */
- mul t5, s4, t5 /* z1 = (inptr[DCTSIZE*7] *
- quantptr[DCTSIZE*7]) */
- mul v0, s5, v0 /* z2 = (inptr[DCTSIZE*5] *
- quantptr[DCTSIZE*5]) */
- mul t7, s6, t7 /* z3 = (inptr[DCTSIZE*3] *
- quantptr[DCTSIZE*3]) */
- mul t8, s7, t8 /* z4 = (inptr[DCTSIZE*1] *
- quantptr[DCTSIZE*1]) */
- addu t3, t2, t6 /* tmp10 = tmp0 + z2 */
- subu t4, t2, t6 /* tmp10 = tmp0 - z2 */
- mult $ac0, zero, zero
- mult $ac1, zero, zero
- ins t5, v0, 16, 16
- ins t7, t8, 16, 16
- addiu t9, t9, -1
- dpa.w.ph $ac0, t5, s0
- dpa.w.ph $ac0, t7, s1
- dpa.w.ph $ac1, t5, s2
- dpa.w.ph $ac1, t7, s3
- mflo s4, $ac0
- mflo s5, $ac1
- addiu a0, a0, 2
- addiu t1, t1, 4
- addiu t0, t0, 2
- addu t6, t4, s4
- subu t5, t4, s4
- addu s6, t3, s5
- subu s7, t3, s5
- shra_r.w t6, t6, 12 /* DESCALE(tmp12 + temp1, 12) */
- shra_r.w t5, t5, 12 /* DESCALE(tmp12 - temp1, 12) */
- shra_r.w s6, s6, 12 /* DESCALE(tmp10 + temp2, 12) */
- shra_r.w s7, s7, 12 /* DESCALE(tmp10 - temp2, 12) */
- sw t6, 28(t1)
- sw t5, 60(t1)
- sw s6, -4(t1)
- bgtz t9, 0b
- sw s7, 92(t1)
- /* second loop three pass */
- li t9, 3
- 1:
- lh s6, 34(t0) /* inptr[DCTSIZE*2] */
- lh t6, 34(a0) /* quantptr[DCTSIZE*2] */
- lh s7, 98(t0) /* inptr[DCTSIZE*6] */
- lh t7, 98(a0) /* quantptr[DCTSIZE*6] */
- mul t6, s6, t6 /* z2 = (inptr[DCTSIZE*2] *
- quantptr[DCTSIZE*2]) */
- lh s4, 2(t0) /* inptr[DCTSIZE*0] */
- mul t7, s7, t7 /* z3 = (inptr[DCTSIZE*6] *
- quantptr[DCTSIZE*6]) */
- lh s5, 2(a0) /* quantptr[DCTSIZE*0] */
- li s6, 15137
- li s7, 6270
- mul t2, s4, s5 /* tmp0 = (inptr[0] * quantptr[0]) */
- mul v0, s6, t6 /* z2 = (inptr[DCTSIZE*2] *
- quantptr[DCTSIZE*2]) */
- lh t5, 114(t0) /* inptr[DCTSIZE*7] */
- mul t7, s7, t7 /* z3 = (inptr[DCTSIZE*6] *
- quantptr[DCTSIZE*6]) */
- lh s4, 114(a0) /* quantptr[DCTSIZE*7] */
- lh s5, 82(a0) /* quantptr[DCTSIZE*5] */
- lh t6, 82(t0) /* inptr[DCTSIZE*5] */
- sll t2, t2, 14 /* tmp0 <<= (CONST_BITS+1) */
- lh s6, 50(a0) /* quantptr[DCTSIZE*3] */
- lh t8, 18(t0) /* inptr[DCTSIZE*1] */
- subu v0, v0, t7 /* tmp2 =
- MULTIPLY(z2, t5) - MULTIPLY(z3, t6) */
- lh t7, 50(t0) /* inptr[DCTSIZE*3] */
- lh s7, 18(a0) /* quantptr[DCTSIZE*1] */
- mul t5, s4, t5 /* z1 = (inptr[DCTSIZE*7] *
- quantptr[DCTSIZE*7]) */
- mul t6, s5, t6 /* z2 = (inptr[DCTSIZE*5] *
- quantptr[DCTSIZE*5]) */
- mul t7, s6, t7 /* z3 = (inptr[DCTSIZE*3] *
- quantptr[DCTSIZE*3]) */
- mul t8, s7, t8 /* z4 = (inptr[DCTSIZE*1] *
- quantptr[DCTSIZE*1]) */
- addu t3, t2, v0 /* tmp10 = tmp0 + z2 */
- subu t4, t2, v0 /* tmp10 = tmp0 - z2 */
- mult $ac0, zero, zero
- mult $ac1, zero, zero
- ins t5, t6, 16, 16
- ins t7, t8, 16, 16
- dpa.w.ph $ac0, t5, s0
- dpa.w.ph $ac0, t7, s1
- dpa.w.ph $ac1, t5, s2
- dpa.w.ph $ac1, t7, s3
- mflo t5, $ac0
- mflo t6, $ac1
- addiu t9, t9, -1
- addiu t0, t0, 2
- addiu a0, a0, 2
- addiu t1, t1, 4
- addu s5, t4, t5
- subu s4, t4, t5
- addu s6, t3, t6
- subu s7, t3, t6
- shra_r.w s5, s5, 12 /* DESCALE(tmp12 + temp1, 12) */
- shra_r.w s4, s4, 12 /* DESCALE(tmp12 - temp1, 12) */
- shra_r.w s6, s6, 12 /* DESCALE(tmp10 + temp2, 12) */
- shra_r.w s7, s7, 12 /* DESCALE(tmp10 - temp2, 12) */
- sw s5, 32(t1)
- sw s4, 64(t1)
- sw s6, 0(t1)
- bgtz t9, 1b
- sw s7, 96(t1)
- move t1, v1
- li s4, 15137
- lw s6, 8(t1) /* wsptr[2] */
- li s5, 6270
- lw s7, 24(t1) /* wsptr[6] */
- mul s4, s4, s6 /* MULTIPLY((JLONG)wsptr[2],
- FIX_1_847759065) */
- lw t2, 0(t1) /* wsptr[0] */
- mul s5, s5, s7 /* MULTIPLY((JLONG)wsptr[6],
- -FIX_0_765366865) */
- lh t5, 28(t1) /* wsptr[7] */
- lh t6, 20(t1) /* wsptr[5] */
- lh t7, 12(t1) /* wsptr[3] */
- lh t8, 4(t1) /* wsptr[1] */
- ins t5, t6, 16, 16
- ins t7, t8, 16, 16
- mult $ac0, zero, zero
- dpa.w.ph $ac0, t5, s0
- dpa.w.ph $ac0, t7, s1
- mult $ac1, zero, zero
- dpa.w.ph $ac1, t5, s2
- dpa.w.ph $ac1, t7, s3
- sll t2, t2, 14 /* tmp0 =
- ((JLONG)wsptr[0]) << (CONST_BITS+1) */
- mflo s6, $ac0
- /* MULTIPLY(wsptr[2], FIX_1_847759065) +
- MULTIPLY(wsptr[6], -FIX_0_765366865) */
- subu s4, s4, s5
- addu t3, t2, s4 /* tmp10 = tmp0 + z2 */
- mflo s7, $ac1
- subu t4, t2, s4 /* tmp10 = tmp0 - z2 */
- addu t7, t4, s6
- subu t8, t4, s6
- addu t5, t3, s7
- subu t6, t3, s7
- shra_r.w t5, t5, 19 /* DESCALE(tmp10 + temp2, 19) */
- shra_r.w t6, t6, 19 /* DESCALE(tmp10 - temp2, 19) */
- shra_r.w t7, t7, 19 /* DESCALE(tmp12 + temp1, 19) */
- shra_r.w t8, t8, 19 /* DESCALE(tmp12 - temp1, 19) */
- sll s4, t9, 2
- lw v0, 0(a2) /* output_buf[ctr] */
- shll_s.w t5, t5, 24
- shll_s.w t6, t6, 24
- shll_s.w t7, t7, 24
- shll_s.w t8, t8, 24
- sra t5, t5, 24
- sra t6, t6, 24
- sra t7, t7, 24
- sra t8, t8, 24
- addu v0, v0, a3 /* outptr = output_buf[ctr] + output_col */
- addiu t5, t5, 128
- addiu t6, t6, 128
- addiu t7, t7, 128
- addiu t8, t8, 128
- sb t5, 0(v0)
- sb t7, 1(v0)
- sb t8, 2(v0)
- sb t6, 3(v0)
- /* 2 */
- li s4, 15137
- lw s6, 40(t1) /* wsptr[2] */
- li s5, 6270
- lw s7, 56(t1) /* wsptr[6] */
- mul s4, s4, s6 /* MULTIPLY((JLONG)wsptr[2],
- FIX_1_847759065) */
- lw t2, 32(t1) /* wsptr[0] */
- mul s5, s5, s7 /* MULTIPLY((JLONG)wsptr[6],
- -FIX_0_765366865) */
- lh t5, 60(t1) /* wsptr[7] */
- lh t6, 52(t1) /* wsptr[5] */
- lh t7, 44(t1) /* wsptr[3] */
- lh t8, 36(t1) /* wsptr[1] */
- ins t5, t6, 16, 16
- ins t7, t8, 16, 16
- mult $ac0, zero, zero
- dpa.w.ph $ac0, t5, s0
- dpa.w.ph $ac0, t7, s1
- mult $ac1, zero, zero
- dpa.w.ph $ac1, t5, s2
- dpa.w.ph $ac1, t7, s3
- sll t2, t2, 14 /* tmp0 =
- ((JLONG)wsptr[0]) << (CONST_BITS+1) */
- mflo s6, $ac0
- /* MULTIPLY(wsptr[2], FIX_1_847759065) +
- MULTIPLY(wsptr[6], -FIX_0_765366865) */
- subu s4, s4, s5
- addu t3, t2, s4 /* tmp10 = tmp0 + z2 */
- mflo s7, $ac1
- subu t4, t2, s4 /* tmp10 = tmp0 - z2 */
- addu t7, t4, s6
- subu t8, t4, s6
- addu t5, t3, s7
- subu t6, t3, s7
- shra_r.w t5, t5, 19 /* DESCALE(tmp10 + temp2,
- CONST_BITS-PASS1_BITS+1) */
- shra_r.w t6, t6, 19 /* DESCALE(tmp10 - temp2,
- CONST_BITS-PASS1_BITS+1) */
- shra_r.w t7, t7, 19 /* DESCALE(tmp12 + temp1,
- CONST_BITS-PASS1_BITS+1) */
- shra_r.w t8, t8, 19 /* DESCALE(tmp12 - temp1,
- CONST_BITS-PASS1_BITS+1) */
- sll s4, t9, 2
- lw v0, 4(a2) /* output_buf[ctr] */
- shll_s.w t5, t5, 24
- shll_s.w t6, t6, 24
- shll_s.w t7, t7, 24
- shll_s.w t8, t8, 24
- sra t5, t5, 24
- sra t6, t6, 24
- sra t7, t7, 24
- sra t8, t8, 24
- addu v0, v0, a3 /* outptr = output_buf[ctr] + output_col */
- addiu t5, t5, 128
- addiu t6, t6, 128
- addiu t7, t7, 128
- addiu t8, t8, 128
- sb t5, 0(v0)
- sb t7, 1(v0)
- sb t8, 2(v0)
- sb t6, 3(v0)
- /* 3 */
- li s4, 15137
- lw s6, 72(t1) /* wsptr[2] */
- li s5, 6270
- lw s7, 88(t1) /* wsptr[6] */
- mul s4, s4, s6 /* MULTIPLY((JLONG)wsptr[2],
- FIX_1_847759065) */
- lw t2, 64(t1) /* wsptr[0] */
- mul s5, s5, s7 /* MULTIPLY((JLONG)wsptr[6],
- -FIX_0_765366865) */
- lh t5, 92(t1) /* wsptr[7] */
- lh t6, 84(t1) /* wsptr[5] */
- lh t7, 76(t1) /* wsptr[3] */
- lh t8, 68(t1) /* wsptr[1] */
- ins t5, t6, 16, 16
- ins t7, t8, 16, 16
- mult $ac0, zero, zero
- dpa.w.ph $ac0, t5, s0
- dpa.w.ph $ac0, t7, s1
- mult $ac1, zero, zero
- dpa.w.ph $ac1, t5, s2
- dpa.w.ph $ac1, t7, s3
- sll t2, t2, 14 /* tmp0 =
- ((JLONG)wsptr[0]) << (CONST_BITS+1) */
- mflo s6, $ac0
- /* MULTIPLY(wsptr[2], FIX_1_847759065) +
- MULTIPLY(wsptr[6], -FIX_0_765366865) */
- subu s4, s4, s5
- addu t3, t2, s4 /* tmp10 = tmp0 + z2 */
- mflo s7, $ac1
- subu t4, t2, s4 /* tmp10 = tmp0 - z2 */
- addu t7, t4, s6
- subu t8, t4, s6
- addu t5, t3, s7
- subu t6, t3, s7
- shra_r.w t5, t5, 19 /* DESCALE(tmp10 + temp2, 19) */
- shra_r.w t6, t6, 19 /* DESCALE(tmp10 - temp2, 19) */
- shra_r.w t7, t7, 19 /* DESCALE(tmp12 + temp1, 19) */
- shra_r.w t8, t8, 19 /* DESCALE(tmp12 - temp1, 19) */
- sll s4, t9, 2
- lw v0, 8(a2) /* output_buf[ctr] */
- shll_s.w t5, t5, 24
- shll_s.w t6, t6, 24
- shll_s.w t7, t7, 24
- shll_s.w t8, t8, 24
- sra t5, t5, 24
- sra t6, t6, 24
- sra t7, t7, 24
- sra t8, t8, 24
- addu v0, v0, a3 /* outptr = output_buf[ctr] + output_col */
- addiu t5, t5, 128
- addiu t6, t6, 128
- addiu t7, t7, 128
- addiu t8, t8, 128
- sb t5, 0(v0)
- sb t7, 1(v0)
- sb t8, 2(v0)
- sb t6, 3(v0)
- li s4, 15137
- lw s6, 104(t1) /* wsptr[2] */
- li s5, 6270
- lw s7, 120(t1) /* wsptr[6] */
- mul s4, s4, s6 /* MULTIPLY((JLONG)wsptr[2],
- FIX_1_847759065) */
- lw t2, 96(t1) /* wsptr[0] */
- mul s5, s5, s7 /* MULTIPLY((JLONG)wsptr[6],
- -FIX_0_765366865) */
- lh t5, 124(t1) /* wsptr[7] */
- lh t6, 116(t1) /* wsptr[5] */
- lh t7, 108(t1) /* wsptr[3] */
- lh t8, 100(t1) /* wsptr[1] */
- ins t5, t6, 16, 16
- ins t7, t8, 16, 16
- mult $ac0, zero, zero
- dpa.w.ph $ac0, t5, s0
- dpa.w.ph $ac0, t7, s1
- mult $ac1, zero, zero
- dpa.w.ph $ac1, t5, s2
- dpa.w.ph $ac1, t7, s3
- sll t2, t2, 14 /* tmp0 =
- ((JLONG)wsptr[0]) << (CONST_BITS+1) */
- mflo s6, $ac0
- /* MULTIPLY(wsptr[2], FIX_1_847759065) +
- MULTIPLY(wsptr[6], -FIX_0_765366865) */
- subu s4, s4, s5
- addu t3, t2, s4 /* tmp10 = tmp0 + z2; */
- mflo s7, $ac1
- subu t4, t2, s4 /* tmp10 = tmp0 - z2; */
- addu t7, t4, s6
- subu t8, t4, s6
- addu t5, t3, s7
- subu t6, t3, s7
- shra_r.w t5, t5, 19 /* DESCALE(tmp10 + temp2, 19) */
- shra_r.w t6, t6, 19 /* DESCALE(tmp10 - temp2, 19) */
- shra_r.w t7, t7, 19 /* DESCALE(tmp12 + temp1, 19) */
- shra_r.w t8, t8, 19 /* DESCALE(tmp12 - temp1, 19) */
- sll s4, t9, 2
- lw v0, 12(a2) /* output_buf[ctr] */
- shll_s.w t5, t5, 24
- shll_s.w t6, t6, 24
- shll_s.w t7, t7, 24
- shll_s.w t8, t8, 24
- sra t5, t5, 24
- sra t6, t6, 24
- sra t7, t7, 24
- sra t8, t8, 24
- addu v0, v0, a3 /* outptr = output_buf[ctr] + output_col */
- addiu t5, t5, 128
- addiu t6, t6, 128
- addiu t7, t7, 128
- addiu t8, t8, 128
- sb t5, 0(v0)
- sb t7, 1(v0)
- sb t8, 2(v0)
- sb t6, 3(v0)
- RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- j ra
- nop
- END(jsimd_idct_4x4_dspr2)
- /*****************************************************************************/
- LEAF_DSPR2(jsimd_idct_6x6_dspr2)
- /*
- * Dequantize one coefficient block and inverse-transform it into a 6x6
- * block of samples (6 bytes stored to each of 6 output rows).
- *
- * a0 = compptr->dct_table
- * a1 = coef_block
- * a2 = output_buf
- * a3 = output_col
- *
- * Pass 1 walks 6 columns and keeps 32-bit intermediates in a 144-byte
- * (6 x 6 words) work array on the stack; pass 2 walks the 6 rows of that
- * array and stores the final bytes.
- */
- .set at
- SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- addiu sp, sp, -144 /* reserve the 6x6-word work array */
- move v0, sp /* v0 = work array write pointer */
- addiu v1, v0, 24 /* v1 = end marker: 6 columns * 4 bytes */
- addiu t9, zero, 5793 /* FIX(0.707106781) with 13 fraction bits */
- addiu s0, zero, 10033 /* FIX(1.224744871) */
- addiu s1, zero, 2998 /* FIX(0.366025404) */
- /* Pass 1: process 6 columns from input, store into work array. */
- 1:
- lh s2, 0(a0) /* q0 = quantptr[ 0] */
- lh s3, 32(a0) /* q1 = quantptr[16] */
- lh s4, 64(a0) /* q2 = quantptr[32] */
- lh t2, 64(a1) /* tmp2 = inptr[32] */
- lh t1, 32(a1) /* tmp1 = inptr[16] */
- lh t0, 0(a1) /* tmp0 = inptr[ 0] */
- mul t2, t2, s4 /* tmp2 = tmp2 * q2 */
- mul t1, t1, s3 /* tmp1 = tmp1 * q1 */
- mul t0, t0, s2 /* tmp0 = tmp0 * q0 */
- lh t6, 16(a1) /* z1 = inptr[ 8] */
- lh t8, 80(a1) /* z3 = inptr[40] */
- lh t7, 48(a1) /* z2 = inptr[24] */
- lh s2, 16(a0) /* q0 = quantptr[ 8] */
- lh s4, 80(a0) /* q2 = quantptr[40] */
- lh s3, 48(a0) /* q1 = quantptr[24] */
- mul t2, t2, t9 /* tmp2 = tmp2 * 5793 */
- mul t1, t1, s0 /* tmp1 = tmp1 * 10033 */
- sll t0, t0, 13 /* tmp0 = tmp0 << 13 */
- mul t6, t6, s2 /* z1 = z1 * q0 */
- mul t8, t8, s4 /* z3 = z3 * q2 */
- mul t7, t7, s3 /* z2 = z2 * q1 */
- addu t3, t0, t2 /* tmp10 = tmp0 + tmp2 */
- sll t2, t2, 1 /* tmp2 = tmp2 << 1 */
- subu t4, t0, t2 /* tmp11 = tmp0 - tmp2; */
- subu t5, t3, t1 /* tmp12 = tmp10 - tmp1 */
- addu t3, t3, t1 /* tmp10 = tmp10 + tmp1 */
- addu t1, t6, t8 /* tmp1 = z1 + z3 */
- mul t1, t1, s1 /* tmp1 = tmp1 * 2998 */
- shra_r.w t4, t4, 11 /* tmp11 = (tmp11 + 1024) >> 11 */
- subu t2, t6, t8 /* tmp2 = z1 - z3 */
- subu t2, t2, t7 /* tmp2 = tmp2 - z2 */
- sll t2, t2, 2 /* tmp2 = tmp2 << 2 */
- addu t0, t6, t7 /* tmp0 = z1 + z2 */
- sll t0, t0, 13 /* tmp0 = tmp0 << 13 */
- subu s2, t8, t7 /* q0 = z3 - z2 */
- sll s2, s2, 13 /* q0 = q0 << 13 */
- addu t0, t0, t1 /* tmp0 = tmp0 + tmp1 */
- addu t1, s2, t1 /* tmp1 = q0 + tmp1 */
- addu s2, t4, t2 /* q0 = tmp11 + tmp2 */
- subu s3, t4, t2 /* q1 = tmp11 - tmp2 */
- addu t6, t3, t0 /* z1 = tmp10 + tmp0 */
- subu t7, t3, t0 /* z2 = tmp10 - tmp0 */
- addu t4, t5, t1 /* tmp11 = tmp12 + tmp1 */
- subu t5, t5, t1 /* tmp12 = tmp12 - tmp1 */
- shra_r.w t6, t6, 11 /* z1 = (z1 + 1024) >> 11 */
- shra_r.w t7, t7, 11 /* z2 = (z2 + 1024) >> 11 */
- shra_r.w t4, t4, 11 /* tmp11 = (tmp11 + 1024) >> 11 */
- shra_r.w t5, t5, 11 /* tmp12 = (tmp12 + 1024) >> 11 */
- /* Work array rows are 24 bytes (6 words) apart. */
- sw s2, 24(v0) /* row 1 */
- sw s3, 96(v0) /* row 4 */
- sw t6, 0(v0) /* row 0 */
- sw t7, 120(v0) /* row 5 */
- sw t4, 48(v0) /* row 2 */
- sw t5, 72(v0) /* row 3 */
- addiu v0, v0, 4 /* next work array column */
- addiu a1, a1, 2 /* next coefficient column */
- bne v0, v1, 1b
- addiu a0, a0, 2 /* next quantization table column; sits right
- after the branch (delay slot, presumably --
- confirm LEAF_DSPR2 sets noreorder) */
- /* Pass 2: process 6 rows from work array, store into output array. */
- move v0, sp /* v0 = work array read pointer */
- addiu v1, v0, 144 /* v1 = end of work array */
- 2:
- lw t0, 0(v0) /* wsptr[0] */
- lw t2, 16(v0) /* wsptr[4] */
- lw s5, 0(a2) /* s5 = current output row pointer */
- addiu t0, t0, 16 /* rounding term, added before the << 13 */
- sll t0, t0, 13 /* t0 = (wsptr[0] + 16) << 13 */
- mul t3, t2, t9 /* t3 = wsptr[4] * 5793 */
- lw t6, 4(v0) /* z1 = wsptr[1] */
- lw t8, 20(v0) /* z3 = wsptr[5] */
- lw t7, 12(v0) /* z2 = wsptr[3] */
- addu s5, s5, a3 /* outptr = output row pointer + output_col */
- addu s6, t6, t8 /* z1 + z3 */
- mul s6, s6, s1 /* s6 = (z1 + z3) * 2998 */
- addu t1, t0, t3 /* t1 = t0 + t3 */
- subu t4, t0, t3
- subu t4, t4, t3 /* t4 = t0 - 2 * t3 */
- lw t3, 8(v0) /* wsptr[2] */
- mul t0, t3, s0 /* t0 = wsptr[2] * 10033 */
- addu s7, t6, t7 /* z1 + z2 */
- sll s7, s7, 13
- addu s7, s6, s7 /* s7 = s6 + ((z1 + z2) << 13) */
- subu t2, t8, t7 /* z3 - z2 */
- sll t2, t2, 13
- addu t2, s6, t2 /* t2 = s6 + ((z3 - z2) << 13) */
- subu s6, t6, t7
- subu s6, s6, t8 /* z1 - z2 - z3 */
- sll s6, s6, 13 /* s6 = (z1 - z2 - z3) << 13 */
- addu t3, t1, t0 /* t3 = t1 + t0 */
- subu t5, t1, t0 /* t5 = t1 - t0 */
- addu t6, t3, s7 /* value for outptr[0] */
- subu t3, t3, s7 /* value for outptr[5] */
- addu t7, t4, s6 /* value for outptr[1] */
- subu t4, t4, s6 /* value for outptr[4] */
- addu t8, t5, t2 /* value for outptr[2] */
- subu t5, t5, t2 /* value for outptr[3] */
- /*
- * Descale and range-limit: the saturating << 6 followed by the arithmetic
- * >> 24 is a net >> 18 clamped to the signed 8-bit range; adding 128 then
- * recenters the value before its low byte is stored.
- */
- shll_s.w t6, t6, 6
- shll_s.w t3, t3, 6
- shll_s.w t7, t7, 6
- shll_s.w t4, t4, 6
- shll_s.w t8, t8, 6
- shll_s.w t5, t5, 6
- sra t6, t6, 24
- addiu t6, t6, 128
- sra t3, t3, 24
- addiu t3, t3, 128
- sb t6, 0(s5)
- sra t7, t7, 24
- addiu t7, t7, 128
- sb t3, 5(s5)
- sra t4, t4, 24
- addiu t4, t4, 128
- sb t7, 1(s5)
- sra t8, t8, 24
- addiu t8, t8, 128
- sb t4, 4(s5)
- addiu v0, v0, 24 /* next work array row */
- sra t5, t5, 24
- addiu t5, t5, 128
- sb t8, 2(s5)
- addiu a2, a2, 4 /* next output row pointer */
- bne v0, v1, 2b
- sb t5, 3(s5)
- addiu sp, sp, 144 /* release the work array */
- RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
- j ra
- nop
- END(jsimd_idct_6x6_dspr2)
- /*****************************************************************************/
- LEAF_DSPR2(jsimd_idct_12x12_pass1_dspr2)
- /*
- * First pass of the 12x12 inverse DCT. Dequantizes 8 columns of the
- * coefficient block and, for each column, stores 12 32-bit results into
- * the workspace (one per workspace row, rows 32 bytes apart).
- *
- * a0 = compptr->dct_table
- * a1 = coef_block
- * a2 = workspace
- */
- SAVE_REGS_ON_STACK 16, s0, s1, s2, s3
- li a3, 8 /* column counter */
- 1:
- /* odd part */
- lh t0, 48(a1)
- lh t1, 48(a0)
- lh t2, 16(a1)
- lh t3, 16(a0)
- lh t4, 80(a1)
- lh t5, 80(a0)
- lh t6, 112(a1)
- lh t7, 112(a0)
- mul t0, t0, t1 /* z2 */
- mul t1, t2, t3 /* z1 */
- mul t2, t4, t5 /* z3 */
- mul t3, t6, t7 /* z4 */
- li t4, 10703 /* FIX(1.306562965) */
- li t5, 4433 /* FIX_0_541196100 */
- li t6, 7053 /* FIX(0.860918669) */
- mul t4, t0, t4 /* tmp11 */
- mul t5, t0, t5 /* -tmp14 */
- addu t7, t1, t2 /* tmp10 */
- addu t8, t7, t3 /* tmp10 + z4 */
- mul t6, t6, t8 /* tmp15 */
- li t8, 2139 /* FIX(0.261052384) */
- mul t8, t7, t8 /* MULTIPLY(tmp10, FIX(0.261052384)) */
- li t7, 2295 /* FIX(0.280143716) */
- mul t7, t1, t7 /* MULTIPLY(z1, FIX(0.280143716)) */
- addu t9, t2, t3 /* z3 + z4 */
- li s0, 8565 /* FIX(1.045510580) */
- mul t9, t9, s0 /* -tmp13 */
- li s0, 12112 /* FIX(1.478575242) */
- mul s0, t2, s0 /* MULTIPLY(z3, FIX(1.478575242)) */
- li s1, 12998 /* FIX(1.586706681) */
- mul s1, t3, s1 /* MULTIPLY(z4, FIX(1.586706681)) */
- li s2, 5540 /* FIX(0.676326758) */
- mul s2, t1, s2 /* MULTIPLY(z1, FIX(0.676326758)) */
- li s3, 16244 /* FIX(1.982889723) */
- mul s3, t3, s3 /* MULTIPLY(z4, FIX(1.982889723)) */
- subu t1, t1, t3 /* z1-=z4 */
- subu t0, t0, t2 /* z2-=z3 */
- addu t2, t0, t1 /* z1+z2 */
- li t3, 4433 /* FIX_0_541196100 */
- mul t2, t2, t3 /* z3 */
- li t3, 6270 /* FIX_0_765366865 */
- mul t1, t1, t3 /* MULTIPLY(z1, FIX_0_765366865) */
- li t3, 15137 /* FIX_1_847759065 */
- mul t0, t0, t3 /* MULTIPLY(z2, FIX_1_847759065) */
- addu t8, t6, t8 /* tmp12 */
- addu t3, t8, t4 /* tmp12 + tmp11 */
- addu t3, t3, t7 /* tmp10 */
- subu t8, t8, t9 /* tmp12 + tmp13 */
- addu s0, t5, s0
- subu t8, t8, s0 /* tmp12 */
- subu t9, t6, t9
- subu s1, s1, t4
- addu t9, t9, s1 /* tmp13 */
- subu t6, t6, t5
- subu t6, t6, s2
- subu t6, t6, s3 /* tmp15 */
- /* even part start */
- lh t4, 64(a1)
- lh t5, 64(a0)
- lh t7, 32(a1)
- lh s0, 32(a0)
- lh s1, 0(a1)
- lh s2, 0(a0)
- lh s3, 96(a1)
- lh v0, 96(a0)
- mul t4, t4, t5 /* DEQUANTIZE(inptr[DCTSIZE*4],
- quantptr[DCTSIZE*4]) */
- mul t5, t7, s0 /* DEQUANTIZE(inptr[DCTSIZE*2],
- quantptr[DCTSIZE*2]) */
- mul t7, s1, s2 /* DEQUANTIZE(inptr[DCTSIZE*0],
- quantptr[DCTSIZE*0]) */
- mul s0, s3, v0 /* DEQUANTIZE(inptr[DCTSIZE*6],
- quantptr[DCTSIZE*6]) */
- /* odd part end */
- addu t1, t2, t1 /* tmp11 */
- subu t0, t2, t0 /* tmp14 */
- /* update counter and pointers */
- addiu a3, a3, -1
- addiu a0, a0, 2
- addiu a1, a1, 2
- /* even part rest */
- li s1, 10033 /* FIX(1.224744871) */
- li s2, 11190 /* FIX(1.366025404) */
- mul t4, t4, s1 /* z4 */
- mul s1, t5, s2 /* z4 */
- sll t5, t5, 13 /* z1 */
- sll t7, t7, 13
- addiu t7, t7, 1024 /* z3, including the rounding term (1 << 10)
- for the >> 11 applied at the end */
- sll s0, s0, 13 /* z2 */
- addu s2, t7, t4 /* tmp10 */
- subu t4, t7, t4 /* tmp11 */
- subu s3, t5, s0 /* tmp12 */
- addu t2, t7, s3 /* tmp21 */
- subu s3, t7, s3 /* tmp24 */
- addu t7, s1, s0 /* tmp12 */
- addu v0, s2, t7 /* tmp20 */
- subu s2, s2, t7 /* tmp25 */
- subu s1, s1, t5 /* z4 - z1 */
- subu s1, s1, s0 /* tmp12 */
- addu s0, t4, s1 /* tmp22 */
- subu t4, t4, s1 /* tmp23 */
- /* final output stage */
- addu t5, v0, t3 /* tmp20 + tmp10 -> workspace row 0 */
- subu v0, v0, t3 /* tmp20 - tmp10 -> workspace row 11 */
- addu t3, t2, t1 /* tmp21 + tmp11 -> workspace row 1 */
- subu t2, t2, t1 /* tmp21 - tmp11 -> workspace row 10 */
- addu t1, s0, t8 /* tmp22 + tmp12 -> workspace row 2 */
- subu s0, s0, t8 /* tmp22 - tmp12 -> workspace row 9 */
- addu t8, t4, t9 /* tmp23 + tmp13 -> workspace row 3 */
- subu t4, t4, t9 /* tmp23 - tmp13 -> workspace row 8 */
- addu t9, s3, t0 /* tmp24 + tmp14 -> workspace row 4 */
- subu s3, s3, t0 /* tmp24 - tmp14 -> workspace row 7 */
- addu t0, s2, t6 /* tmp25 + tmp15 -> workspace row 5 */
- subu s2, s2, t6 /* tmp25 - tmp15 -> workspace row 6 */
- /* descale every result by >> 11 */
- sra t5, t5, 11
- sra t3, t3, 11
- sra t1, t1, 11
- sra t8, t8, 11
- sra t9, t9, 11
- sra t0, t0, 11
- sra s2, s2, 11
- sra s3, s3, 11
- sra t4, t4, 11
- sra s0, s0, 11
- sra t2, t2, 11
- sra v0, v0, 11
- /* workspace rows are 32 bytes (8 words) apart */
- sw t5, 0(a2)
- sw t3, 32(a2)
- sw t1, 64(a2)
- sw t8, 96(a2)
- sw t9, 128(a2)
- sw t0, 160(a2)
- sw s2, 192(a2)
- sw s3, 224(a2)
- sw t4, 256(a2)
- sw s0, 288(a2)
- sw t2, 320(a2)
- sw v0, 352(a2)
- bgtz a3, 1b
- addiu a2, a2, 4 /* advance to the next workspace column */
- RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3
- j ra
- nop
- END(jsimd_idct_12x12_pass1_dspr2)
- /*****************************************************************************/
- LEAF_DSPR2(jsimd_idct_12x12_pass2_dspr2)
- /*
- * Second pass of the 12x12 inverse DCT. Reads 12 workspace rows of eight
- * 32-bit words each and stores 12 bytes per row through the row pointers
- * found at a1[0..11].
- *
- * a0 = workspace
- * a1 = output
- */
- SAVE_REGS_ON_STACK 16, s0, s1, s2, s3
- li a3, 12 /* row counter */
- 1:
- /* Odd part */
- lw t0, 12(a0) /* z2 = wsptr[3] */
- lw t1, 4(a0) /* z1 = wsptr[1] */
- lw t2, 20(a0) /* z3 = wsptr[5] */
- lw t3, 28(a0) /* z4 = wsptr[7] */
- li t4, 10703 /* FIX(1.306562965) */
- li t5, 4433 /* FIX_0_541196100 */
- mul t4, t0, t4 /* tmp11 */
- mul t5, t0, t5 /* -tmp14 */
- addu t6, t1, t2 /* tmp10 */
- li t7, 2139 /* FIX(0.261052384) */
- mul t7, t6, t7 /* MULTIPLY(tmp10, FIX(0.261052384)) */
- addu t6, t6, t3 /* tmp10 + z4 */
- li t8, 7053 /* FIX(0.860918669) */
- mul t6, t6, t8 /* tmp15 */
- li t8, 2295 /* FIX(0.280143716) */
- mul t8, t1, t8 /* MULTIPLY(z1, FIX(0.280143716)) */
- addu t9, t2, t3 /* z3 + z4 */
- li s0, 8565 /* FIX(1.045510580) */
- mul t9, t9, s0 /* -tmp13 */
- li s0, 12112 /* FIX(1.478575242) */
- mul s0, t2, s0 /* MULTIPLY(z3, FIX(1.478575242)) */
- li s1, 12998 /* FIX(1.586706681) */
- mul s1, t3, s1 /* MULTIPLY(z4, FIX(1.586706681)) */
- li s2, 5540 /* FIX(0.676326758) */
- mul s2, t1, s2 /* MULTIPLY(z1, FIX(0.676326758)) */
- li s3, 16244 /* FIX(1.982889723) */
- mul s3, t3, s3 /* MULTIPLY(z4, FIX(1.982889723)) */
- subu t1, t1, t3 /* z1 -= z4 */
- subu t0, t0, t2 /* z2 -= z3 */
- addu t2, t1, t0 /* z1 + z2 */
- li t3, 4433 /* FIX_0_541196100 */
- mul t2, t2, t3 /* z3 */
- li t3, 6270 /* FIX_0_765366865 */
- mul t1, t1, t3 /* MULTIPLY(z1, FIX_0_765366865) */
- li t3, 15137 /* FIX_1_847759065 */
- mul t0, t0, t3 /* MULTIPLY(z2, FIX_1_847759065) */
- addu t3, t6, t7 /* tmp12 */
- addu t7, t3, t4
- addu t7, t7, t8 /* tmp10 */
- subu t3, t3, t9
- subu t3, t3, t5
- subu t3, t3, s0 /* tmp12 */
- subu t9, t6, t9
- subu t9, t9, t4
- addu t9, t9, s1 /* tmp13 */
- subu t6, t6, t5
- subu t6, t6, s2
- subu t6, t6, s3 /* tmp15 */
- addu t1, t2, t1 /* tmp11 */
- subu t0, t2, t0 /* tmp14 */
- /* even part */
- lw t2, 16(a0) /* z4 */
- lw t4, 8(a0) /* z1 */
- lw t5, 0(a0) /* z3 */
- lw t8, 24(a0) /* z2 */
- li s0, 10033 /* FIX(1.224744871) */
- li s1, 11190 /* FIX(1.366025404) */
- mul t2, t2, s0 /* z4 */
- mul s0, t4, s1 /* z4 */
- addiu t5, t5, 0x10 /* rounding term, added before the << 13 */
- sll t5, t5, 13 /* z3 */
- sll t4, t4, 13 /* z1 */
- sll t8, t8, 13 /* z2 */
- subu s1, t4, t8 /* tmp12 */
- addu s2, t5, t2 /* tmp10 */
- subu t2, t5, t2 /* tmp11 */
- addu s3, t5, s1 /* tmp21 */
- subu s1, t5, s1 /* tmp24 */
- addu t5, s0, t8 /* tmp12 */
- addu v0, s2, t5 /* tmp20 */
- subu t5, s2, t5 /* tmp25 */
- subu t4, s0, t4
- subu t4, t4, t8 /* tmp12 */
- addu t8, t2, t4 /* tmp22 */
- subu t2, t2, t4 /* tmp23 */
- /* increment counter and pointers */
- addiu a3, a3, -1
- addiu a0, a0, 32 /* next workspace row (8 words) */
- /* Final stage */
- addu t4, v0, t7 /* tmp20 + tmp10 -> outptr[0] */
- subu v0, v0, t7 /* tmp20 - tmp10 -> outptr[11] */
- addu t7, s3, t1 /* tmp21 + tmp11 -> outptr[1] */
- subu s3, s3, t1 /* tmp21 - tmp11 -> outptr[10] */
- addu t1, t8, t3 /* tmp22 + tmp12 -> outptr[2] */
- subu t8, t8, t3 /* tmp22 - tmp12 -> outptr[9] */
- addu t3, t2, t9 /* tmp23 + tmp13 -> outptr[3] */
- subu t2, t2, t9 /* tmp23 - tmp13 -> outptr[8] */
- addu t9, s1, t0 /* tmp24 + tmp14 -> outptr[4] */
- subu s1, s1, t0 /* tmp24 - tmp14 -> outptr[7] */
- addu t0, t5, t6 /* tmp25 + tmp15 -> outptr[5] */
- subu t5, t5, t6 /* tmp25 - tmp15 -> outptr[6] */
- /*
- * Descale and range-limit. Each value is shifted left by 4 (plain) and
- * then by 2 more with saturation, and the top byte is extracted with
- * >> 24; 0x80 is added before the low byte is stored.
- */
- sll t4, t4, 4
- sll t7, t7, 4
- sll t1, t1, 4
- sll t3, t3, 4
- sll t9, t9, 4
- sll t0, t0, 4
- sll t5, t5, 4
- sll s1, s1, 4
- sll t2, t2, 4
- sll t8, t8, 4
- sll s3, s3, 4
- sll v0, v0, 4
- shll_s.w t4, t4, 2
- shll_s.w t7, t7, 2
- shll_s.w t1, t1, 2
- shll_s.w t3, t3, 2
- shll_s.w t9, t9, 2
- shll_s.w t0, t0, 2
- shll_s.w t5, t5, 2
- shll_s.w s1, s1, 2
- shll_s.w t2, t2, 2
- shll_s.w t8, t8, 2
- shll_s.w s3, s3, 2
- shll_s.w v0, v0, 2
- srl t4, t4, 24
- srl t7, t7, 24
- srl t1, t1, 24
- srl t3, t3, 24
- srl t9, t9, 24
- srl t0, t0, 24
- srl t5, t5, 24
- srl s1, s1, 24
- srl t2, t2, 24
- srl t8, t8, 24
- srl s3, s3, 24
- srl v0, v0, 24
- lw t6, 0(a1) /* t6 = current output row pointer */
- addiu t4, t4, 0x80
- addiu t7, t7, 0x80
- addiu t1, t1, 0x80
- addiu t3, t3, 0x80
- addiu t9, t9, 0x80
- addiu t0, t0, 0x80
- addiu t5, t5, 0x80
- addiu s1, s1, 0x80
- addiu t2, t2, 0x80
- addiu t8, t8, 0x80
- addiu s3, s3, 0x80
- addiu v0, v0, 0x80
- sb t4, 0(t6)
- sb t7, 1(t6)
- sb t1, 2(t6)
- sb t3, 3(t6)
- sb t9, 4(t6)
- sb t0, 5(t6)
- sb t5, 6(t6)
- sb s1, 7(t6)
- sb t2, 8(t6)
- sb t8, 9(t6)
- sb s3, 10(t6)
- sb v0, 11(t6)
- bgtz a3, 1b
- addiu a1, a1, 4 /* advance to the next output row pointer */
- RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3
- jr ra
- nop
- END(jsimd_idct_12x12_pass2_dspr2)
- /*****************************************************************************/
- LEAF_DSPR2(jsimd_convsamp_dspr2)
- /*
- * Converts an 8x8 block of unsigned bytes to 16-bit values with 128
- * subtracted from each. For each of the 8 row pointers sample_data[0..7],
- * 8 bytes are read starting at offset start_col and 8 halfwords (16 bytes)
- * are written to consecutive workspace locations.
- *
- * a0 = sample_data
- * a1 = start_col
- * a2 = workspace
- *
- * The code is fully unrolled, and the loads for the next row are
- * interleaved with the arithmetic and stores of the current one.
- */
- lw t0, 0(a0) /* t0 = sample_data[0] */
- li t7, 0xff80ff80 /* -128 in each 16-bit half */
- addu t0, t0, a1 /* t0 = row 0 + start_col */
- ulw t1, 0(t0) /* bytes 0-3 (unaligned load) */
- ulw t2, 4(t0) /* bytes 4-7 */
- preceu.ph.qbr t3, t1 /* zero-extend the two low bytes to halfwords */
- preceu.ph.qbl t4, t1 /* zero-extend the two high bytes to halfwords */
- lw t0, 4(a0) /* t0 = sample_data[1] */
- preceu.ph.qbr t5, t2
- preceu.ph.qbl t6, t2
- addu t0, t0, a1
- addu.ph t3, t3, t7 /* subtract 128 from both halfwords */
- addu.ph t4, t4, t7
- ulw t1, 0(t0)
- ulw t2, 4(t0)
- addu.ph t5, t5, t7
- addu.ph t6, t6, t7
- usw t3, 0(a2) /* row 0 results: workspace bytes 0-15 */
- usw t4, 4(a2)
- preceu.ph.qbr t3, t1
- preceu.ph.qbl t4, t1
- usw t5, 8(a2)
- usw t6, 12(a2)
- lw t0, 8(a0) /* t0 = sample_data[2] */
- preceu.ph.qbr t5, t2
- preceu.ph.qbl t6, t2
- addu t0, t0, a1
- addu.ph t3, t3, t7
- addu.ph t4, t4, t7
- ulw t1, 0(t0)
- ulw t2, 4(t0)
- addu.ph t5, t5, t7
- addu.ph t6, t6, t7
- usw t3, 16(a2) /* row 1 results */
- usw t4, 20(a2)
- preceu.ph.qbr t3, t1
- preceu.ph.qbl t4, t1
- usw t5, 24(a2)
- usw t6, 28(a2)
- lw t0, 12(a0) /* t0 = sample_data[3] */
- preceu.ph.qbr t5, t2
- preceu.ph.qbl t6, t2
- addu t0, t0, a1
- addu.ph t3, t3, t7
- addu.ph t4, t4, t7
- ulw t1, 0(t0)
- ulw t2, 4(t0)
- addu.ph t5, t5, t7
- addu.ph t6, t6, t7
- usw t3, 32(a2) /* row 2 results */
- usw t4, 36(a2)
- preceu.ph.qbr t3, t1
- preceu.ph.qbl t4, t1
- usw t5, 40(a2)
- usw t6, 44(a2)
- lw t0, 16(a0) /* t0 = sample_data[4] */
- preceu.ph.qbr t5, t2
- preceu.ph.qbl t6, t2
- addu t0, t0, a1
- addu.ph t3, t3, t7
- addu.ph t4, t4, t7
- ulw t1, 0(t0)
- ulw t2, 4(t0)
- addu.ph t5, t5, t7
- addu.ph t6, t6, t7
- usw t3, 48(a2) /* row 3 results */
- usw t4, 52(a2)
- preceu.ph.qbr t3, t1
- preceu.ph.qbl t4, t1
- usw t5, 56(a2)
- usw t6, 60(a2)
- lw t0, 20(a0) /* t0 = sample_data[5] */
- preceu.ph.qbr t5, t2
- preceu.ph.qbl t6, t2
- addu t0, t0, a1
- addu.ph t3, t3, t7
- addu.ph t4, t4, t7
- ulw t1, 0(t0)
- ulw t2, 4(t0)
- addu.ph t5, t5, t7
- addu.ph t6, t6, t7
- usw t3, 64(a2) /* row 4 results */
- usw t4, 68(a2)
- preceu.ph.qbr t3, t1
- preceu.ph.qbl t4, t1
- usw t5, 72(a2)
- usw t6, 76(a2)
- lw t0, 24(a0) /* t0 = sample_data[6] */
- preceu.ph.qbr t5, t2
- preceu.ph.qbl t6, t2
- addu t0, t0, a1
- addu.ph t3, t3, t7
- addu.ph t4, t4, t7
- ulw t1, 0(t0)
- ulw t2, 4(t0)
- addu.ph t5, t5, t7
- addu.ph t6, t6, t7
- usw t3, 80(a2) /* row 5 results */
- usw t4, 84(a2)
- preceu.ph.qbr t3, t1
- preceu.ph.qbl t4, t1
- usw t5, 88(a2)
- usw t6, 92(a2)
- lw t0, 28(a0) /* t0 = sample_data[7] */
- preceu.ph.qbr t5, t2
- preceu.ph.qbl t6, t2
- addu t0, t0, a1
- addu.ph t3, t3, t7
- addu.ph t4, t4, t7
- ulw t1, 0(t0)
- ulw t2, 4(t0)
- addu.ph t5, t5, t7
- addu.ph t6, t6, t7
- usw t3, 96(a2) /* row 6 results */
- usw t4, 100(a2)
- preceu.ph.qbr t3, t1
- preceu.ph.qbl t4, t1
- usw t5, 104(a2)
- usw t6, 108(a2)
- preceu.ph.qbr t5, t2
- preceu.ph.qbl t6, t2
- addu.ph t3, t3, t7
- addu.ph t4, t4, t7
- addu.ph t5, t5, t7
- addu.ph t6, t6, t7
- usw t3, 112(a2) /* row 7 results */
- usw t4, 116(a2)
- usw t5, 120(a2)
- usw t6, 124(a2)
- j ra
- nop
- END(jsimd_convsamp_dspr2)
- #ifndef __mips_soft_float
- /*****************************************************************************/
- LEAF_DSPR2(jsimd_convsamp_float_dspr2)
- /*
- * Converts an 8x8 block of unsigned bytes to single-precision floats with
- * 128 subtracted from each. For each of the 8 row pointers
- * sample_data[0..7], 8 bytes are read starting at offset start_col and
- * 8 floats (32 bytes) are written to consecutive workspace locations.
- *
- * a0 = sample_data
- * a1 = start_col
- * a2 = workspace
- *
- * The code is fully unrolled; each row follows the same load / subtract /
- * convert / store sequence, with the pointer for the next row fetched
- * while the current row is being stored.
- */
- .set at
- /* row 0 */
- lw t0, 0(a0) /* t0 = sample_data[0] */
- addu t0, t0, a1 /* t0 = row 0 + start_col */
- lbu t1, 0(t0) /* load 8 unsigned samples */
- lbu t2, 1(t0)
- lbu t3, 2(t0)
- lbu t4, 3(t0)
- lbu t5, 4(t0)
- lbu t6, 5(t0)
- lbu t7, 6(t0)
- lbu t8, 7(t0)
- addiu t1, t1, -128 /* subtract 128 from each sample */
- addiu t2, t2, -128
- addiu t3, t3, -128
- addiu t4, t4, -128
- addiu t5, t5, -128
- addiu t6, t6, -128
- addiu t7, t7, -128
- addiu t8, t8, -128
- mtc1 t1, f2 /* move the integers to FPU registers */
- mtc1 t2, f4
- mtc1 t3, f6
- mtc1 t4, f8
- mtc1 t5, f10
- mtc1 t6, f12
- mtc1 t7, f14
- mtc1 t8, f16
- cvt.s.w f2, f2 /* convert integer word to single float */
- cvt.s.w f4, f4
- cvt.s.w f6, f6
- cvt.s.w f8, f8
- cvt.s.w f10, f10
- cvt.s.w f12, f12
- cvt.s.w f14, f14
- cvt.s.w f16, f16
- lw t0, 4(a0) /* t0 = sample_data[1] */
- swc1 f2, 0(a2) /* row 0 results: workspace bytes 0-31 */
- swc1 f4, 4(a2)
- swc1 f6, 8(a2)
- addu t0, t0, a1
- swc1 f8, 12(a2)
- swc1 f10, 16(a2)
- swc1 f12, 20(a2)
- swc1 f14, 24(a2)
- swc1 f16, 28(a2)
- /* row 1 */
- lbu t1, 0(t0)
- lbu t2, 1(t0)
- lbu t3, 2(t0)
- lbu t4, 3(t0)
- lbu t5, 4(t0)
- lbu t6, 5(t0)
- lbu t7, 6(t0)
- lbu t8, 7(t0)
- addiu t1, t1, -128
- addiu t2, t2, -128
- addiu t3, t3, -128
- addiu t4, t4, -128
- addiu t5, t5, -128
- addiu t6, t6, -128
- addiu t7, t7, -128
- addiu t8, t8, -128
- mtc1 t1, f2
- mtc1 t2, f4
- mtc1 t3, f6
- mtc1 t4, f8
- mtc1 t5, f10
- mtc1 t6, f12
- mtc1 t7, f14
- mtc1 t8, f16
- cvt.s.w f2, f2
- cvt.s.w f4, f4
- cvt.s.w f6, f6
- cvt.s.w f8, f8
- cvt.s.w f10, f10
- cvt.s.w f12, f12
- cvt.s.w f14, f14
- cvt.s.w f16, f16
- lw t0, 8(a0) /* t0 = sample_data[2] */
- swc1 f2, 32(a2)
- swc1 f4, 36(a2)
- swc1 f6, 40(a2)
- addu t0, t0, a1
- swc1 f8, 44(a2)
- swc1 f10, 48(a2)
- swc1 f12, 52(a2)
- swc1 f14, 56(a2)
- swc1 f16, 60(a2)
- /* row 2 */
- lbu t1, 0(t0)
- lbu t2, 1(t0)
- lbu t3, 2(t0)
- lbu t4, 3(t0)
- lbu t5, 4(t0)
- lbu t6, 5(t0)
- lbu t7, 6(t0)
- lbu t8, 7(t0)
- addiu t1, t1, -128
- addiu t2, t2, -128
- addiu t3, t3, -128
- addiu t4, t4, -128
- addiu t5, t5, -128
- addiu t6, t6, -128
- addiu t7, t7, -128
- addiu t8, t8, -128
- mtc1 t1, f2
- mtc1 t2, f4
- mtc1 t3, f6
- mtc1 t4, f8
- mtc1 t5, f10
- mtc1 t6, f12
- mtc1 t7, f14
- mtc1 t8, f16
- cvt.s.w f2, f2
- cvt.s.w f4, f4
- cvt.s.w f6, f6
- cvt.s.w f8, f8
- cvt.s.w f10, f10
- cvt.s.w f12, f12
- cvt.s.w f14, f14
- cvt.s.w f16, f16
- lw t0, 12(a0) /* t0 = sample_data[3] */
- swc1 f2, 64(a2)
- swc1 f4, 68(a2)
- swc1 f6, 72(a2)
- addu t0, t0, a1
- swc1 f8, 76(a2)
- swc1 f10, 80(a2)
- swc1 f12, 84(a2)
- swc1 f14, 88(a2)
- swc1 f16, 92(a2)
- /* row 3 */
- lbu t1, 0(t0)
- lbu t2, 1(t0)
- lbu t3, 2(t0)
- lbu t4, 3(t0)
- lbu t5, 4(t0)
- lbu t6, 5(t0)
- lbu t7, 6(t0)
- lbu t8, 7(t0)
- addiu t1, t1, -128
- addiu t2, t2, -128
- addiu t3, t3, -128
- addiu t4, t4, -128
- addiu t5, t5, -128
- addiu t6, t6, -128
- addiu t7, t7, -128
- addiu t8, t8, -128
- mtc1 t1, f2
- mtc1 t2, f4
- mtc1 t3, f6
- mtc1 t4, f8
- mtc1 t5, f10
- mtc1 t6, f12
- mtc1 t7, f14
- mtc1 t8, f16
- cvt.s.w f2, f2
- cvt.s.w f4, f4
- cvt.s.w f6, f6
- cvt.s.w f8, f8
- cvt.s.w f10, f10
- cvt.s.w f12, f12
- cvt.s.w f14, f14
- cvt.s.w f16, f16
- lw t0, 16(a0) /* t0 = sample_data[4] */
- swc1 f2, 96(a2)
- swc1 f4, 100(a2)
- swc1 f6, 104(a2)
- addu t0, t0, a1
- swc1 f8, 108(a2)
- swc1 f10, 112(a2)
- swc1 f12, 116(a2)
- swc1 f14, 120(a2)
- swc1 f16, 124(a2)
- /* row 4 */
- lbu t1, 0(t0)
- lbu t2, 1(t0)
- lbu t3, 2(t0)
- lbu t4, 3(t0)
- lbu t5, 4(t0)
- lbu t6, 5(t0)
- lbu t7, 6(t0)
- lbu t8, 7(t0)
- addiu t1, t1, -128
- addiu t2, t2, -128
- addiu t3, t3, -128
- addiu t4, t4, -128
- addiu t5, t5, -128
- addiu t6, t6, -128
- addiu t7, t7, -128
- addiu t8, t8, -128
- mtc1 t1, f2
- mtc1 t2, f4
- mtc1 t3, f6
- mtc1 t4, f8
- mtc1 t5, f10
- mtc1 t6, f12
- mtc1 t7, f14
- mtc1 t8, f16
- cvt.s.w f2, f2
- cvt.s.w f4, f4
- cvt.s.w f6, f6
- cvt.s.w f8, f8
- cvt.s.w f10, f10
- cvt.s.w f12, f12
- cvt.s.w f14, f14
- cvt.s.w f16, f16
- lw t0, 20(a0) /* t0 = sample_data[5] */
- swc1 f2, 128(a2)
- swc1 f4, 132(a2)
- swc1 f6, 136(a2)
- addu t0, t0, a1
- swc1 f8, 140(a2)
- swc1 f10, 144(a2)
- swc1 f12, 148(a2)
- swc1 f14, 152(a2)
- swc1 f16, 156(a2)
- /* row 5 */
- lbu t1, 0(t0)
- lbu t2, 1(t0)
- lbu t3, 2(t0)
- lbu t4, 3(t0)
- lbu t5, 4(t0)
- lbu t6, 5(t0)
- lbu t7, 6(t0)
- lbu t8, 7(t0)
- addiu t1, t1, -128
- addiu t2, t2, -128
- addiu t3, t3, -128
- addiu t4, t4, -128
- addiu t5, t5, -128
- addiu t6, t6, -128
- addiu t7, t7, -128
- addiu t8, t8, -128
- mtc1 t1, f2
- mtc1 t2, f4
- mtc1 t3, f6
- mtc1 t4, f8
- mtc1 t5, f10
- mtc1 t6, f12
- mtc1 t7, f14
- mtc1 t8, f16
- cvt.s.w f2, f2
- cvt.s.w f4, f4
- cvt.s.w f6, f6
- cvt.s.w f8, f8
- cvt.s.w f10, f10
- cvt.s.w f12, f12
- cvt.s.w f14, f14
- cvt.s.w f16, f16
- lw t0, 24(a0) /* t0 = sample_data[6] */
- swc1 f2, 160(a2)
- swc1 f4, 164(a2)
- swc1 f6, 168(a2)
- addu t0, t0, a1
- swc1 f8, 172(a2)
- swc1 f10, 176(a2)
- swc1 f12, 180(a2)
- swc1 f14, 184(a2)
- swc1 f16, 188(a2)
- /* row 6 */
- lbu t1, 0(t0)
- lbu t2, 1(t0)
- lbu t3, 2(t0)
- lbu t4, 3(t0)
- lbu t5, 4(t0)
- lbu t6, 5(t0)
- lbu t7, 6(t0)
- lbu t8, 7(t0)
- addiu t1, t1, -128
- addiu t2, t2, -128
- addiu t3, t3, -128
- addiu t4, t4, -128
- addiu t5, t5, -128
- addiu t6, t6, -128
- addiu t7, t7, -128
- addiu t8, t8, -128
- mtc1 t1, f2
- mtc1 t2, f4
- mtc1 t3, f6
- mtc1 t4, f8
- mtc1 t5, f10
- mtc1 t6, f12
- mtc1 t7, f14
- mtc1 t8, f16
- cvt.s.w f2, f2
- cvt.s.w f4, f4
- cvt.s.w f6, f6
- cvt.s.w f8, f8
- cvt.s.w f10, f10
- cvt.s.w f12, f12
- cvt.s.w f14, f14
- cvt.s.w f16, f16
- lw t0, 28(a0) /* t0 = sample_data[7] */
- swc1 f2, 192(a2)
- swc1 f4, 196(a2)
- swc1 f6, 200(a2)
- addu t0, t0, a1
- swc1 f8, 204(a2)
- swc1 f10, 208(a2)
- swc1 f12, 212(a2)
- swc1 f14, 216(a2)
- swc1 f16, 220(a2)
- /* row 7 */
- lbu t1, 0(t0)
- lbu t2, 1(t0)
- lbu t3, 2(t0)
- lbu t4, 3(t0)
- lbu t5, 4(t0)
- lbu t6, 5(t0)
- lbu t7, 6(t0)
- lbu t8, 7(t0)
- addiu t1, t1, -128
- addiu t2, t2, -128
- addiu t3, t3, -128
- addiu t4, t4, -128
- addiu t5, t5, -128
- addiu t6, t6, -128
- addiu t7, t7, -128
- addiu t8, t8, -128
- mtc1 t1, f2
- mtc1 t2, f4
- mtc1 t3, f6
- mtc1 t4, f8
- mtc1 t5, f10
- mtc1 t6, f12
- mtc1 t7, f14
- mtc1 t8, f16
- cvt.s.w f2, f2
- cvt.s.w f4, f4
- cvt.s.w f6, f6
- cvt.s.w f8, f8
- cvt.s.w f10, f10
- cvt.s.w f12, f12
- cvt.s.w f14, f14
- cvt.s.w f16, f16
- swc1 f2, 224(a2)
- swc1 f4, 228(a2)
- swc1 f6, 232(a2)
- swc1 f8, 236(a2)
- swc1 f10, 240(a2)
- swc1 f12, 244(a2)
- swc1 f14, 248(a2)
- swc1 f16, 252(a2)
- j ra
- nop
- END(jsimd_convsamp_float_dspr2)
- #endif
- /*****************************************************************************/
|