VirtualBox

source: vbox/trunk/src/libs/libpng-1.6.45/arm/palette_neon_intrinsics.c@ 107935

Last change on this file since 107935 was 107813, checked in by vboxsync, 3 weeks ago

libpng-1.6.45: Applied and adjusted our libpng changes to 1.6.45. bugref:8515

  • Property svn:eol-style set to native
File size: 4.6 KB
Line 
1/* palette_neon_intrinsics.c - NEON optimised palette expansion functions
2 *
3 * Copyright (c) 2018-2019 Cosmin Truta
4 * Copyright (c) 2017-2018 Arm Holdings. All rights reserved.
5 * Written by Richard Townsend <[email protected]>, February 2017.
6 *
7 * This code is released under the libpng license.
8 * For conditions of distribution and use, see the disclaimer
9 * and license in png.h
10 */
11
12#include "../pngpriv.h"
13
14#if PNG_ARM_NEON_IMPLEMENTATION == 1
15
16#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_ARM64)
17# include <arm64_neon.h>
18#else
19# include <arm_neon.h>
20#endif
21
22/* Build an RGBA8 palette from the separate RGB and alpha palettes. */
23void
24png_riffle_palette_neon(png_structrp png_ptr)
25{
26 png_const_colorp palette = png_ptr->palette;
27 png_bytep riffled_palette = png_ptr->riffled_palette;
28 png_const_bytep trans_alpha = png_ptr->trans_alpha;
29 int num_trans = png_ptr->num_trans;
30 int i;
31
32 /* Initially black, opaque. */
33 uint8x16x4_t w = {{
34 vdupq_n_u8(0x00),
35 vdupq_n_u8(0x00),
36 vdupq_n_u8(0x00),
37 vdupq_n_u8(0xff),
38 }};
39
40 png_debug(1, "in png_riffle_palette_neon");
41
42 /* First, riffle the RGB colours into an RGBA8 palette.
43 * The alpha component is set to opaque for now.
44 */
45 for (i = 0; i < 256; i += 16)
46 {
47 uint8x16x3_t v = vld3q_u8((png_const_bytep)(palette + i));
48 w.val[0] = v.val[0];
49 w.val[1] = v.val[1];
50 w.val[2] = v.val[2];
51 vst4q_u8(riffled_palette + (i << 2), w);
52 }
53
54 /* Fix up the missing transparency values. */
55 for (i = 0; i < num_trans; i++)
56 riffled_palette[(i << 2) + 3] = trans_alpha[i];
57}
58
59/* Expands a palettized row into RGBA8. */
60int
61png_do_expand_palette_rgba8_neon(png_structrp png_ptr, png_row_infop row_info,
62 png_const_bytep row, png_bytepp ssp, png_bytepp ddp)
63{
64 png_uint_32 row_width = row_info->width;
65 const png_uint_32 *riffled_palette =
66 png_aligncastconst(png_const_uint_32p, png_ptr->riffled_palette);
67 const png_uint_32 pixels_per_chunk = 4;
68 png_uint_32 i;
69
70 png_debug(1, "in png_do_expand_palette_rgba8_neon");
71
72 PNG_UNUSED(row)
73 if (row_width < pixels_per_chunk)
74 return 0;
75
76 /* This function originally gets the last byte of the output row.
77 * The NEON part writes forward from a given position, so we have
78 * to seek this back by 4 pixels x 4 bytes.
79 */
80 *ddp = *ddp - ((pixels_per_chunk * sizeof(png_uint_32)) - 1);
81
82 for (i = 0; i < row_width; i += pixels_per_chunk)
83 {
84 uint32x4_t cur;
85 png_bytep sp = *ssp - i, dp = *ddp - (i << 2);
86 cur = vld1q_dup_u32 (riffled_palette + *(sp - 3));
87 cur = vld1q_lane_u32(riffled_palette + *(sp - 2), cur, 1);
88 cur = vld1q_lane_u32(riffled_palette + *(sp - 1), cur, 2);
89 cur = vld1q_lane_u32(riffled_palette + *(sp - 0), cur, 3);
90 vst1q_u32((void *)dp, cur);
91 }
92 if (i != row_width)
93 {
94 /* Remove the amount that wasn't processed. */
95 i -= pixels_per_chunk;
96 }
97
98 /* Decrement output pointers. */
99 *ssp = *ssp - i;
100 *ddp = *ddp - (i << 2);
101 return i;
102}
103
104/* Expands a palettized row into RGB8. */
105int
106png_do_expand_palette_rgb8_neon(png_structrp png_ptr, png_row_infop row_info,
107 png_const_bytep row, png_bytepp ssp, png_bytepp ddp)
108{
109 png_uint_32 row_width = row_info->width;
110 png_const_bytep palette = (png_const_bytep)png_ptr->palette;
111 const png_uint_32 pixels_per_chunk = 8;
112 png_uint_32 i;
113
114 png_debug(1, "in png_do_expand_palette_rgb8_neon");
115
116 PNG_UNUSED(row)
117 if (row_width <= pixels_per_chunk)
118 return 0;
119
120 /* Seeking this back by 8 pixels x 3 bytes. */
121 *ddp = *ddp - ((pixels_per_chunk * sizeof(png_color)) - 1);
122
123 for (i = 0; i < row_width; i += pixels_per_chunk)
124 {
125 uint8x8x3_t cur;
126 png_bytep sp = *ssp - i, dp = *ddp - ((i << 1) + i);
127 cur = vld3_dup_u8(palette + sizeof(png_color) * (*(sp - 7)));
128 cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 6)), cur, 1);
129 cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 5)), cur, 2);
130 cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 4)), cur, 3);
131 cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 3)), cur, 4);
132 cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 2)), cur, 5);
133 cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 1)), cur, 6);
134 cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 0)), cur, 7);
135 vst3_u8((void *)dp, cur);
136 }
137
138 if (i != row_width)
139 {
140 /* Remove the amount that wasn't processed. */
141 i -= pixels_per_chunk;
142 }
143
144 /* Decrement output pointers. */
145 *ssp = *ssp - i;
146 *ddp = *ddp - ((i << 1) + i);
147 return i;
148}
149
150#endif /* PNG_ARM_NEON_IMPLEMENTATION */
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette