1 /*
2 * Copyright 2021 TiKV Project Authors.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17
18 package org.tikv.common.columnar;
19
20 import java.math.BigDecimal;
21 import org.tikv.common.types.DataType;
22
23 /**
24 * This abstract class is mostly copied from Spark's ColumnVector (we do not link to it here because
25 * we do not want to pollute the TiKV Java client's dependencies).
26 *
27 * <p>Most of the APIs take the rowId as a parameter. This is the batch local 0-based row id for
28 * values in this TiColumnVector.
29 *
30 * <p>Spark only calls specific `get` method according to the data type of this {@link
31 * TiColumnVector}, e.g. if it's int type, Spark is guaranteed to only call {@link #getInt(int)} or
32 * {@link #getInts(int, int)}.
33 *
34 * <p>TiColumnVector is expected to be reused during the entire data loading process, to avoid
35 * allocating memory again and again.
36 */
37 public abstract class TiColumnVector implements AutoCloseable {
38
39 private final int numOfRows;
40 /** Data type for this column. */
41 protected DataType type;
42
43 /** Sets up the data type of this column vector. */
44 protected TiColumnVector(DataType type, int numOfRows) {
45 this.type = type;
46 this.numOfRows = numOfRows;
47 }
48
49 /** Returns the data type of this column vector. */
50 public final DataType dataType() {
51 return type;
52 }
53
54 /**
55 * Cleans up memory for this column vector. The column vector is not usable after this.
56 *
57 * <p>This overwrites `AutoCloseable.close` to remove the `throws` clause, as column vector is
58 * in-memory and we don't expect any exception to happen during closing.
59 */
60 @Override
61 public abstract void close();
62
63 /** Returns true if this column vector contains any null values. */
64 public abstract boolean hasNull();
65
66 /** Returns the number of nulls in this column vector. */
67 public abstract int numNulls();
68
69 /** Returns whether the value at rowId is NULL. */
70 public abstract boolean isNullAt(int rowId);
71
72 /**
73 * Returns the boolean type value for rowId. The return value is undefined and can be anything, if
74 * the slot for rowId is null.
75 */
76 public abstract boolean getBoolean(int rowId);
77
78 /**
79 * Gets boolean type values from [rowId, rowId + count). The return values for the null slots are
80 * undefined and can be anything.
81 */
82 public boolean[] getBooleans(int rowId, int count) {
83 boolean[] res = new boolean[count];
84 for (int i = 0; i < count; i++) {
85 res[i] = getBoolean(rowId + i);
86 }
87 return res;
88 }
89
90 /**
91 * Returns the byte type value for rowId. The return value is undefined and can be anything, if
92 * the slot for rowId is null.
93 */
94 public abstract byte getByte(int rowId);
95
96 /**
97 * Gets byte type values from [rowId, rowId + count). The return values for the null slots are
98 * undefined and can be anything.
99 */
100 public byte[] getBytes(int rowId, int count) {
101 byte[] res = new byte[count];
102 for (int i = 0; i < count; i++) {
103 res[i] = getByte(rowId + i);
104 }
105 return res;
106 }
107
108 /**
109 * Returns the short type value for rowId. The return value is undefined and can be anything, if
110 * the slot for rowId is null.
111 */
112 public abstract short getShort(int rowId);
113
114 /**
115 * Gets short type values from [rowId, rowId + count). The return values for the null slots are
116 * undefined and can be anything.
117 */
118 public short[] getShorts(int rowId, int count) {
119 short[] res = new short[count];
120 for (int i = 0; i < count; i++) {
121 res[i] = getShort(rowId + i);
122 }
123 return res;
124 }
125
126 /**
127 * Returns the int type value for rowId. The return value is undefined and can be anything, if the
128 * slot for rowId is null.
129 */
130 public abstract int getInt(int rowId);
131
132 /**
133 * Gets int type values from [rowId, rowId + count). The return values for the null slots are
134 * undefined and can be anything.
135 */
136 public int[] getInts(int rowId, int count) {
137 int[] res = new int[count];
138 for (int i = 0; i < count; i++) {
139 res[i] = getInt(rowId + i);
140 }
141 return res;
142 }
143
144 /**
145 * Returns the long type value for rowId. The return value is undefined and can be anything, if
146 * the slot for rowId is null.
147 */
148 public abstract long getLong(int rowId);
149
150 /**
151 * Gets long type values from [rowId, rowId + count). The return values for the null slots are
152 * undefined and can be anything.
153 */
154 public long[] getLongs(int rowId, int count) {
155 long[] res = new long[count];
156 for (int i = 0; i < count; i++) {
157 res[i] = getLong(rowId + i);
158 }
159 return res;
160 }
161
162 /**
163 * Returns the float type value for rowId. The return value is undefined and can be anything, if
164 * the slot for rowId is null.
165 */
166 public abstract float getFloat(int rowId);
167
168 /**
169 * Gets float type values from [rowId, rowId + count). The return values for the null slots are
170 * undefined and can be anything.
171 */
172 public float[] getFloats(int rowId, int count) {
173 float[] res = new float[count];
174 for (int i = 0; i < count; i++) {
175 res[i] = getFloat(rowId + i);
176 }
177 return res;
178 }
179
180 /**
181 * Returns the double type value for rowId. The return value is undefined and can be anything, if
182 * the slot for rowId is null.
183 */
184 public abstract double getDouble(int rowId);
185
186 /**
187 * Gets double type values from [rowId, rowId + count). The return values for the null slots are
188 * undefined and can be anything.
189 */
190 public double[] getDoubles(int rowId, int count) {
191 double[] res = new double[count];
192 for (int i = 0; i < count; i++) {
193 res[i] = getDouble(rowId + i);
194 }
195 return res;
196 }
197
198 /**
199 * Returns the decimal type value for rowId. If the slot for rowId is null, it should return null.
200 */
201 public abstract BigDecimal getDecimal(int rowId, int precision, int scale);
202
203 /**
204 * Returns the string type value for rowId. If the slot for rowId is null, it should return null.
205 * Note that the returned UTF8String may point to the data of this column vector, please copy it
206 * if you want to keep it after this column vector is freed.
207 */
208 public abstract String getUTF8String(int rowId);
209
210 /**
211 * Returns the binary type value for rowId. If the slot for rowId is null, it should return null.
212 */
213 public abstract byte[] getBinary(int rowId);
214
215 /** @return child [[TiColumnVector]] at the given ordinal. */
216 protected abstract TiColumnVector getChild(int ordinal);
217
218 public int numOfRows() {
219 return numOfRows;
220 }
221 }