1 /* 2 * Copyright 2021 TiKV Project Authors. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 * 16 */ 17 18 package org.tikv.common.columnar; 19 20 import java.math.BigDecimal; 21 import org.tikv.common.types.DataType; 22 23 /** 24 * An interface is mostly copied from Spark's ColumnVector (we do not link it here because we do not 25 * want to pollute tikv java client's dependencies). 26 * 27 * <p>Most of the APIs take the rowId as a parameter. This is the batch local 0-based row id for 28 * values in this TiColumnVector. 29 * 30 * <p>Spark only calls specific `get` method according to the data type of this {@link 31 * TiColumnVector}, e.g. if it's int type, Spark is guaranteed to only call {@link #getInt(int)} or 32 * {@link #getInts(int, int)}. 33 * 34 * <p>TiColumnVector is expected to be reused during the entire data loading process, to avoid 35 * allocating memory again and again. 36 */ 37 public abstract class TiColumnVector implements AutoCloseable { 38 39 private final int numOfRows; 40 /** Data type for this column. */ 41 protected DataType type; 42 43 /** Sets up the data type of this column vector. */ 44 protected TiColumnVector(DataType type, int numOfRows) { 45 this.type = type; 46 this.numOfRows = numOfRows; 47 } 48 49 /** Returns the data type of this column vector. */ 50 public final DataType dataType() { 51 return type; 52 } 53 54 /** 55 * Cleans up memory for this column vector. The column vector is not usable after this. 56 * 57 * <p>This overwrites `AutoCloseable.close` to remove the `throws` clause, as column vector is 58 * in-memory and we don't expect any exception to happen during closing. 59 */ 60 @Override 61 public abstract void close(); 62 63 /** Returns true if this column vector contains any null values. */ 64 public abstract boolean hasNull(); 65 66 /** Returns the number of nulls in this column vector. */ 67 public abstract int numNulls(); 68 69 /** Returns whether the value at rowId is NULL. */ 70 public abstract boolean isNullAt(int rowId); 71 72 /** 73 * Returns the boolean type value for rowId. The return value is undefined and can be anything, if 74 * the slot for rowId is null. 75 */ 76 public abstract boolean getBoolean(int rowId); 77 78 /** 79 * Gets boolean type values from [rowId, rowId + count). The return values for the null slots are 80 * undefined and can be anything. 81 */ 82 public boolean[] getBooleans(int rowId, int count) { 83 boolean[] res = new boolean[count]; 84 for (int i = 0; i < count; i++) { 85 res[i] = getBoolean(rowId + i); 86 } 87 return res; 88 } 89 90 /** 91 * Returns the byte type value for rowId. The return value is undefined and can be anything, if 92 * the slot for rowId is null. 93 */ 94 public abstract byte getByte(int rowId); 95 96 /** 97 * Gets byte type values from [rowId, rowId + count). The return values for the null slots are 98 * undefined and can be anything. 99 */ 100 public byte[] getBytes(int rowId, int count) { 101 byte[] res = new byte[count]; 102 for (int i = 0; i < count; i++) { 103 res[i] = getByte(rowId + i); 104 } 105 return res; 106 } 107 108 /** 109 * Returns the short type value for rowId. The return value is undefined and can be anything, if 110 * the slot for rowId is null. 111 */ 112 public abstract short getShort(int rowId); 113 114 /** 115 * Gets short type values from [rowId, rowId + count). The return values for the null slots are 116 * undefined and can be anything. 117 */ 118 public short[] getShorts(int rowId, int count) { 119 short[] res = new short[count]; 120 for (int i = 0; i < count; i++) { 121 res[i] = getShort(rowId + i); 122 } 123 return res; 124 } 125 126 /** 127 * Returns the int type value for rowId. The return value is undefined and can be anything, if the 128 * slot for rowId is null. 129 */ 130 public abstract int getInt(int rowId); 131 132 /** 133 * Gets int type values from [rowId, rowId + count). The return values for the null slots are 134 * undefined and can be anything. 135 */ 136 public int[] getInts(int rowId, int count) { 137 int[] res = new int[count]; 138 for (int i = 0; i < count; i++) { 139 res[i] = getInt(rowId + i); 140 } 141 return res; 142 } 143 144 /** 145 * Returns the long type value for rowId. The return value is undefined and can be anything, if 146 * the slot for rowId is null. 147 */ 148 public abstract long getLong(int rowId); 149 150 /** 151 * Gets long type values from [rowId, rowId + count). The return values for the null slots are 152 * undefined and can be anything. 153 */ 154 public long[] getLongs(int rowId, int count) { 155 long[] res = new long[count]; 156 for (int i = 0; i < count; i++) { 157 res[i] = getLong(rowId + i); 158 } 159 return res; 160 } 161 162 /** 163 * Returns the float type value for rowId. The return value is undefined and can be anything, if 164 * the slot for rowId is null. 165 */ 166 public abstract float getFloat(int rowId); 167 168 /** 169 * Gets float type values from [rowId, rowId + count). The return values for the null slots are 170 * undefined and can be anything. 171 */ 172 public float[] getFloats(int rowId, int count) { 173 float[] res = new float[count]; 174 for (int i = 0; i < count; i++) { 175 res[i] = getFloat(rowId + i); 176 } 177 return res; 178 } 179 180 /** 181 * Returns the double type value for rowId. The return value is undefined and can be anything, if 182 * the slot for rowId is null. 183 */ 184 public abstract double getDouble(int rowId); 185 186 /** 187 * Gets double type values from [rowId, rowId + count). The return values for the null slots are 188 * undefined and can be anything. 189 */ 190 public double[] getDoubles(int rowId, int count) { 191 double[] res = new double[count]; 192 for (int i = 0; i < count; i++) { 193 res[i] = getDouble(rowId + i); 194 } 195 return res; 196 } 197 198 /** 199 * Returns the decimal type value for rowId. If the slot for rowId is null, it should return null. 200 */ 201 public abstract BigDecimal getDecimal(int rowId, int precision, int scale); 202 203 /** 204 * Returns the string type value for rowId. If the slot for rowId is null, it should return null. 205 * Note that the returned UTF8String may point to the data of this column vector, please copy it 206 * if you want to keep it after this column vector is freed. 207 */ 208 public abstract String getUTF8String(int rowId); 209 210 /** 211 * Returns the binary type value for rowId. If the slot for rowId is null, it should return null. 212 */ 213 public abstract byte[] getBinary(int rowId); 214 215 /** @return child [[TiColumnVector]] at the given ordinal. */ 216 protected abstract TiColumnVector getChild(int ordinal); 217 218 public int numOfRows() { 219 return numOfRows; 220 } 221 }