View Javadoc
1   /*
2    * Copyright 2021 TiKV Project Authors.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    * http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   */
17  
18  package org.tikv.common.columnar;
19  
20  import java.math.BigDecimal;
21  import org.tikv.common.types.DataType;
22  
23  /**
24   * This abstract class is mostly copied from Spark's ColumnVector (we do not link to it here
25   * because we do not want to pollute the TiKV Java client's dependencies).
26   *
27   * <p>Most of the APIs take the rowId as a parameter. This is the batch local 0-based row id for
28   * values in this TiColumnVector.
29   *
30   * <p>Spark only calls the specific `get` method that matches the data type of this {@link
31   * TiColumnVector}, e.g. if it is of int type, Spark is guaranteed to only call {@link
32   * #getInt(int)} or {@link #getInts(int, int)}.
33   *
34   * <p>TiColumnVector is expected to be reused during the entire data loading process, to avoid
35   * allocating memory again and again.
36   */
37  public abstract class TiColumnVector implements AutoCloseable {
38  
39    private final int numOfRows;
40    /** Data type for this column. */
41    protected DataType type;
42  
43    /** Sets up the data type of this column vector. */
44    protected TiColumnVector(DataType type, int numOfRows) {
45      this.type = type;
46      this.numOfRows = numOfRows;
47    }
48  
49    /** Returns the data type of this column vector. */
50    public final DataType dataType() {
51      return type;
52    }
53  
54    /**
55     * Cleans up memory for this column vector. The column vector is not usable after this.
56     *
57     * <p>This overwrites `AutoCloseable.close` to remove the `throws` clause, as column vector is
58     * in-memory and we don't expect any exception to happen during closing.
59     */
60    @Override
61    public abstract void close();
62  
63    /** Returns true if this column vector contains any null values. */
64    public abstract boolean hasNull();
65  
66    /** Returns the number of nulls in this column vector. */
67    public abstract int numNulls();
68  
69    /** Returns whether the value at rowId is NULL. */
70    public abstract boolean isNullAt(int rowId);
71  
72    /**
73     * Returns the boolean type value for rowId. The return value is undefined and can be anything, if
74     * the slot for rowId is null.
75     */
76    public abstract boolean getBoolean(int rowId);
77  
78    /**
79     * Gets boolean type values from [rowId, rowId + count). The return values for the null slots are
80     * undefined and can be anything.
81     */
82    public boolean[] getBooleans(int rowId, int count) {
83      boolean[] res = new boolean[count];
84      for (int i = 0; i < count; i++) {
85        res[i] = getBoolean(rowId + i);
86      }
87      return res;
88    }
89  
90    /**
91     * Returns the byte type value for rowId. The return value is undefined and can be anything, if
92     * the slot for rowId is null.
93     */
94    public abstract byte getByte(int rowId);
95  
96    /**
97     * Gets byte type values from [rowId, rowId + count). The return values for the null slots are
98     * undefined and can be anything.
99     */
100   public byte[] getBytes(int rowId, int count) {
101     byte[] res = new byte[count];
102     for (int i = 0; i < count; i++) {
103       res[i] = getByte(rowId + i);
104     }
105     return res;
106   }
107 
108   /**
109    * Returns the short type value for rowId. The return value is undefined and can be anything, if
110    * the slot for rowId is null.
111    */
112   public abstract short getShort(int rowId);
113 
114   /**
115    * Gets short type values from [rowId, rowId + count). The return values for the null slots are
116    * undefined and can be anything.
117    */
118   public short[] getShorts(int rowId, int count) {
119     short[] res = new short[count];
120     for (int i = 0; i < count; i++) {
121       res[i] = getShort(rowId + i);
122     }
123     return res;
124   }
125 
126   /**
127    * Returns the int type value for rowId. The return value is undefined and can be anything, if the
128    * slot for rowId is null.
129    */
130   public abstract int getInt(int rowId);
131 
132   /**
133    * Gets int type values from [rowId, rowId + count). The return values for the null slots are
134    * undefined and can be anything.
135    */
136   public int[] getInts(int rowId, int count) {
137     int[] res = new int[count];
138     for (int i = 0; i < count; i++) {
139       res[i] = getInt(rowId + i);
140     }
141     return res;
142   }
143 
144   /**
145    * Returns the long type value for rowId. The return value is undefined and can be anything, if
146    * the slot for rowId is null.
147    */
148   public abstract long getLong(int rowId);
149 
150   /**
151    * Gets long type values from [rowId, rowId + count). The return values for the null slots are
152    * undefined and can be anything.
153    */
154   public long[] getLongs(int rowId, int count) {
155     long[] res = new long[count];
156     for (int i = 0; i < count; i++) {
157       res[i] = getLong(rowId + i);
158     }
159     return res;
160   }
161 
162   /**
163    * Returns the float type value for rowId. The return value is undefined and can be anything, if
164    * the slot for rowId is null.
165    */
166   public abstract float getFloat(int rowId);
167 
168   /**
169    * Gets float type values from [rowId, rowId + count). The return values for the null slots are
170    * undefined and can be anything.
171    */
172   public float[] getFloats(int rowId, int count) {
173     float[] res = new float[count];
174     for (int i = 0; i < count; i++) {
175       res[i] = getFloat(rowId + i);
176     }
177     return res;
178   }
179 
180   /**
181    * Returns the double type value for rowId. The return value is undefined and can be anything, if
182    * the slot for rowId is null.
183    */
184   public abstract double getDouble(int rowId);
185 
186   /**
187    * Gets double type values from [rowId, rowId + count). The return values for the null slots are
188    * undefined and can be anything.
189    */
190   public double[] getDoubles(int rowId, int count) {
191     double[] res = new double[count];
192     for (int i = 0; i < count; i++) {
193       res[i] = getDouble(rowId + i);
194     }
195     return res;
196   }
197 
198   /**
199    * Returns the decimal type value for rowId. If the slot for rowId is null, it should return null.
200    */
201   public abstract BigDecimal getDecimal(int rowId, int precision, int scale);
202 
203   /**
204    * Returns the string type value for rowId. If the slot for rowId is null, it should return null.
205    * Note that the returned UTF8String may point to the data of this column vector, please copy it
206    * if you want to keep it after this column vector is freed.
207    */
208   public abstract String getUTF8String(int rowId);
209 
210   /**
211    * Returns the binary type value for rowId. If the slot for rowId is null, it should return null.
212    */
213   public abstract byte[] getBinary(int rowId);
214 
215   /** @return child [[TiColumnVector]] at the given ordinal. */
216   protected abstract TiColumnVector getChild(int ordinal);
217 
218   public int numOfRows() {
219     return numOfRows;
220   }
221 }