View Javadoc
1   /*
2    * Copyright 2021 TiKV Project Authors.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    * http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   */
17  
18  package org.tikv.common.columnar;
19  
import com.google.common.primitives.UnsignedLong;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.sql.Timestamp;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.joda.time.LocalDate;
import org.tikv.common.codec.CodecDataInput;
import org.tikv.common.codec.MyDecimal;
import org.tikv.common.exception.UnsupportedSyntaxException;
import org.tikv.common.types.*;
import org.tikv.common.util.JsonUtils;
32  
33  /** An implementation of {@link TiColumnVector}. All data is stored in TiDB chunk format. */
34  public class TiChunkColumnVector extends TiColumnVector {
35    /** Represents the length of each different data type */
36    private final int fixLength;
37    /** Represents how many nulls in this column vector */
38    private final int numOfNulls;
39    /** Can be used to determine data at rowId is null or not */
40    private final byte[] nullBitMaps;
41    /** Can be used to read non-fixed length data type such as string */
42    private final long[] offsets;
43  
44    private final ByteBuffer data;
45  
46    public TiChunkColumnVector(
47        DataType dataType,
48        int fixLength,
49        int numOfRows,
50        int numOfNulls,
51        byte[] nullBitMaps,
52        long[] offsets,
53        ByteBuffer data) {
54      super(dataType, numOfRows);
55      this.fixLength = fixLength;
56      this.numOfNulls = numOfNulls;
57      this.nullBitMaps = nullBitMaps;
58      this.data = data;
59      this.offsets = offsets;
60    }
61  
62    public final String typeName() {
63      return dataType().getType().name();
64    }
65  
66    // TODO: once we switch off_heap mode, we need control memory access pattern.
67    public void free() {}
68  
69    /**
70     * Cleans up memory for this column vector. The column vector is not usable after this.
71     *
72     * <p>This overwrites `AutoCloseable.close` to remove the `throws` clause, as column vector is
73     * in-memory and we don't expect any exception to happen during closing.
74     */
75    @Override
76    public void close() {}
77  
78    /** Returns true if this column vector contains any null values. */
79    @Override
80    public boolean hasNull() {
81      return numOfNulls > 0;
82    }
83  
84    /** Returns the number of nulls in this column vector. */
85    @Override
86    public int numNulls() {
87      return numOfNulls;
88    }
89  
90    public boolean isNullAt(int rowId) {
91      int nullByte = this.nullBitMaps[rowId / 8] & 0XFF;
92      return (nullByte & (1 << (rowId & 7))) == 0;
93    }
94  
95    /**
96     * Returns the boolean type value for rowId. The return value is undefined and can be anything, if
97     * the slot for rowId is null.
98     */
99    @Override
100   public boolean getBoolean(int rowId) {
101     return false;
102   }
103 
104   public byte getByte(int rowId) {
105     return data.get();
106   }
107 
108   public short getShort(int rowId) {
109     return data.getShort();
110   }
111 
112   public int getInt(int rowId) {
113     return (int) getLong(rowId);
114   }
115 
116   private boolean isDataTimeOrTimestamp() {
117     return type instanceof DateTimeType || type instanceof TimestampType;
118   }
119 
120   private long getTime(int rowId) {
121     int startPos = rowId * fixLength;
122     TiCoreTime coreTime = new TiCoreTime(data.getLong(startPos));
123 
124     int year = coreTime.getYear();
125     int month = coreTime.getMonth();
126     int day = coreTime.getDay();
127     int hour = coreTime.getHour();
128     int minute = coreTime.getMinute();
129     int second = coreTime.getSecond();
130     long microsecond = coreTime.getMicroSecond();
131     boolean zeroDate = false, zeroTime = false;
132     boolean zeroInDate = false;
133     if (year == 0 && month == 0 && day == 0) {
134       zeroDate = true;
135     }
136     if (hour == 0 && minute == 0 && microsecond == 0) {
137       zeroTime = true;
138     }
139     if (month == 0 || day == 0) {
140       zeroInDate = true;
141     }
142     // This behavior can be modified using the zeroDateTimeBehavior configuration property.
143     // The allowable values are:
144     //    * exception (the default), which throws an SQLException with an SQLState of S1009.
145     //    * convertToNull, which returns NULL instead of the date.
146     //    * round, which rounds the date to the nearest closest value which is 0001-01-01.
147     if (zeroDate && zeroTime) {
148       year = 1;
149       month = 1;
150       day = 1;
151     } else if (!zeroDate && zeroInDate) {
152       String dateString = String.format("%04d-%02d-%02d", year, month, day);
153       try {
154         Date d = new SimpleDateFormat("yyyy-MM-dd").parse(dateString);
155         year = d.getYear() + 1900;
156         month = d.getMonth() + 1;
157         day = d.getDate();
158       } catch (Exception e) {
159         throw new UnsupportedSyntaxException("illegal date value: " + dateString);
160       }
161     }
162     if (this.type instanceof DateType) {
163       LocalDate date = new LocalDate(year, month, day);
164       return ((DateType) type).getDays(date);
165     } else if (type instanceof DateTimeType || type instanceof TimestampType) {
166       // only return microsecond from epoch.
167       Timestamp ts =
168           new Timestamp(
169               year - 1900, month - 1, day, hour, minute, second, (int) microsecond * 1000);
170       return ts.getTime() / 1000 * 1000000 + ts.getNanos() / 1000;
171     } else {
172       throw new UnsupportedOperationException("data, datetime, timestamp are already handled.");
173     }
174   }
175 
176   private long getLongFromBinary(int rowId) {
177     byte[] bytes = getBinary(rowId);
178     if (bytes.length == 0) return 0;
179     long result = 0;
180     for (byte b : bytes) {
181       result = (result << 8) | (b & 0xff);
182     }
183     return result;
184   }
185 
186   public long getLong(int rowId) {
187     if (type instanceof IntegerType) {
188       if (type instanceof BitType) {
189         return getLongFromBinary(rowId);
190       }
191       return data.getLong(rowId * fixLength);
192     } else if (type instanceof AbstractDateTimeType) {
193       return getTime(rowId);
194     } else if (type instanceof TimeType) {
195       return data.getLong(rowId * fixLength);
196     }
197 
198     throw new UnsupportedOperationException("only IntegerType and Time related are supported.");
199   }
200 
201   public float getFloat(int rowId) {
202     return data.getFloat(rowId * fixLength);
203   }
204 
205   public double getDouble(int rowId) {
206     return data.getDouble(rowId * fixLength);
207   }
208 
209   private MyDecimal getMyDecimal(int rowId) {
210     int startPos = rowId * fixLength;
211     int digitsInt = data.get(startPos);
212     int digitsFrac = data.get(startPos + 1);
213     int resultFrac = data.get(startPos + 2);
214     boolean negative = data.get(startPos + 3) == 1;
215     int[] wordBuf = new int[9];
216     for (int i = 0; i < 9; i++) {
217       wordBuf[i] = data.getInt(startPos + 4 + i * 4);
218     }
219 
220     return new MyDecimal(digitsInt, digitsFrac, negative, wordBuf);
221   }
222   /**
223    * Returns the decimal type value for rowId. If the slot for rowId is null, it should return null.
224    */
225   @Override
226   /** digitsInt int8 1 digitsFrac int8 1 resultFrac int8 1 negative bool 1 wordBuf int32[9] 36 */
227   public BigDecimal getDecimal(int rowId, int precision, int scale) {
228     // this is to handle unsigned long to avoid overflow.
229     if (type instanceof IntegerType) {
230       return new BigDecimal(UnsignedLong.fromLongBits(this.getLong(rowId)).bigIntegerValue());
231     }
232     // TODO figure out how to use precision and scale
233     MyDecimal decimal = getMyDecimal(rowId);
234     return decimal.toBigDecimal();
235   }
236 
237   private String getEnumString(int rowId) {
238     int start = (int) this.offsets[rowId];
239     long end = this.offsets[rowId + 1];
240     return new String(getRawBinary(start + 8, end));
241   }
242 
243   private String getJsonString(int rowId) {
244     long start = this.offsets[rowId];
245     long end = this.offsets[rowId + 1];
246     return JsonUtils.parseJson(new CodecDataInput(getRawBinary(start, end))).toString();
247   }
248 
249   public String getUTF8String(int rowId) {
250     if (type instanceof EnumType) {
251       return getEnumString(rowId);
252     }
253 
254     if (type instanceof JsonType) {
255       return getJsonString(rowId);
256     }
257 
258     return new String(getBinary(rowId));
259   }
260 
261   private byte[] getRawBinary(long start, long end) {
262     byte[] buffer = new byte[(int) (end - start)];
263     for (int i = 0; i < (end - start); i++) {
264       buffer[i] = data.get((int) (start + i));
265     }
266     return buffer;
267   }
268 
269   /**
270    * Returns the binary type value for rowId. If the slot for rowId is null, it should return null.
271    */
272   @Override
273   public byte[] getBinary(int rowId) {
274     int start = (int) this.offsets[rowId];
275     long end = this.offsets[rowId + 1];
276     return getRawBinary(start, end);
277   }
278 
279   /** @return child [[TiColumnVector]] at the given ordinal. */
280   @Override
281   protected TiColumnVector getChild(int ordinal) {
282     throw new UnsupportedOperationException("TiChunkColumnVector does not support this operation");
283   }
284 }