View Javadoc
1   /*
2    * Copyright 2021 TiKV Project Authors.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    * http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   */
17  
18  package org.tikv.common.codec;
19  
20  import java.math.BigDecimal;
21  import java.nio.charset.StandardCharsets;
22  import java.sql.Date;
23  import java.sql.Timestamp;
24  import java.util.Arrays;
25  import java.util.Comparator;
26  import java.util.List;
27  import org.joda.time.DateTime;
28  import org.joda.time.DateTimeZone;
29  import org.tikv.common.ExtendedDateTime;
30  import org.tikv.common.codec.Codec.DateTimeCodec;
31  import org.tikv.common.codec.Codec.DecimalCodec;
32  import org.tikv.common.codec.Codec.EnumCodec;
33  import org.tikv.common.exception.CodecException;
34  import org.tikv.common.exception.TypeException;
35  import org.tikv.common.meta.TiColumnInfo;
36  import org.tikv.common.types.Converter;
37  import org.tikv.common.types.DataType;
38  
39  public class RowEncoderV2 {
40    private static final long SIGN_MASK = 0x8000000000000000L;
41    private int numCols;
42    private Object[] values;
43    private org.tikv.common.codec.RowV2 row;
44  
45    public RowEncoderV2() {}
46  
47    public byte[] encode(List<TiColumnInfo> columnInfos, List<Object> values) {
48      this.row = org.tikv.common.codec.RowV2.createEmpty();
49      numCols = columnInfos.size();
50      for (int i = 0; i < numCols; i++) {
51        if (columnInfos.get(i).getId() > 255) {
52          this.row.large = true;
53        }
54        if (values.get(i) == null) {
55          this.row.numNullCols++;
56        } else {
57          this.row.numNotNullCols++;
58        }
59      }
60  
61      this.values = new Object[numCols];
62      reformatCols(columnInfos, values);
63      encodeRowCols(columnInfos);
64      return this.row.toBytes();
65    }
66  
67    private void reformatCols(List<TiColumnInfo> columnInfos, List<Object> valueList) {
68      int nullIdx = numCols - row.numNullCols;
69      int notNullIdx = 0;
70      if (this.row.large) {
71        row.initColIDs32();
72        row.initOffsets32();
73      } else {
74        row.initColIDs();
75        row.initOffsets();
76      }
77      for (int i = 0; i < numCols; i++) {
78        int colID = (int) columnInfos.get(i).getId();
79        Object value = valueList.get(i);
80        if (value == null) {
81          if (this.row.large) {
82            this.row.colIDs32[nullIdx] = colID;
83          } else {
84            this.row.colIDs[nullIdx] = (byte) colID;
85          }
86          nullIdx++;
87        } else {
88          if (this.row.large) {
89            this.row.colIDs32[notNullIdx] = colID;
90          } else {
91            this.row.colIDs[notNullIdx] = (byte) colID;
92          }
93          valueList.set(notNullIdx, value);
94          notNullIdx++;
95        }
96      }
97      // sort colIDs together with corresponding values
98      int len = this.row.numNotNullCols;
99      if (this.row.large) {
100       int[] temp = Arrays.copyOfRange(this.row.colIDs32, 0, len);
101       Integer[] idx = new Integer[len];
102       for (int i = 0; i < len; i++) {
103         idx[i] = i;
104       }
105       Arrays.sort(idx, Comparator.comparingInt(o -> this.row.colIDs32[o]));
106       for (int i = 0; i < len; i++) {
107         this.row.colIDs32[i] = temp[idx[i]];
108         this.values[i] = valueList.get(idx[i]);
109       }
110       if (this.row.numNullCols > 0) {
111         len = this.row.numNullCols;
112         int start = this.row.numNotNullCols;
113         temp = Arrays.copyOfRange(this.row.colIDs32, start, start + len);
114         idx = new Integer[len];
115         for (int i = 0; i < len; i++) {
116           idx[i] = i;
117         }
118         Arrays.sort(idx, Comparator.comparingInt(o -> this.row.colIDs32[start + o]));
119         for (int i = 0; i < len; i++) {
120           // values should all be null
121           this.row.colIDs32[start + i] = temp[idx[i]];
122         }
123       }
124     } else {
125       byte[] temp = Arrays.copyOfRange(this.row.colIDs, 0, len);
126       Integer[] idx = new Integer[len];
127       for (int i = 0; i < len; i++) {
128         idx[i] = i;
129       }
130       Arrays.sort(idx, Comparator.comparingInt(o -> this.row.colIDs[o]));
131       for (int i = 0; i < len; i++) {
132         this.row.colIDs[i] = temp[idx[i]];
133         this.values[i] = valueList.get(idx[i]);
134       }
135       if (this.row.numNullCols > 0) {
136         len = this.row.numNullCols;
137         int start = this.row.numNotNullCols;
138         temp = Arrays.copyOfRange(this.row.colIDs, start, start + len);
139         idx = new Integer[len];
140         for (int i = 0; i < len; i++) {
141           idx[i] = i;
142         }
143         Arrays.sort(idx, Comparator.comparingInt(o -> this.row.colIDs[start + o]));
144         for (int i = 0; i < len; i++) {
145           // values should all be null
146           this.row.colIDs[start + i] = temp[idx[i]];
147         }
148       }
149     }
150   }
151 
152   private TiColumnInfo getColumnInfoByID(List<TiColumnInfo> columnInfos, int id) {
153     for (TiColumnInfo columnInfo : columnInfos) {
154       if (columnInfo.getId() == id) {
155         return columnInfo;
156       }
157     }
158     throw new CodecException("column id " + id + " not found in ColumnInfo");
159   }
160 
161   private void encodeRowCols(List<TiColumnInfo> columnInfos) {
162     CodecDataOutputLittleEndian cdo = new CodecDataOutputLittleEndian();
163     for (int i = 0; i < this.row.numNotNullCols; i++) {
164       Object o = this.values[i];
165       if (this.row.large) {
166         encodeValue(cdo, o, getColumnInfoByID(columnInfos, this.row.colIDs32[i]).getType());
167       } else {
168         encodeValue(cdo, o, getColumnInfoByID(columnInfos, this.row.colIDs[i]).getType());
169       }
170       if (cdo.size() > 0xffff && !this.row.large) {
171         // only initialize once
172         this.row.initColIDs32();
173         for (int j = 0; j < numCols; j++) {
174           this.row.colIDs32[j] = this.row.colIDs[j];
175         }
176         this.row.initOffsets32();
177         if (numCols >= 0) {
178           System.arraycopy(this.row.offsets, 0, this.row.offsets32, 0, numCols);
179         }
180         this.row.large = true;
181       }
182       if (this.row.large) {
183         this.row.offsets32[i] = cdo.size();
184       } else {
185         this.row.offsets[i] = cdo.size();
186       }
187     }
188     this.row.data = cdo.toBytes();
189   }
190 
191   private void encodeValue(CodecDataOutput cdo, Object value, DataType tp) {
192     switch (tp.getType()) {
193       case TypeLonglong:
194       case TypeLong:
195       case TypeInt24:
196       case TypeShort:
197       case TypeTiny:
198         // TODO: encode consider unsigned
199         encodeInt(cdo, (long) value);
200         break;
201       case TypeFloat:
202       case TypeDouble:
203         if (value instanceof Double) {
204           encodeDouble(cdo, value);
205         } else if (value instanceof Float) {
206           encodeFloat(cdo, value);
207         } else {
208           throw new TypeException("type does not match in encoding, should be float/double");
209         }
210         break;
211       case TypeString:
212       case TypeVarString:
213       case TypeVarchar:
214       case TypeBlob:
215       case TypeTinyBlob:
216       case TypeMediumBlob:
217       case TypeLongBlob:
218         encodeString(cdo, value);
219         break;
220       case TypeNewDecimal:
221         encodeDecimal(cdo, value);
222         break;
223       case TypeBit:
224         encodeBit(cdo, value);
225         break;
226       case TypeTimestamp:
227         encodeTimestamp(cdo, value, DateTimeZone.UTC);
228         break;
229       case TypeDate:
230       case TypeDatetime:
231         encodeTimestamp(cdo, value, Converter.getLocalTimezone());
232         break;
233       case TypeDuration:
234       case TypeYear:
235         encodeInt(cdo, (long) value);
236         break;
237       case TypeEnum:
238         encodeEnum(cdo, value, tp.getElems());
239         break;
240       case TypeSet:
241         encodeSet(cdo, value, tp.getElems());
242         break;
243       case TypeJSON:
244         encodeJson(cdo, value);
245         break;
246       case TypeNull:
247         // ??
248       case TypeDecimal:
249       case TypeGeometry:
250       case TypeNewDate:
251         throw new CodecException("type should not appear in encoding");
252       default:
253         throw new CodecException("invalid data type: " + tp.getType().name());
254     }
255   }
256 
257   private void encodeInt(CodecDataOutput cdo, long value) {
258     if (value == (byte) value) {
259       cdo.writeByte((byte) value);
260     } else if (value == (short) value) {
261       cdo.writeShort((short) value);
262     } else if (value == (int) value) {
263       cdo.writeInt((int) value);
264     } else {
265       cdo.writeLong(value);
266     }
267   }
268 
269   private void encodeFloat(CodecDataOutput cdo, Object value) {
270     long u = Double.doubleToLongBits((float) value);
271     if ((float) value >= 0) {
272       u |= SIGN_MASK;
273     } else {
274       u = ~u;
275     }
276     u = Long.reverseBytes(u);
277     cdo.writeLong(u);
278   }
279 
280   private void encodeDouble(CodecDataOutput cdo, Object value) {
281     long u = Double.doubleToLongBits((double) value);
282     if ((double) value >= 0) {
283       u |= SIGN_MASK;
284     } else {
285       u = ~u;
286     }
287     u = Long.reverseBytes(u);
288     cdo.writeLong(u);
289   }
290 
291   private void encodeBit(CodecDataOutput cdo, Object value) {
292     long s = 0;
293     if (value instanceof Long) {
294       s = (long) value;
295     } else if (value instanceof byte[]) {
296       for (byte b : (byte[]) value) {
297         s <<= 8;
298         s |= b;
299       }
300     } else {
301       throw new CodecException("invalid bytes type " + value.getClass());
302     }
303     encodeInt(cdo, s);
304   }
305 
306   private void encodeTimestamp(CodecDataOutput cdo, Object value, DateTimeZone tz) {
307     if (value instanceof Timestamp) {
308       Timestamp timestamp = (Timestamp) value;
309       DateTime dateTime = new DateTime(timestamp.getTime());
310       int nanos = timestamp.getNanos();
311       ExtendedDateTime extendedDateTime = new ExtendedDateTime(dateTime, (nanos / 1000) % 1000);
312       long t = DateTimeCodec.toPackedLong(extendedDateTime, tz);
313       encodeInt(cdo, t);
314     } else if (value instanceof Date) {
315       ExtendedDateTime extendedDateTime =
316           new ExtendedDateTime(new DateTime(((Date) value).getTime()));
317       long t = DateTimeCodec.toPackedLong(extendedDateTime, tz);
318       encodeInt(cdo, t);
319     } else {
320       throw new CodecException("invalid timestamp type " + value.getClass());
321     }
322   }
323 
324   private void encodeString(CodecDataOutput cdo, Object value) {
325     if (value instanceof byte[]) {
326       cdo.write((byte[]) value);
327     } else if (value instanceof String) {
328       cdo.write(((String) value).getBytes(StandardCharsets.UTF_8));
329     } else {
330       throw new CodecException("invalid string type " + value.getClass());
331     }
332   }
333 
334   private void encodeDecimal(CodecDataOutput cdo, Object value) {
335     if (value instanceof MyDecimal) {
336       MyDecimal dec = (MyDecimal) value;
337       DecimalCodec.writeDecimal(cdo, dec, dec.precision(), dec.frac());
338     } else if (value instanceof BigDecimal) {
339       MyDecimal dec = new MyDecimal();
340       BigDecimal decimal = (BigDecimal) value;
341       int prec = decimal.precision();
342       int frac = decimal.scale();
343       dec.fromString(((BigDecimal) value).toPlainString());
344       DecimalCodec.writeDecimal(cdo, dec, prec, frac);
345     } else {
346       throw new CodecException("invalid decimal type " + value.getClass());
347     }
348   }
349 
350   private void encodeEnum(CodecDataOutput cdo, Object value, List<String> elems) {
351     if (value instanceof Integer) {
352       encodeInt(cdo, (int) value);
353     } else if (value instanceof String) {
354       int val = EnumCodec.parseEnumName((String) value, elems);
355       encodeInt(cdo, val);
356     } else {
357       throw new CodecException("invalid enum type " + value.getClass());
358     }
359   }
360 
361   private void encodeSet(CodecDataOutput cdo, Object value, List<String> elems) {
362     // TODO: Support encoding set
363     throw new CodecException("Set encoding is not yet supported.");
364   }
365 
366   private void encodeJson(CodecDataOutput cdo, Object value) {
367     // TODO: Support encoding JSON
368     throw new CodecException("JSON encoding is not yet supported.");
369   }
370 }