|
31 | 31 | all_schemas = {} |
32 | 32 |
|
33 | 33 | all_schemas["customer"] = [ |
34 | | - ("C_CUSTKEY", pyarrow.int32()), |
| 34 | + ("C_CUSTKEY", pyarrow.int64()), |
35 | 35 | ("C_NAME", pyarrow.string()), |
36 | 36 | ("C_ADDRESS", pyarrow.string()), |
37 | | - ("C_NATIONKEY", pyarrow.int32()), |
| 37 | + ("C_NATIONKEY", pyarrow.int64()), |
38 | 38 | ("C_PHONE", pyarrow.string()), |
39 | 39 | ("C_ACCTBAL", pyarrow.decimal128(15, 2)), |
40 | 40 | ("C_MKTSEGMENT", pyarrow.string()), |
41 | 41 | ("C_COMMENT", pyarrow.string()), |
42 | 42 | ] |
43 | 43 |
|
44 | 44 | all_schemas["lineitem"] = [ |
45 | | - ("L_ORDERKEY", pyarrow.int32()), |
46 | | - ("L_PARTKEY", pyarrow.int32()), |
47 | | - ("L_SUPPKEY", pyarrow.int32()), |
| 45 | + ("L_ORDERKEY", pyarrow.int64()), |
| 46 | + ("L_PARTKEY", pyarrow.int64()), |
| 47 | + ("L_SUPPKEY", pyarrow.int64()), |
48 | 48 | ("L_LINENUMBER", pyarrow.int32()), |
49 | 49 | ("L_QUANTITY", pyarrow.decimal128(15, 2)), |
50 | 50 | ("L_EXTENDEDPRICE", pyarrow.decimal128(15, 2)), |
|
61 | 61 | ] |
62 | 62 |
|
63 | 63 | all_schemas["nation"] = [ |
64 | | - ("N_NATIONKEY", pyarrow.int32()), |
| 64 | + ("N_NATIONKEY", pyarrow.int64()), |
65 | 65 | ("N_NAME", pyarrow.string()), |
66 | | - ("N_REGIONKEY", pyarrow.int32()), |
| 66 | + ("N_REGIONKEY", pyarrow.int64()), |
67 | 67 | ("N_COMMENT", pyarrow.string()), |
68 | 68 | ] |
69 | 69 |
|
70 | 70 | all_schemas["orders"] = [ |
71 | | - ("O_ORDERKEY", pyarrow.int32()), |
72 | | - ("O_CUSTKEY", pyarrow.int32()), |
| 71 | + ("O_ORDERKEY", pyarrow.int64()), |
| 72 | + ("O_CUSTKEY", pyarrow.int64()), |
73 | 73 | ("O_ORDERSTATUS", pyarrow.string()), |
74 | 74 | ("O_TOTALPRICE", pyarrow.decimal128(15, 2)), |
75 | 75 | ("O_ORDERDATE", pyarrow.date32()), |
|
80 | 80 | ] |
81 | 81 |
|
82 | 82 | all_schemas["part"] = [ |
83 | | - ("P_PARTKEY", pyarrow.int32()), |
| 83 | + ("P_PARTKEY", pyarrow.int64()), |
84 | 84 | ("P_NAME", pyarrow.string()), |
85 | 85 | ("P_MFGR", pyarrow.string()), |
86 | 86 | ("P_BRAND", pyarrow.string()), |
|
92 | 92 | ] |
93 | 93 |
|
94 | 94 | all_schemas["partsupp"] = [ |
95 | | - ("PS_PARTKEY", pyarrow.int32()), |
96 | | - ("PS_SUPPKEY", pyarrow.int32()), |
| 95 | + ("PS_PARTKEY", pyarrow.int64()), |
| 96 | + ("PS_SUPPKEY", pyarrow.int64()), |
97 | 97 | ("PS_AVAILQTY", pyarrow.int32()), |
98 | 98 | ("PS_SUPPLYCOST", pyarrow.decimal128(15, 2)), |
99 | 99 | ("PS_COMMENT", pyarrow.string()), |
100 | 100 | ] |
101 | 101 |
|
102 | 102 | all_schemas["region"] = [ |
103 | | - ("r_REGIONKEY", pyarrow.int32()), |
| 103 | + ("r_REGIONKEY", pyarrow.int64()), |
104 | 104 | ("r_NAME", pyarrow.string()), |
105 | 105 | ("r_COMMENT", pyarrow.string()), |
106 | 106 | ] |
107 | 107 |
|
108 | 108 | all_schemas["supplier"] = [ |
109 | | - ("S_SUPPKEY", pyarrow.int32()), |
| 109 | + ("S_SUPPKEY", pyarrow.int64()), |
110 | 110 | ("S_NAME", pyarrow.string()), |
111 | 111 | ("S_ADDRESS", pyarrow.string()), |
112 | 112 | ("S_NATIONKEY", pyarrow.int32()), |
|
125 | 125 | # in to handle the trailing | in the file |
126 | 126 | output_cols = [r[0] for r in curr_schema] |
127 | 127 |
|
| 128 | + curr_schema = [ pyarrow.field(r[0], r[1], nullable=False) for r in curr_schema] |
| 129 | + |
128 | 130 | # Trailing | requires extra field for in processing |
129 | 131 | curr_schema.append(("some_null", pyarrow.null())) |
130 | 132 |
|
|
0 commit comments