package org.apache.carbondata.spark.load;

import java.util.Comparator;
import java.util.List;
import org.apache.carbondata.common.logging.LogServiceFactory;
import org.apache.carbondata.converter.SparkDataTypeConverterImpl;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.datastore.row.CarbonRow;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.datatype.StructField;
import org.apache.carbondata.core.metadata.datatype.StructType;
import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
import org.apache.carbondata.core.segmentmeta.SegmentMetaDataInfo;
import org.apache.carbondata.core.statusmanager.LoadMetadataDetails;
import org.apache.carbondata.core.statusmanager.SegmentStatus;
import org.apache.carbondata.core.util.CarbonProperties;
import org.apache.carbondata.core.util.CarbonTaskInfo;
import org.apache.carbondata.core.util.CarbonUtil;
import org.apache.carbondata.core.util.DataLoadMetrics;
import org.apache.carbondata.core.util.DataTypeUtil;
import org.apache.carbondata.core.util.TaskMetricsMap;
import org.apache.carbondata.core.util.ThreadLocalSessionInfo;
import org.apache.carbondata.core.util.ThreadLocalTaskInfo;
import org.apache.carbondata.hadoop.CarbonProjection;
import org.apache.carbondata.hadoop.api.CarbonTableOutputFormat;
import org.apache.carbondata.processing.datatypes.GenericDataType;
import org.apache.carbondata.processing.loading.CarbonDataLoadConfiguration;
import org.apache.carbondata.processing.loading.DataField;
import org.apache.carbondata.processing.loading.DataLoadProcessBuilder;
import org.apache.carbondata.processing.loading.FailureCauses;
import org.apache.carbondata.processing.loading.csvinput.CSVInputFormat;
import org.apache.carbondata.processing.loading.model.CarbonLoadModel;
import org.apache.carbondata.processing.sort.sortdata.NewRowComparator;
import org.apache.carbondata.processing.sort.sortdata.NewRowComparatorForNormalDims;
import org.apache.carbondata.processing.sort.sortdata.SortParameters;
import org.apache.carbondata.processing.util.CarbonDataProcessorUtil;
import org.apache.carbondata.processing.util.TableOptionConstant;
import org.apache.carbondata.spark.rdd.CarbonScanRDD;
import org.apache.carbondata.spark.rdd.CarbonScanRDD$;
import org.apache.carbondata.spark.rdd.InsertTaskCompletionListener;
import org.apache.carbondata.spark.rdd.StringArrayRow;
import org.apache.carbondata.spark.util.CommonUtil$;
import org.apache.carbondata.spark.util.Util;
import org.apache.carbondata.store.CarbonRowReadSupport;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.log4j.Logger;
import org.apache.spark.CarbonInputMetrics;
import org.apache.spark.DataSkewRangePartitioner;
import org.apache.spark.SparkContext;
import org.apache.spark.TaskContext;
import org.apache.spark.TaskContext$;
import org.apache.spark.broadcast.Broadcast;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
import org.apache.spark.sql.execution.command.ExecutionErrors;
import org.apache.spark.sql.types.ByteType$;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DateType$;
import org.apache.spark.sql.types.LongType$;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.sql.types.TimestampType$;
import org.apache.spark.sql.util.SparkSQLUtil$;
import org.apache.spark.sql.util.SparkTypeConverter$;
import org.apache.spark.storage.StorageLevel$;
import org.apache.spark.util.CollectionAccumulator;
import org.apache.spark.util.LongAccumulator;
import org.apache.spark.util.SerializableConfiguration;
import scala.Function1;
import scala.MatchError;
import scala.Option;
import scala.Option$;
import scala.Predef$;
import scala.Some;
import scala.Tuple2;
import scala.collection.Iterator;
import scala.collection.JavaConverters$;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.mutable.ArrayOps;
import scala.collection.mutable.Buffer;
import scala.collection.mutable.Buffer$;
import scala.collection.mutable.Map$;
import scala.math.Ordering;
import scala.math.PartialOrdering;
import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;
import scala.reflect.package$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.LazyRef;
import scala.runtime.RichInt$;
import scala.runtime.ScalaRunTime$;

/* compiled from: DataLoadProcessBuilderOnSpark.scala */
/* loaded from: input_file:org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark$.class */
public final class DataLoadProcessBuilderOnSpark$ {
    /** Holder for the singleton instance (Scala-object style); it is not assigned in the lines shown here. */
    public static DataLoadProcessBuilderOnSpark$ MODULE$;
    /** Logger handed out by {@link #LOGGER()}. */
    private final Logger LOGGER;

    static {
        // Creates one instance when the class is loaded and discards the reference here.
        // NOTE(review): presumably the constructor (outside this part of the file) stores
        // the instance in MODULE$ and initialises LOGGER - confirm.
        new DataLoadProcessBuilderOnSpark$();
    }

    /**
     * Accessor for the logger held by this instance.
     *
     * @return the value of the {@code LOGGER} field
     */
    private Logger LOGGER() {
        return LOGGER;
    }

    /**
     * Runs a load as a chain of Spark RDD transformations: input step, convert step,
     * sort by row data, then a final job over the sorted partitions.
     *
     * <p>The input RDD is the supplied dataset's RDD when {@code option} is defined;
     * otherwise it is built by scanning CSV through {@code CsvRDDHelper$} and converting
     * every row to a {@link StringArrayRow}.
     *
     * @param sparkSession          session whose {@link SparkContext} creates broadcasts, accumulators and runs the job
     * @param option                optional input dataset; when empty the CSV scan path is used
     * @param carbonLoadModel       load model, broadcast to the tasks
     * @param configuration         Hadoop configuration; mutated here (the application name is stored
     *                              under {@code CARBON_WRITTEN_BY_APPNAME}) and then broadcast
     * @param collectionAccumulator accumulator forwarded to the final per-partition function
     * @param z                     flag forwarded as the last argument of
     *                              {@code DataLoadProcessorStepOnSpark$.convertFunc}; its meaning is not visible here
     * @return the result of {@link #updateLoadStatus} for the "Partial Success Accumulator"
     */
    public Tuple2<String, Tuple2<LoadMetadataDetails, ExecutionErrors>>[] loadDataUsingGlobalSort(SparkSession sparkSession, Option<Dataset<Row>> option, CarbonLoadModel carbonLoadModel, Configuration configuration, CollectionAccumulator<Map<String, SegmentMetaDataInfo>> collectionAccumulator, boolean z) {
        RDD map;
        RDD mapPartitionsWithIndex;
        // Lazy holder for the row Ordering used by sortBy below.
        LazyRef lazyRef = new LazyRef();
        // z2 records whether the input comes from the CSV scan (true) or from the dataset (false).
        boolean z2 = false;
        if (option.isDefined()) {
            map = ((Dataset) option.get()).rdd();
        } else {
            z2 = true;
            int length = carbonLoadModel.getCsvHeaderColumns().length;
            map = CsvRDDHelper$.MODULE$.csvFileScanRDD(sparkSession, carbonLoadModel, configuration).map(internalRow -> {
                return DataLoadProcessorStepOnSpark$.MODULE$.toStringArrayRow(internalRow, length);
            }, ClassTag$.MODULE$.apply(StringArrayRow.class));
        }
        RDD rdd = map;
        SparkContext sparkContext = sparkSession.sparkContext();
        Broadcast broadcast = sparkContext.broadcast(carbonLoadModel, ClassTag$.MODULE$.apply(CarbonLoadModel.class));
        // One accumulator per step; their values are logged once the job has finished.
        LongAccumulator longAccumulator = sparkContext.longAccumulator("Partial Success Accumulator");
        LongAccumulator longAccumulator2 = sparkContext.longAccumulator("Input Processor Accumulator");
        LongAccumulator longAccumulator3 = sparkContext.longAccumulator("Convert Processor Accumulator");
        LongAccumulator longAccumulator4 = sparkContext.longAccumulator("Sort Processor Accumulator");
        LongAccumulator longAccumulator5 = sparkContext.longAccumulator("Write Processor Accumulator");
        // The application name must be written before the configuration is broadcast.
        configuration.set(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, sparkSession.sparkContext().appName());
        Broadcast<SerializableConfiguration> broadCastHadoopConf = SparkSQLUtil$.MODULE$.broadCastHadoopConf(sparkContext, configuration);
        // Input step: CSV rows go through inputFuncForCsvRows; dataset rows are first
        // turned into Object[] by toRDDIterator and then go through inputFunc.
        if (z2) {
            mapPartitionsWithIndex = rdd.mapPartitionsWithIndex((obj, iterator) -> {
                return $anonfun$loadDataUsingGlobalSort$2(broadcast, longAccumulator2, BoxesRunTime.unboxToInt(obj), iterator);
            }, rdd.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(CarbonRow.class));
        } else {
            RDD mapPartitions = rdd.mapPartitions(iterator2 -> {
                return DataLoadProcessorStepOnSpark$.MODULE$.toRDDIterator(iterator2, broadcast);
            }, rdd.mapPartitions$default$2(), ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(Object.class)));
            mapPartitionsWithIndex = mapPartitions.mapPartitionsWithIndex((obj2, iterator3) -> {
                return $anonfun$loadDataUsingGlobalSort$4(broadcast, longAccumulator2, BoxesRunTime.unboxToInt(obj2), iterator3);
            }, mapPartitions.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(CarbonRow.class));
        }
        RDD rdd2 = mapPartitionsWithIndex;
        // Convert step (convertFunc), followed by a filter that drops null rows.
        RDD filter = rdd2.mapPartitionsWithIndex((obj3, iterator4) -> {
            return $anonfun$loadDataUsingGlobalSort$5(broadCastHadoopConf, broadcast, longAccumulator, longAccumulator3, z, BoxesRunTime.unboxToInt(obj3), iterator4);
        }, rdd2.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(CarbonRow.class)).filter(carbonRow -> {
            return BoxesRunTime.boxToBoolean($anonfun$loadDataUsingGlobalSort$6(carbonRow));
        });
        CarbonDataLoadConfiguration createConfiguration = DataLoadProcessBuilder.createConfiguration(carbonLoadModel);
        SortParameters createSortParameters = SortParameters.createSortParameters(createConfiguration);
        // Comparator choice depends on whether the sort parameters report any no-dictionary columns.
        Comparator newRowComparator = createSortParameters.getNoDictionaryCount() > 0 ? new NewRowComparator(createSortParameters.getNoDictionarySortColumn(), createSortParameters.getNoDictDataType()) : new NewRowComparatorForNormalDims(createSortParameters.getDimColCount());
        int globalSortPartitions = CarbonDataProcessorUtil.getGlobalSortPartitions(createConfiguration.getDataLoadProperty(CarbonCommonConstants.LOAD_GLOBAL_SORT_PARTITIONS));
        if (globalSortPartitions <= 0) {
            // No positive partition count configured: reuse the partition count of the converted RDD.
            globalSortPartitions = filter.partitions().length;
            BoxedUnit boxedUnit = BoxedUnit.UNIT;
        } else if (System.getProperty("useIndexServer") != null) {
            // Result is discarded; the call only forces the partitions to be computed.
            // NOTE(review): the reason this is tied to the "useIndexServer" system property is not visible here.
            filter.partitions();
        } else {
            BoxedUnit boxedUnit2 = BoxedUnit.UNIT;
        }
        // The converted RDD is persisted only when sorting into more than one partition;
        // it is unpersisted again after the job below.
        if (globalSortPartitions > 1) {
            filter.persist(StorageLevel$.MODULE$.fromString(CarbonProperties.getInstance().getGlobalSortRddStorageLevel()));
        } else {
            BoxedUnit boxedUnit3 = BoxedUnit.UNIT;
        }
        // Sort key is the row's data array, ordered by the comparator selected above.
        RDD sortBy = filter.sortBy(carbonRow2 -> {
            return carbonRow2.getData();
        }, filter.sortBy$default$2(), globalSortPartitions, RowOrdering$2(lazyRef, newRowComparator), package$.MODULE$.classTag(ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(Object.class))));
        // Final job: the first function receives the sort accumulator, the second the write
        // accumulator; both helper bodies are defined outside the lines shown here.
        sparkContext.runJob(sortBy.mapPartitionsWithIndex((obj4, iterator5) -> {
            return $anonfun$loadDataUsingGlobalSort$8(broadcast, longAccumulator4, BoxesRunTime.unboxToInt(obj4), iterator5);
        }, sortBy.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(CarbonRow.class)), (taskContext, iterator6) -> {
            $anonfun$loadDataUsingGlobalSort$9(carbonLoadModel, collectionAccumulator, broadcast, longAccumulator5, broadCastHadoopConf, taskContext, iterator6);
            return BoxedUnit.UNIT;
        }, ClassTag$.MODULE$.Unit());
        if (globalSortPartitions > 1) {
            filter.unpersist(false);
        } else {
            BoxedUnit boxedUnit4 = BoxedUnit.UNIT;
        }
        LOGGER().info(new StringBuilder(46).append("Total rows processed in step Input Processor: ").append(longAccumulator2.value()).toString());
        LOGGER().info(new StringBuilder(45).append("Total rows processed in step Data Converter: ").append(longAccumulator3.value()).toString());
        LOGGER().info(new StringBuilder(45).append("Total rows processed in step Sort Processor: ").append(longAccumulator4.value()).toString());
        LOGGER().info(new StringBuilder(42).append("Total rows processed in step Data Writer: ").append(longAccumulator5.value()).toString());
        return updateLoadStatus(carbonLoadModel, longAccumulator);
    }

    /**
     * Default value ({@code false}) for the sixth parameter of
     * {@code loadDataUsingGlobalSort}, following the compiler-generated
     * {@code name$default$N} naming.
     */
    public boolean loadDataUsingGlobalSort$default$6() {
        return false;
    }

    /**
     * Sorts an RDD of {@link InternalRow}s on the table's sort columns and runs the
     * final job over the sorted partitions.
     *
     * <p>Each row is first turned into an {@code Object[]} by
     * {@code CommonUtil$.getObjectArrayFromInternalRowAndConvertComplexTypeForGlobalSort}.
     * The sort key is the leading {@code n} elements of that array, where {@code n} is the
     * number of sort columns of the table. The comparator is generated from the Spark types
     * of those leading columns after this mapping: {@code StringType -> ByteType},
     * {@code TimestampType} or {@code DateType -> LongType}, anything else unchanged.
     *
     * @param sparkSession          session whose {@link SparkContext} creates broadcasts, accumulators and runs the job
     * @param rdd                   input rows
     * @param carbonLoadModel       load model, broadcast to the tasks
     * @param configuration         Hadoop configuration; mutated here (the application name is stored
     *                              under {@code CARBON_WRITTEN_BY_APPNAME}) and then broadcast
     * @param collectionAccumulator accumulator forwarded to the final per-partition function
     * @return the result of {@link #updateLoadStatus} for the "Partial Success Accumulator"
     */
    public Tuple2<String, Tuple2<LoadMetadataDetails, ExecutionErrors>>[] insertDataUsingGlobalSortWithInternalRow(SparkSession sparkSession, RDD<InternalRow> rdd, CarbonLoadModel carbonLoadModel, Configuration configuration, CollectionAccumulator<Map<String, SegmentMetaDataInfo>> collectionAccumulator) {
        SparkContext sparkContext = sparkSession.sparkContext();
        Broadcast broadcast = sparkContext.broadcast(carbonLoadModel, ClassTag$.MODULE$.apply(CarbonLoadModel.class));
        LongAccumulator longAccumulator = sparkContext.longAccumulator("Partial Success Accumulator");
        LongAccumulator longAccumulator2 = sparkContext.longAccumulator("Sort Processor Accumulator");
        LongAccumulator longAccumulator3 = sparkContext.longAccumulator("Write Processor Accumulator");
        // The application name must be written before the configuration is broadcast.
        configuration.set(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, sparkSession.sparkContext().appName());
        Broadcast<SerializableConfiguration> broadCastHadoopConf = SparkSQLUtil$.MODULE$.broadCastHadoopConf(sparkContext, configuration);
        CarbonDataLoadConfiguration createConfiguration = DataLoadProcessBuilder.createConfiguration(carbonLoadModel);
        // Spark schema fields of the table, and their data types in the same order.
        Seq seq = new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(Util.convertToSparkSchemaFromColumnSchema(carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable(), true).fields())).toSeq();
        Seq seq2 = (Seq) seq.map(structField -> {
            return structField.dataType();
        }, Seq$.MODULE$.canBuildFrom());
        // Starts empty and is populated by convertComplexDataType before the row conversion uses it.
        scala.collection.mutable.Map<String, GenericDataType<?>> apply = Map$.MODULE$.apply(Nil$.MODULE$);
        CommonUtil$.MODULE$.convertComplexDataType(apply, createConfiguration);
        RDD map = rdd.map(internalRow -> {
            return CommonUtil$.MODULE$.getObjectArrayFromInternalRowAndConvertComplexTypeForGlobalSort(internalRow, seq, apply);
        }, ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(Object.class)));
        int globalSortPartitions = CarbonDataProcessorUtil.getGlobalSortPartitions(createConfiguration.getDataLoadProperty(CarbonCommonConstants.LOAD_GLOBAL_SORT_PARTITIONS));
        if (globalSortPartitions <= 0) {
            // No positive partition count configured: reuse the input RDD's partition count.
            globalSortPartitions = rdd.partitions().length;
        }
        // The converted RDD is persisted only when sorting into more than one partition;
        // it is unpersisted again after the job below.
        if (globalSortPartitions > 1) {
            map.persist(StorageLevel$.MODULE$.fromString(CarbonProperties.getInstance().getGlobalSortRddStorageLevel()));
        }
        int size = carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable().getSortColumns().size();
        RDD sortBy = map.sortBy(objArr -> {
            return MODULE$.getKey(objArr, size);
        }, true, globalSortPartitions, GlobalSortHelper$.MODULE$.generateRowComparator((Seq) ((Seq) ((Seq) seq2.take(size)).map(sparkType -> {
            // Type used for comparing each sort column. The decompiled form redeclared the
            // lambda parameter as a local and collapsed the Timestamp/Date case into a
            // boolean; the three cases are spelled out explicitly here.
            if (StringType$.MODULE$.equals(sparkType)) {
                return ByteType$.MODULE$;
            }
            if (TimestampType$.MODULE$.equals(sparkType) || DateType$.MODULE$.equals(sparkType)) {
                return LongType$.MODULE$;
            }
            return sparkType;
        }, Seq$.MODULE$.canBuildFrom())).zipWithIndex(Seq$.MODULE$.canBuildFrom())), package$.MODULE$.classTag(ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(Object.class))));
        // Final job: the first function receives the sort accumulator, the second the write
        // accumulator; both helper bodies are defined outside the lines shown here.
        sparkContext.runJob(sortBy.mapPartitionsWithIndex((obj, iterator) -> {
            return $anonfun$insertDataUsingGlobalSortWithInternalRow$5(carbonLoadModel, longAccumulator2, BoxesRunTime.unboxToInt(obj), iterator);
        }, sortBy.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(CarbonRow.class)), (taskContext, iterator2) -> {
            $anonfun$insertDataUsingGlobalSortWithInternalRow$6(carbonLoadModel, collectionAccumulator, broadcast, longAccumulator3, broadCastHadoopConf, taskContext, iterator2);
            return BoxedUnit.UNIT;
        }, ClassTag$.MODULE$.Unit());
        if (globalSortPartitions > 1) {
            map.unpersist(false);
        }
        LOGGER().info(new StringBuilder(45).append("Total rows processed in step Sort Processor: ").append(longAccumulator2.value()).toString());
        LOGGER().info(new StringBuilder(42).append("Total rows processed in step Data Writer: ").append(longAccumulator3.value()).toString());
        return updateLoadStatus(carbonLoadModel, longAccumulator);
    }

    /**
     * Builds a sort key from the leading elements of a row.
     *
     * @param objArr the full row
     * @param i      how many leading elements to take
     * @return a new array of length {@code i} holding {@code objArr[0..i)}
     */
    public Object[] getKey(Object[] objArr, int i) {
        final Object[] leadingColumns = new Object[i];
        System.arraycopy(objArr, 0, leadingColumns, 0, leadingColumns.length);
        return leadingColumns;
    }

    /**
     * Builds the single-element result array describing the outcome of a load.
     *
     * <p>When the accumulator value equals zero the segment status is {@code SUCCESS} and the
     * key is {@code <tableName>_Success}. Otherwise the status is
     * {@code LOAD_PARTIAL_SUCCESS}, the key is {@code <tableName>_Partial_Success} and the
     * failure cause is set to {@code BAD_RECORDS}.
     *
     * @param carbonLoadModel supplies the segment id (used as load name) and the table name
     * @param longAccumulator counter whose value decides between the two outcomes
     * @return one tuple of (key, (load details, execution errors))
     */
    private Tuple2<String, Tuple2<LoadMetadataDetails, ExecutionErrors>>[] updateLoadStatus(CarbonLoadModel carbonLoadModel, LongAccumulator longAccumulator) {
        LoadMetadataDetails details = new LoadMetadataDetails();
        details.setLoadName(carbonLoadModel.getSegmentId());
        boolean counterIsZero = BoxesRunTime.equalsNumObject(longAccumulator.value(), BoxesRunTime.boxToInteger(0));
        if (!counterIsZero) {
            String partialKey = carbonLoadModel.getTableName() + "_" + "Partial_Success";
            details.setSegmentStatus(SegmentStatus.LOAD_PARTIAL_SUCCESS);
            // Created with NONE and then switched to BAD_RECORDS through the setter.
            ExecutionErrors errors = new ExecutionErrors(FailureCauses.NONE, "");
            errors.failureCauses_$eq(FailureCauses.BAD_RECORDS);
            return new Tuple2[]{new Tuple2<>(partialKey, new Tuple2(details, errors))};
        }
        String successKey = carbonLoadModel.getTableName() + "_" + "Success";
        details.setSegmentStatus(SegmentStatus.SUCCESS);
        return new Tuple2[]{new Tuple2<>(successKey, new Tuple2(details, new ExecutionErrors(FailureCauses.NONE, "")))};
    }

    /**
     * Runs a CSV load in which rows are redistributed by the value of the load model's
     * range partition column using a {@link DataSkewRangePartitioner}, then processed per
     * partition by the final job function.
     *
     * @param sparkSession          session whose {@link SparkContext} creates broadcasts, accumulators and runs the job
     * @param carbonLoadModel       load model, broadcast to the tasks; also supplies the range partition column
     * @param configuration         Hadoop configuration; mutated here (the application name is stored
     *                              under {@code CARBON_WRITTEN_BY_APPNAME})
     * @param collectionAccumulator accumulator forwarded to the final per-partition function
     * @return the result of {@link #updateLoadStatus} for the "Partial Success Accumulator"
     */
    public Tuple2<String, Tuple2<LoadMetadataDetails, ExecutionErrors>>[] loadDataUsingRangeSort(SparkSession sparkSession, CarbonLoadModel carbonLoadModel, Configuration configuration, CollectionAccumulator<Map<String, SegmentMetaDataInfo>> collectionAccumulator) {
        SparkContext sparkContext = sparkSession.sparkContext();
        Broadcast<CarbonLoadModel> broadcast = sparkContext.broadcast(carbonLoadModel, ClassTag$.MODULE$.apply(CarbonLoadModel.class));
        LongAccumulator longAccumulator = sparkContext.longAccumulator("Partial Success Accumulator");
        LongAccumulator longAccumulator2 = sparkContext.longAccumulator("Input Processor Accumulator");
        LongAccumulator longAccumulator3 = sparkContext.longAccumulator("Convert Processor Accumulator");
        // Created and logged below, but not passed to any function within this method.
        LongAccumulator longAccumulator4 = sparkContext.longAccumulator("Sort Processor Accumulator");
        LongAccumulator longAccumulator5 = sparkContext.longAccumulator("Write Processor Accumulator");
        configuration.set(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, sparkSession.sparkContext().appName());
        RDD<InternalRow> csvFileScanRDD = CsvRDDHelper$.MODULE$.csvFileScanRDD(sparkSession, carbonLoadModel, configuration);
        // First per-partition function receives the input accumulator (helper body not shown here).
        RDD mapPartitionsWithIndex = csvFileScanRDD.mapPartitionsWithIndex((obj, iterator) -> {
            return $anonfun$loadDataUsingRangeSort$1(broadcast, longAccumulator2, BoxesRunTime.unboxToInt(obj), iterator);
        }, csvFileScanRDD.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(CarbonRow.class));
        Broadcast<SerializableConfiguration> broadCastHadoopConf = SparkSQLUtil$.MODULE$.broadCastHadoopConf(sparkContext, configuration);
        // Second per-partition function receives the partial-success and convert accumulators;
        // its output is then filtered by a predicate whose body is not shown here.
        RDD<CarbonRow> filter = mapPartitionsWithIndex.mapPartitionsWithIndex((obj2, iterator2) -> {
            return $anonfun$loadDataUsingRangeSort$2(broadCastHadoopConf, broadcast, longAccumulator, longAccumulator3, BoxesRunTime.unboxToInt(obj2), iterator2);
        }, mapPartitionsWithIndex.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(CarbonRow.class)).filter(carbonRow -> {
            return BoxesRunTime.boxToBoolean($anonfun$loadDataUsingRangeSort$3(carbonRow));
        });
        CarbonDataLoadConfiguration createConfiguration = DataLoadProcessBuilder.createConfiguration(carbonLoadModel);
        // Position of the range partition column among the configured data fields.
        int indexOfColumn = indexOfColumn(carbonLoadModel.getRangePartitionColumn(), createConfiguration.getDataFields());
        // Key every row by its value in the range partition column.
        RDD keyBy = filter.keyBy(carbonRow2 -> {
            return carbonRow2.getObject(indexOfColumn);
        });
        int numPartitions = getNumPartitions(createConfiguration, carbonLoadModel, filter);
        Ordering<Object> createOrderingForColumn = createOrderingForColumn(carbonLoadModel.getRangePartitionColumn());
        // Separate RDD holding only range-column values, handed to the partitioner.
        RDD<Tuple2<Object, Object>> sampleRDD = getSampleRDD(sparkSession, carbonLoadModel, configuration, createConfiguration, broadcast);
        ClassTag Object = ClassTag$.MODULE$.Object();
        ClassTag apply = ClassTag$.MODULE$.apply(CarbonRow.class);
        // Result discarded; a null Ordering is passed explicitly on the next line.
        RDD$.MODULE$.rddToPairRDDFunctions$default$4(keyBy);
        // Partition by key, drop the key again, and run the final function on every partition.
        sparkContext.runJob(RDD$.MODULE$.rddToPairRDDFunctions(keyBy, Object, apply, (Ordering) null).partitionBy(new DataSkewRangePartitioner(numPartitions, sampleRDD, false, createOrderingForColumn, package$.MODULE$.classTag(ClassTag$.MODULE$.Object()))).map(tuple2 -> {
            return (CarbonRow) tuple2._2();
        }, ClassTag$.MODULE$.apply(CarbonRow.class)), (taskContext, iterator3) -> {
            $anonfun$loadDataUsingRangeSort$6(carbonLoadModel, collectionAccumulator, broadcast, longAccumulator5, broadCastHadoopConf, taskContext, iterator3);
            return BoxedUnit.UNIT;
        }, ClassTag$.MODULE$.Unit());
        LOGGER().info(new StringBuilder(46).append("Total rows processed in step Input Processor: ").append(longAccumulator2.value()).toString());
        LOGGER().info(new StringBuilder(45).append("Total rows processed in step Data Converter: ").append(longAccumulator3.value()).toString());
        LOGGER().info(new StringBuilder(45).append("Total rows processed in step Sort Processor: ").append(longAccumulator4.value()).toString());
        LOGGER().info(new StringBuilder(42).append("Total rows processed in step Data Writer: ").append(longAccumulator5.value()).toString());
        return updateLoadStatus(carbonLoadModel, longAccumulator);
    }

    /**
     * Builds an RDD of {@code (value, null)} pairs, where {@code value} is element 0 of each
     * row produced by a CSV scan configured with the header index of the range partition column.
     *
     * @param sparkSession                session used for the CSV scan and for broadcasting the configuration
     * @param carbonLoadModel             load model; supplies the range partition column
     * @param configuration               base Hadoop configuration; copied before
     *                                    {@code SELECT_COLUMN_INDEX} is set, so the caller's instance is not modified here
     * @param carbonDataLoadConfiguration not read by this method; a fresh configuration is created from the load model instead
     * @param broadcast                   broadcast load model passed on to the per-partition functions
     * @return pairs whose first element is {@code carbonRow.getObject(0)} and whose second element is {@code null}
     */
    private RDD<Tuple2<Object, Object>> getSampleRDD(SparkSession sparkSession, CarbonLoadModel carbonLoadModel, Configuration configuration, CarbonDataLoadConfiguration carbonDataLoadConfiguration, Broadcast<CarbonLoadModel> broadcast) {
        CarbonDataLoadConfiguration createConfiguration = DataLoadProcessBuilder.createConfiguration(carbonLoadModel);
        String[] header = createConfiguration.getHeader();
        CarbonColumn rangePartitionColumn = carbonLoadModel.getRangePartitionColumn();
        // Index of the range column in the header, matched case-insensitively; get() throws when there is no match.
        int unboxToInt = BoxesRunTime.unboxToInt(RichInt$.MODULE$.until$extension0(Predef$.MODULE$.intWrapper(0), header.length).find(i -> {
            return header[i].equalsIgnoreCase(rangePartitionColumn.getColName());
        }).get());
        // Data field selected by a predicate on the range column (predicate body not shown here); get() throws when none matches.
        DataField dataField = (DataField) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(createConfiguration.getDataFields())).find(dataField2 -> {
            return BoxesRunTime.boxToBoolean($anonfun$getSampleRDD$2(rangePartitionColumn, dataField2));
        }).get();
        // Work on a copy so the column selection does not leak into the caller's configuration.
        Configuration configuration2 = new Configuration(configuration);
        configuration2.set(CSVInputFormat.SELECT_COLUMN_INDEX, String.valueOf(BoxesRunTime.boxToInteger(unboxToInt)));
        RDD<InternalRow> csvFileScanRDD = CsvRDDHelper$.MODULE$.csvFileScanRDD(sparkSession, carbonLoadModel, configuration2);
        RDD mapPartitionsWithIndex = csvFileScanRDD.mapPartitionsWithIndex((obj, iterator) -> {
            return $anonfun$getSampleRDD$3(broadcast, dataField, BoxesRunTime.unboxToInt(obj), iterator);
        }, csvFileScanRDD.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(CarbonRow.class));
        // Note: the broadcast uses the original configuration, not the modified copy.
        Broadcast<SerializableConfiguration> broadCastHadoopConf = SparkSQLUtil$.MODULE$.broadCastHadoopConf(sparkSession.sparkContext(), configuration);
        return mapPartitionsWithIndex.mapPartitionsWithIndex((obj2, iterator2) -> {
            return $anonfun$getSampleRDD$4(broadCastHadoopConf, dataField, broadcast, BoxesRunTime.unboxToInt(obj2), iterator2);
        }, mapPartitionsWithIndex.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(CarbonRow.class)).filter(carbonRow -> {
            return BoxesRunTime.boxToBoolean($anonfun$getSampleRDD$5(carbonRow));
        }).map(carbonRow2 -> {
            return new Tuple2(carbonRow2.getObject(0), (Object) null);
        }, ClassTag$.MODULE$.apply(Tuple2.class));
    }

    /**
     * Chooses the number of partitions for the range-sort load.
     *
     * <p>A positive value from the {@code LOAD_GLOBAL_SORT_PARTITIONS} load property is used
     * as is. Otherwise, when the load model reports no positive total size, the partition
     * count of {@code rdd} is used; else the count is derived from the total size via
     * {@link #getNumPartitionsBasedOnSize}.
     *
     * @param carbonDataLoadConfiguration source of the load property
     * @param carbonLoadModel             source of the total size and the table
     * @param rdd                         RDD whose partition count is the fallback
     * @return the chosen partition count
     */
    private int getNumPartitions(CarbonDataLoadConfiguration carbonDataLoadConfiguration, CarbonLoadModel carbonLoadModel, RDD<CarbonRow> rdd) {
        int configured = CarbonDataProcessorUtil.getGlobalSortPartitions(carbonDataLoadConfiguration.getDataLoadProperty(CarbonCommonConstants.LOAD_GLOBAL_SORT_PARTITIONS));
        if (configured > 0) {
            return configured;
        }
        if (carbonLoadModel.getTotalSize() <= 0) {
            return rdd.partitions().length;
        }
        return getNumPartitionsBasedOnSize(carbonLoadModel.getTotalSize(), carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable(), carbonLoadModel, false);
    }

    /**
     * Derives a partition count from a data size and the table's block/blocklet sizes.
     *
     * <p>The divisor is {@code max(blockletBytes, blockBytes - blockletBytes)} multiplied by a
     * scale factor. The scale factor is {@code 1} when {@code z} is true; otherwise it is the
     * load model's scale factor, or the range-column scale factor from
     * {@link CarbonProperties} when the load model's value is {@code 0}.
     *
     * @param d               total size to distribute; it is divided by a byte count, so it is
     *                        presumably in bytes - confirm against callers
     * @param carbonTable     supplies block and blocklet sizes in MB
     * @param carbonLoadModel supplies the scale factor
     * @param z               when true, the scale factor is fixed at 1
     * @return {@code ceil(d / divisor)} narrowed to {@code int}
     */
    public int getNumPartitionsBasedOnSize(double d, CarbonTable carbonTable, CarbonLoadModel carbonLoadModel, boolean z) {
        // Long literal forces 64-bit multiplication; with an int-typed getter the plain
        // int product would overflow for sizes of 2048 MB and above.
        long blockSizeInMB = 1048576L * carbonTable.getBlockSizeInMB();
        long blockletSizeInMB = 1048576L * carbonTable.getBlockletSizeInMB();
        return (int) Math.ceil(d / (Math.max(blockletSizeInMB, blockSizeInMB - blockletSizeInMB) * (z ? 1 : carbonLoadModel.getScaleFactor() == 0 ? CarbonProperties.getInstance().getRangeColumnScaleFactor() : carbonLoadModel.getScaleFactor())));
    }

    /**
     * Finds the position of a column among the data fields by exact column-name equality.
     *
     * @param carbonColumn column to look for
     * @param dataFieldArr fields to search, in order
     * @return index of the first field whose column name equals that of {@code carbonColumn}
     * @throws java.util.NoSuchElementException when no field matches
     */
    private int indexOfColumn(CarbonColumn carbonColumn, DataField[] dataFieldArr) {
        for (int position = 0; position < dataFieldArr.length; position++) {
            if (dataFieldArr[position].getColumn().getColName().equals(carbonColumn.getColName())) {
                return position;
            }
        }
        // Same exception type and message that calling get() on an empty Scala Option produces.
        throw new java.util.NoSuchElementException("None.get");
    }

    /**
     * Picks the ordering used to compare values of the given column.
     *
     * <ul>
     *   <li>not a dimension: {@code PrimitiveOrdering} of the column's data type;</li>
     *   <li>dimension of type {@code DATE}: {@code PrimitiveOrdering} of {@code INT};</li>
     *   <li>other dimension with a primitive data type: {@code PrimitiveOrdering} of that type;</li>
     *   <li>any other dimension: {@code ByteArrayOrdering}.</li>
     * </ul>
     *
     * @param carbonColumn column whose values will be compared
     * @return the ordering for that column
     */
    private Ordering<Object> createOrderingForColumn(CarbonColumn carbonColumn) {
        if (Predef$.MODULE$.Boolean2boolean(carbonColumn.isDimension())) {
            org.apache.carbondata.core.metadata.datatype.DataType dimensionType = ((CarbonDimension) carbonColumn).getDataType();
            org.apache.carbondata.core.metadata.datatype.DataType dateType = DataTypes.DATE;
            // Null-safe equality, mirroring the original comparison.
            boolean isDate = dimensionType == null ? dateType == null : dimensionType.equals(dateType);
            if (isDate) {
                return new PrimitiveOrdering(DataTypes.INT);
            }
            if (DataTypeUtil.isPrimitiveColumn(carbonColumn.getDataType())) {
                return new PrimitiveOrdering(carbonColumn.getDataType());
            }
            return new ByteArrayOrdering();
        }
        return new PrimitiveOrdering(carbonColumn.getDataType());
    }

    /**
     * Prepares the current Spark task: registers an {@link InsertTaskCompletionListener} on the
     * current {@link TaskContext}, initialises the thread-local task metrics, and installs a
     * {@link CarbonTaskInfo} with a freshly generated UUID as the thread-local task info.
     *
     * @param str                   fourth constructor argument of the completion listener
     * @param str2                  fifth constructor argument of the completion listener
     * @param collectionAccumulator accumulator handed to the completion listener
     */
    public void setTaskListener(String str, String str2, CollectionAccumulator<Map<String, SegmentMetaDataInfo>> collectionAccumulator) {
        TaskContext currentTask = TaskContext$.MODULE$.get();
        currentTask.addTaskCompletionListener(new InsertTaskCompletionListener(null, null, collectionAccumulator, str, str2));
        TaskMetricsMap.initializeThreadLocal();
        CarbonTaskInfo taskInfo = new CarbonTaskInfo();
        taskInfo.setTaskId(CarbonUtil.generateUUID());
        ThreadLocalTaskInfo.setCarbonTaskInfo(taskInfo);
    }

    /**
     * Creates a {@link CarbonLoadModel} for the given table through a fresh Hadoop configuration
     * and {@link CarbonTableOutputFormat}, then fixes its null-format and bad-record options.
     *
     * @param sparkSession session whose state supplies the new Hadoop configuration
     * @param carbonTable  table to load into; supplies database name, table name, the
     *                     create-order columns for the input schema, and table properties
     * @return the configured load model
     */
    public CarbonLoadModel createLoadModelForGlobalSort(SparkSession sparkSession, CarbonTable carbonTable) {
        Configuration newHadoopConf = SparkSQLUtil$.MODULE$.sessionState(sparkSession).newHadoopConf();
        CarbonTableOutputFormat.setDatabaseName(newHadoopConf, carbonTable.getDatabaseName());
        CarbonTableOutputFormat.setTableName(newHadoopConf, carbonTable.getTableName());
        CarbonTableOutputFormat.setCarbonTable(newHadoopConf, carbonTable);
        // Input schema: one StructField (column name, data type) per column, in create order.
        CarbonTableOutputFormat.setInputSchema(newHadoopConf, new StructType((List) JavaConverters$.MODULE$.bufferAsJavaListConverter((Buffer) ((TraversableLike) JavaConverters$.MODULE$.asScalaBufferConverter(carbonTable.getCreateOrderColumn()).asScala()).map(carbonColumn -> {
            return new StructField(carbonColumn.getColName(), carbonColumn.getDataType());
        }, Buffer$.MODULE$.canBuildFrom())).asJava()));
        CarbonLoadModel loadModel = CarbonTableOutputFormat.getLoadModel(newHadoopConf);
        // Each option below is passed as "<option name>,<value>".
        loadModel.setSerializationNullFormat(new StringBuilder(3).append(TableOptionConstant.SERIALIZATION_NULL_FORMAT.getName()).append(",\\N").toString());
        loadModel.setBadRecordsLoggerEnable(new StringBuilder(6).append(TableOptionConstant.BAD_RECORDS_LOGGER_ENABLE.getName()).append(",false").toString());
        loadModel.setBadRecordsAction(new StringBuilder(6).append(TableOptionConstant.BAD_RECORDS_ACTION.getName()).append(",force").toString());
        loadModel.setIsEmptyDataBadRecord("IS_EMPTY_DATA_BAD_RECORD,false");
        // Carry over the table-level partition count only when the property is present.
        String str = carbonTable.getTableInfo().getFactTable().getTableProperties().get("global_sort_partitions");
        if (str != null) {
            loadModel.setGlobalSortPartitions(str);
        }
        return loadModel;
    }

    /**
     * Builds a {@link Dataset} over the given input splits of a table by scanning them with a
     * {@link CarbonScanRDD} and wrapping each {@link CarbonRow}'s data in a
     * {@link GenericInternalRow}.
     *
     * @param sparkSession session used for the scan and for creating the dataset
     * @param carbonTable  table to read; its create-order column names form the projection
     *                     and the Spark schema
     * @param seq          input splits handed to the scan RDD
     * @return the dataset returned by {@code SparkSQLUtil$.execute} for the scanned rows and schema
     */
    public Dataset<Row> createInputDataFrame(SparkSession sparkSession, CarbonTable carbonTable, Seq<InputSplit> seq) {
        // Column names in table create order.
        String[] strArr = (String[]) ((TraversableOnce) ((TraversableLike) JavaConverters$.MODULE$.asScalaBufferConverter(carbonTable.getCreateOrderColumn()).asScala()).map(carbonColumn -> {
            return carbonColumn.getColName();
        }, Buffer$.MODULE$.canBuildFrom())).toArray(ClassTag$.MODULE$.apply(String.class));
        org.apache.spark.sql.types.StructType createSparkSchema = SparkTypeConverter$.MODULE$.createSparkSchema(carbonTable, Predef$.MODULE$.wrapRefArray(strArr));
        // Two scan arguments are passed as null and the twelfth uses the constructor's default value.
        return SparkSQLUtil$.MODULE$.execute(new CarbonScanRDD(sparkSession, new CarbonProjection(strArr), null, carbonTable.getAbsoluteTableIdentifier(), carbonTable.getTableInfo().serialize(), carbonTable.getTableInfo(), new CarbonInputMetrics(), null, SparkDataTypeConverterImpl.class, CarbonRowReadSupport.class, (List) JavaConverters$.MODULE$.seqAsJavaListConverter(seq).asJava(), CarbonScanRDD$.MODULE$.$lessinit$greater$default$12(), ClassTag$.MODULE$.apply(CarbonRow.class)).map(carbonRow -> {
            return new GenericInternalRow(carbonRow.getData());
        }, ClassTag$.MODULE$.apply(InternalRow.class)), createSparkSchema, sparkSession);
    }

    /**
     * Per-partition input function for the CSV path of {@code loadDataUsingGlobalSort}:
     * delegates to {@code DataLoadProcessorStepOnSpark$.inputFuncForCsvRows}.
     *
     * <p>The former temporary {@code Tuple2} and its null check were removed: a freshly
     * constructed object is never null, so the {@code MatchError} branch could not be reached.
     *
     * @param broadcast       broadcast load model
     * @param longAccumulator accumulator passed to the input function
     * @param i               partition index
     * @param iterator        rows of the partition
     * @return the iterator produced by the input function
     */
    public static final /* synthetic */ Iterator $anonfun$loadDataUsingGlobalSort$2(Broadcast broadcast, LongAccumulator longAccumulator, int i, Iterator iterator) {
        return DataLoadProcessorStepOnSpark$.MODULE$.inputFuncForCsvRows(iterator, i, broadcast, longAccumulator);
    }

    /**
     * Per-partition input function for the dataset path of {@code loadDataUsingGlobalSort}:
     * delegates to {@code DataLoadProcessorStepOnSpark$.inputFunc}.
     *
     * <p>The former temporary {@code Tuple2} and its null check were removed: a freshly
     * constructed object is never null, so the {@code MatchError} branch could not be reached.
     *
     * @param broadcast       broadcast load model
     * @param longAccumulator accumulator passed to the input function
     * @param i               partition index
     * @param iterator        rows of the partition
     * @return the iterator produced by the input function
     */
    public static final /* synthetic */ Iterator $anonfun$loadDataUsingGlobalSort$4(Broadcast broadcast, LongAccumulator longAccumulator, int i, Iterator iterator) {
        return DataLoadProcessorStepOnSpark$.MODULE$.inputFunc(iterator, i, broadcast, longAccumulator);
    }

    /**
     * Per-partition convert function of {@code loadDataUsingGlobalSort}: installs the broadcast
     * Hadoop configuration on the current thread, then delegates to
     * {@code DataLoadProcessorStepOnSpark$.convertFunc}.
     *
     * <p>The former temporary {@code Tuple2} and its null check were removed: a freshly
     * constructed object is never null, so the {@code MatchError} branch could not be reached.
     *
     * @param broadcast        broadcast holding a {@link SerializableConfiguration}
     * @param broadcast2       broadcast load model
     * @param longAccumulator  first accumulator passed to the convert function
     * @param longAccumulator2 second accumulator passed to the convert function
     * @param z                flag forwarded as the last argument of the convert function
     * @param i                partition index
     * @param iterator         rows of the partition
     * @return the iterator produced by the convert function
     */
    public static final /* synthetic */ Iterator $anonfun$loadDataUsingGlobalSort$5(Broadcast broadcast, Broadcast broadcast2, LongAccumulator longAccumulator, LongAccumulator longAccumulator2, boolean z, int i, Iterator iterator) {
        // Must happen before the conversion runs on this thread.
        ThreadLocalSessionInfo.setConfigurationToCurrentThread(((SerializableConfiguration) broadcast.value()).value());
        return DataLoadProcessorStepOnSpark$.MODULE$.convertFunc(iterator, i, broadcast2, longAccumulator, longAccumulator2, false, z);
    }

    /**
     * Compiler-generated (synthetic) predicate named for {@code loadDataUsingGlobalSort}.
     *
     * @param carbonRow the row to test, may be {@code null}
     * @return {@code false} for a {@code null} row, {@code true} otherwise
     */
    public static final /* synthetic */ boolean $anonfun$loadDataUsingGlobalSort$6(CarbonRow carbonRow) {
        if (carbonRow == null) {
            return false;
        }
        return true;
    }

    /**
     * Compiler-generated (synthetic) slow path that produces the {@code RowOrdering} object
     * held by {@code lazyRef}.
     *
     * While holding the monitor of {@code lazyRef}, it returns the stored value if
     * {@code lazyRef.initialized()} is already true; otherwise it stores (via
     * {@code lazyRef.initialize}) a new {@code Ordering<Object[]>} whose {@code compare}
     * delegates to {@code comparator}, and returns that.
     *
     * @param lazyRef    holder of the lazily created ordering; also used as the lock object
     * @param comparator comparator that the created ordering delegates {@code compare} to
     * @return the ordering stored in {@code lazyRef}
     */
    private static final /* synthetic */ DataLoadProcessBuilderOnSpark$RowOrdering$1$ RowOrdering$lzycompute$1(LazyRef lazyRef, final Comparator comparator) {
        DataLoadProcessBuilderOnSpark$RowOrdering$1$ dataLoadProcessBuilderOnSpark$RowOrdering$1$;
        // initialized() is checked under the lock, so an existing value is reused rather than replaced.
        synchronized (lazyRef) {
            dataLoadProcessBuilderOnSpark$RowOrdering$1$ = lazyRef.initialized() ? (DataLoadProcessBuilderOnSpark$RowOrdering$1$) lazyRef.value() : (DataLoadProcessBuilderOnSpark$RowOrdering$1$) lazyRef.initialize(new Ordering<Object[]>(comparator) { // from class: org.apache.carbondata.spark.load.DataLoadProcessBuilderOnSpark$RowOrdering$1$
                // Comparator captured from the enclosing call; the only state of this ordering.
                private final Comparator rowComparator$1;

                // The methods below, up to compare, only forward to the static Ordering.xxx$ helpers.

                /* renamed from: tryCompare, reason: merged with bridge method [inline-methods] */
                public Some m3758tryCompare(Object obj, Object obj2) {
                    return Ordering.tryCompare$(this, obj, obj2);
                }

                public boolean lteq(Object obj, Object obj2) {
                    return Ordering.lteq$(this, obj, obj2);
                }

                public boolean gteq(Object obj, Object obj2) {
                    return Ordering.gteq$(this, obj, obj2);
                }

                public boolean lt(Object obj, Object obj2) {
                    return Ordering.lt$(this, obj, obj2);
                }

                public boolean gt(Object obj, Object obj2) {
                    return Ordering.gt$(this, obj, obj2);
                }

                public boolean equiv(Object obj, Object obj2) {
                    return Ordering.equiv$(this, obj, obj2);
                }

                public Object max(Object obj, Object obj2) {
                    return Ordering.max$(this, obj, obj2);
                }

                public Object min(Object obj, Object obj2) {
                    return Ordering.min$(this, obj, obj2);
                }

                /* renamed from: reverse, reason: merged with bridge method [inline-methods] */
                public Ordering<Object[]> m3757reverse() {
                    return Ordering.reverse$(this);
                }

                public <U> Ordering<U> on(Function1<U, Object[]> function1) {
                    return Ordering.on$(this, function1);
                }

                public Ordering.Ops mkOrderingOps(Object obj) {
                    return Ordering.mkOrderingOps$(this, obj);
                }

                // The actual comparison: delegate to the captured comparator.
                public int compare(Object[] objArr, Object[] objArr2) {
                    return this.rowComparator$1.compare(objArr, objArr2);
                }

                // Instance initializer: store the comparator, then run the PartialOrdering and Ordering $init$ hooks.
                {
                    this.rowComparator$1 = comparator;
                    PartialOrdering.$init$(this);
                    Ordering.$init$(this);
                }
            });
        }
        return dataLoadProcessBuilderOnSpark$RowOrdering$1$;
    }

    /**
     * Returns the {@code RowOrdering} object held by {@code lazyRef}, creating it on first use.
     *
     * @param lazyRef    holder of the lazily created ordering
     * @param comparator comparator handed to the creation path when the holder is still empty
     * @return the ordering stored in {@code lazyRef}
     */
    private final DataLoadProcessBuilderOnSpark$RowOrdering$1$ RowOrdering$2(LazyRef lazyRef, Comparator comparator) {
        if (!lazyRef.initialized()) {
            // Not created yet: take the synchronized creation path.
            return RowOrdering$lzycompute$1(lazyRef, comparator);
        }
        return (DataLoadProcessBuilderOnSpark$RowOrdering$1$) lazyRef.value();
    }

    /**
     * Compiler-generated (synthetic) helper named for {@code loadDataUsingGlobalSort}.
     * Hands the iterator and its accompanying index to
     * {@code DataLoadProcessorStepOnSpark$.convertTo3Parts}.
     *
     * @param broadcast       broadcast handle, passed through unchanged
     * @param longAccumulator accumulator, passed through unchanged
     * @param i               integer paired with the iterator (presumably the partition
     *                        index; confirm against the caller)
     * @param iterator        the iterator to process
     * @return the iterator produced by {@code convertTo3Parts}
     */
    public static final /* synthetic */ Iterator $anonfun$loadDataUsingGlobalSort$8(Broadcast broadcast, LongAccumulator longAccumulator, int i, Iterator iterator) {
        // The index and the iterator are forwarded as-is; no intermediate pair object is required.
        return DataLoadProcessorStepOnSpark$.MODULE$.convertTo3Parts(iterator, i, broadcast, longAccumulator);
    }

    /**
     * Compiler-generated (synthetic) helper named for {@code loadDataUsingGlobalSort}.
     * Registers a task listener, derives a per-task copy of the broadcast load model,
     * writes the iterator through {@code DataLoadProcessorStepOnSpark$.writeFunc} and
     * finally publishes the copy's metrics as the task's output metrics.
     *
     * @param carbonLoadModel       model supplying the table name and segment id for the listener
     * @param collectionAccumulator accumulator handed to {@code setTaskListener}
     * @param broadcast             broadcast whose value is the {@code CarbonLoadModel} to copy
     * @param longAccumulator       accumulator handed to {@code writeFunc}
     * @param broadcast2            broadcast whose value is a {@code SerializableConfiguration}
     * @param taskContext           source of the partition id and the task metrics
     * @param iterator              the rows to write
     */
    public static final /* synthetic */ void $anonfun$loadDataUsingGlobalSort$9(CarbonLoadModel carbonLoadModel, CollectionAccumulator collectionAccumulator, Broadcast broadcast, LongAccumulator longAccumulator, Broadcast broadcast2, TaskContext taskContext, Iterator iterator) {
        MODULE$.setTaskListener(carbonLoadModel.getTableName(), carbonLoadModel.getSegmentId(), collectionAccumulator);

        // The partition id, rendered as text, is used as the task number of the copy.
        CarbonLoadModel sharedModel = (CarbonLoadModel) broadcast.value();
        String taskNo = Integer.toString(taskContext.partitionId());
        CarbonLoadModel taskModel = sharedModel.getCopyWithTaskNo(taskNo);
        taskModel.setMetrics(new DataLoadMetrics());

        DataLoadProcessorStepOnSpark$.MODULE$.writeFunc(
            iterator,
            taskContext.partitionId(),
            taskModel,
            longAccumulator,
            ((SerializableConfiguration) broadcast2.value()).value());

        // Read the metrics back from the copy only after writeFunc has returned.
        SparkSQLUtil$.MODULE$.setOutputMetrics(taskContext.taskMetrics().outputMetrics(), taskModel.getMetrics());
    }

    /**
     * Compiler-generated (synthetic) helper named for
     * {@code insertDataUsingGlobalSortWithInternalRow}. Hands the iterator and its
     * accompanying index to
     * {@code DataLoadProcessorStepOnSpark$.convertTo3PartsFromObjectArray}.
     *
     * @param carbonLoadModel load model, passed through unchanged
     * @param longAccumulator accumulator, passed through unchanged
     * @param i               integer paired with the iterator (presumably the partition
     *                        index; confirm against the caller)
     * @param iterator        the iterator to process
     * @return the iterator produced by {@code convertTo3PartsFromObjectArray}
     */
    public static final /* synthetic */ Iterator $anonfun$insertDataUsingGlobalSortWithInternalRow$5(CarbonLoadModel carbonLoadModel, LongAccumulator longAccumulator, int i, Iterator iterator) {
        // The index and the iterator are forwarded as-is; no intermediate pair object is required.
        return DataLoadProcessorStepOnSpark$.MODULE$.convertTo3PartsFromObjectArray(iterator, i, carbonLoadModel, longAccumulator);
    }

    /**
     * Compiler-generated (synthetic) helper named for
     * {@code insertDataUsingGlobalSortWithInternalRow}. Registers a task listener, derives
     * a per-task copy of the broadcast load model, writes the iterator through
     * {@code DataLoadProcessorStepOnSpark$.writeFunc} and finally publishes the copy's
     * metrics as the task's output metrics.
     *
     * @param carbonLoadModel       model supplying the table name and segment id for the listener
     * @param collectionAccumulator accumulator handed to {@code setTaskListener}
     * @param broadcast             broadcast whose value is the {@code CarbonLoadModel} to copy
     * @param longAccumulator       accumulator handed to {@code writeFunc}
     * @param broadcast2            broadcast whose value is a {@code SerializableConfiguration}
     * @param taskContext           source of the partition id and the task metrics
     * @param iterator              the rows to write
     */
    public static final /* synthetic */ void $anonfun$insertDataUsingGlobalSortWithInternalRow$6(CarbonLoadModel carbonLoadModel, CollectionAccumulator collectionAccumulator, Broadcast broadcast, LongAccumulator longAccumulator, Broadcast broadcast2, TaskContext taskContext, Iterator iterator) {
        MODULE$.setTaskListener(carbonLoadModel.getTableName(), carbonLoadModel.getSegmentId(), collectionAccumulator);

        // The partition id, rendered as text, is used as the task number of the copy.
        CarbonLoadModel sharedModel = (CarbonLoadModel) broadcast.value();
        String taskNo = Integer.toString(taskContext.partitionId());
        CarbonLoadModel taskModel = sharedModel.getCopyWithTaskNo(taskNo);
        taskModel.setMetrics(new DataLoadMetrics());

        DataLoadProcessorStepOnSpark$.MODULE$.writeFunc(
            iterator,
            taskContext.partitionId(),
            taskModel,
            longAccumulator,
            ((SerializableConfiguration) broadcast2.value()).value());

        // Read the metrics back from the copy only after writeFunc has returned.
        SparkSQLUtil$.MODULE$.setOutputMetrics(taskContext.taskMetrics().outputMetrics(), taskModel.getMetrics());
    }

    /**
     * Compiler-generated (synthetic) helper named for {@code loadDataUsingRangeSort}.
     * Calls {@code DataLoadProcessorStepOnSpark$.internalInputFunc} with the accumulator
     * wrapped in an {@code Option} and an empty {@code Option} as the last argument.
     *
     * @param broadcast       broadcast handle, passed through unchanged
     * @param longAccumulator accumulator wrapped with {@code Option.apply}; a {@code null}
     *                        value therefore becomes an empty option
     * @param i               integer paired with the iterator (presumably the partition
     *                        index; confirm against the caller)
     * @param iterator        the iterator to process
     * @return the iterator produced by {@code internalInputFunc}
     */
    public static final /* synthetic */ Iterator $anonfun$loadDataUsingRangeSort$1(Broadcast broadcast, LongAccumulator longAccumulator, int i, Iterator iterator) {
        // The index and the iterator are forwarded as-is; no intermediate pair object is required.
        return DataLoadProcessorStepOnSpark$.MODULE$.internalInputFunc(iterator, i, broadcast, Option$.MODULE$.apply(longAccumulator), Option$.MODULE$.empty());
    }

    /**
     * Compiler-generated (synthetic) helper named for {@code loadDataUsingRangeSort}.
     * Installs the configuration carried by {@code broadcast} on the current thread and
     * then runs {@code DataLoadProcessorStepOnSpark$.convertFunc} over the iterator, using
     * that method's own defaults for its sixth and seventh parameters.
     *
     * @param broadcast        broadcast whose value is a {@code SerializableConfiguration}
     * @param broadcast2       broadcast handle, passed through to {@code convertFunc}
     * @param longAccumulator  first accumulator, passed through unchanged
     * @param longAccumulator2 second accumulator, passed through unchanged
     * @param i                integer paired with the iterator (presumably the partition
     *                         index; confirm against the caller)
     * @param iterator         the rows to convert
     * @return the iterator produced by {@code convertFunc}
     */
    public static final /* synthetic */ Iterator $anonfun$loadDataUsingRangeSort$2(Broadcast broadcast, Broadcast broadcast2, LongAccumulator longAccumulator, LongAccumulator longAccumulator2, int i, Iterator iterator) {
        // The configuration must be set on this thread before the conversion call is made.
        ThreadLocalSessionInfo.setConfigurationToCurrentThread(((SerializableConfiguration) broadcast.value()).value());
        // Typed view of the raw iterator, matching the element type convertFunc is called with.
        Iterator<CarbonRow> rows = iterator;
        return DataLoadProcessorStepOnSpark$.MODULE$.convertFunc(rows, i, broadcast2, longAccumulator, longAccumulator2, DataLoadProcessorStepOnSpark$.MODULE$.convertFunc$default$6(), DataLoadProcessorStepOnSpark$.MODULE$.convertFunc$default$7());
    }

    /**
     * Compiler-generated (synthetic) predicate named for {@code loadDataUsingRangeSort}.
     *
     * @param carbonRow the row to test, may be {@code null}
     * @return {@code false} for a {@code null} row, {@code true} otherwise
     */
    public static final /* synthetic */ boolean $anonfun$loadDataUsingRangeSort$3(CarbonRow carbonRow) {
        if (carbonRow == null) {
            return false;
        }
        return true;
    }

    /**
     * Compiler-generated (synthetic) helper named for {@code loadDataUsingRangeSort}.
     * Registers a task listener and then hands the iterator to
     * {@code DataLoadProcessorStepOnSpark$.sortAndWriteFunc}.
     *
     * @param carbonLoadModel       model supplying the table name and segment id for the listener
     * @param collectionAccumulator accumulator handed to {@code setTaskListener}
     * @param broadcast             broadcast handle, passed through to {@code sortAndWriteFunc}
     * @param longAccumulator       accumulator handed to {@code sortAndWriteFunc}
     * @param broadcast2            broadcast whose value is a {@code SerializableConfiguration}
     * @param taskContext           source of the partition id
     * @param iterator              the rows to sort and write
     */
    public static final /* synthetic */ void $anonfun$loadDataUsingRangeSort$6(CarbonLoadModel carbonLoadModel, CollectionAccumulator collectionAccumulator, Broadcast broadcast, LongAccumulator longAccumulator, Broadcast broadcast2, TaskContext taskContext, Iterator iterator) {
        MODULE$.setTaskListener(carbonLoadModel.getTableName(), carbonLoadModel.getSegmentId(), collectionAccumulator);

        DataLoadProcessorStepOnSpark$ stepFunctions = DataLoadProcessorStepOnSpark$.MODULE$;
        stepFunctions.sortAndWriteFunc(
            iterator,
            taskContext.partitionId(),
            broadcast,
            longAccumulator,
            ((SerializableConfiguration) broadcast2.value()).value());
    }

    /**
     * Compiler-generated (synthetic) predicate named for {@code getSampleRDD}.
     * Tests whether a data field refers to a column with the same name as the given column.
     *
     * @param carbonColumn the column whose name is looked for
     * @param dataField    the field whose column name is compared
     * @return {@code true} when both column names are equal according to {@code equals}
     */
    public static final /* synthetic */ boolean $anonfun$getSampleRDD$2(CarbonColumn carbonColumn, DataField dataField) {
        String fieldColumnName = dataField.getColumn().getColName();
        String wantedColumnName = carbonColumn.getColName();
        return fieldColumnName.equals(wantedColumnName);
    }

    /**
     * Compiler-generated (synthetic) helper named for {@code getSampleRDD}.
     * Calls {@code DataLoadProcessorStepOnSpark$.internalInputFunc} with an empty
     * {@code Option} in the fourth position and the data field wrapped in an
     * {@code Option} as the last argument.
     *
     * @param broadcast broadcast handle, passed through unchanged
     * @param dataField field wrapped with {@code Option.apply}; a {@code null} value
     *                  therefore becomes an empty option
     * @param i         integer paired with the iterator (presumably the partition index;
     *                  confirm against the caller)
     * @param iterator  the iterator to process
     * @return the iterator produced by {@code internalInputFunc}
     */
    public static final /* synthetic */ Iterator $anonfun$getSampleRDD$3(Broadcast broadcast, DataField dataField, int i, Iterator iterator) {
        // The index and the iterator are forwarded as-is; no intermediate pair object is required.
        return DataLoadProcessorStepOnSpark$.MODULE$.internalInputFunc(iterator, i, broadcast, Option$.MODULE$.empty(), Option$.MODULE$.apply(dataField));
    }

    /**
     * Compiler-generated (synthetic) helper named for {@code getSampleRDD}.
     * Installs the configuration carried by {@code broadcast} on the current thread and
     * then runs {@code DataLoadProcessorStepOnSpark$.sampleConvertFunc} over the iterator.
     *
     * @param broadcast  broadcast whose value is a {@code SerializableConfiguration}
     * @param dataField  field handed to {@code sampleConvertFunc}
     * @param broadcast2 broadcast handle, passed through to {@code sampleConvertFunc}
     * @param i          integer paired with the iterator (presumably the partition index;
     *                   confirm against the caller)
     * @param iterator   the rows to convert
     * @return the iterator produced by {@code sampleConvertFunc}
     */
    public static final /* synthetic */ Iterator $anonfun$getSampleRDD$4(Broadcast broadcast, DataField dataField, Broadcast broadcast2, int i, Iterator iterator) {
        // The configuration must be set on this thread before the conversion call is made.
        ThreadLocalSessionInfo.setConfigurationToCurrentThread(((SerializableConfiguration) broadcast.value()).value());
        // Typed view of the raw iterator, matching the element type sampleConvertFunc is called with.
        Iterator<CarbonRow> rows = iterator;
        return DataLoadProcessorStepOnSpark$.MODULE$.sampleConvertFunc(rows, dataField, i, broadcast2);
    }

    /**
     * Compiler-generated (synthetic) predicate named for {@code getSampleRDD}.
     *
     * @param carbonRow the row to test, may be {@code null}
     * @return {@code false} for a {@code null} row, {@code true} otherwise
     */
    public static final /* synthetic */ boolean $anonfun$getSampleRDD$5(CarbonRow carbonRow) {
        if (carbonRow == null) {
            return false;
        }
        return true;
    }

    private DataLoadProcessBuilderOnSpark$() {
        MODULE$ = this;
        this.LOGGER = LogServiceFactory.getLogService(getClass().getCanonicalName());
    }
}
