HiveTableRelation 相关代码
HiveStrategies.scala
当 relation.tableMeta.stats.isEmpty 是, 即调用 hiveTableWithStats
class DetermineTableStats(session: SparkSession) extends Rule[LogicalPlan] {
private def hiveTableWithStats(relation: HiveTableRelation): HiveTableRelation = {
val table = relation.tableMeta
val partitionCols = relation.partitionCols
// For partitioned tables, the partition directory may be outside of the table directory.
// Which is expensive to get table size. Please see how we implemented it in the AnalyzeTable.
val sizeInBytes = if (conf.fallBackToHdfsForStatsEnabled && partitionCols.isEmpty) {
try {
val hadoopConf = session.sessionState.newHadoopConf()
val tablePath = new Path(table.location)
val fs: FileSystem = tablePath.getFileSystem(hadoopConf)
fs.getContentSummary(tablePath).getLength
} catch {
case e: IOException =>
logWarning("Failed to get table size from HDFS.", e)
conf.defaultSizeInBytes
}
} else {
conf.defaultSizeInBytes
}
val stats = Some(Statistics(sizeInBytes = BigInt(sizeInBytes)))
relation.copy(tableStats = stats)
}
override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
case relation: HiveTableRelation
if DDLUtils.isHiveTable(relation.tableMeta) && relation.tableMeta.stats.isEmpty =>
hiveTableWithStats(relation)
// handles InsertIntoStatement specially as the table in InsertIntoStatement is not added in its
// children, hence not matched directly by previous HiveTableRelation case.
case i @ InsertIntoStatement(relation: HiveTableRelation, _, _, _, _, _)
if DDLUtils.isHiveTable(relation.tableMeta) && relation.tableMeta.stats.isEmpty =>
i.copy(table = hiveTableWithStats(relation))
}
}
- HiveTableRelation
/**
* A `LogicalPlan` that represents a hive table.
*
* TODO: remove this after we completely make hive as a data source.
*/
case class HiveTableRelation(
tableMeta: CatalogTable,
dataCols: Seq[AttributeReference],
partitionCols: Seq[AttributeReference],
tableStats: Option[Statistics] = None,
@transient prunedPartitions: Option[Seq[CatalogTablePartition]] = None)
本站资源均来自互联网,仅供研究学习,禁止违法使用和商用,产生法律纠纷本站概不负责!如果侵犯了您的权益请与我们联系!
转载请注明出处: 免费源码网-免费的源码资源网站 » Spark 之 HiveStrategies
发表评论 取消回复