|
|
diff --git a/Makefile.am b/Makefile.am
|
|
|
index 18ad36d..9c299a9 100644
|
|
|
--- a/Makefile.am
|
|
|
+++ b/Makefile.am
|
|
|
@@ -1,3 +1,6 @@
|
|
|
+
|
|
|
+ACLOCAL_AMFLAGS = -I config
|
|
|
+
|
|
|
include $(top_srcdir)/config/rpm.am
|
|
|
include $(top_srcdir)/config/deb.am
|
|
|
include $(top_srcdir)/config/tgz.am
|
|
|
diff --git a/autogen.sh b/autogen.sh
|
|
|
index 343265c..427394a 100755
|
|
|
--- a/autogen.sh
|
|
|
+++ b/autogen.sh
|
|
|
@@ -1,7 +1,4 @@
|
|
|
#!/bin/sh
|
|
|
|
|
|
-aclocal -I config
|
|
|
-libtoolize --automake --copy
|
|
|
-autoheader
|
|
|
-automake --add-missing --include-deps --copy
|
|
|
-autoconf
|
|
|
+autoreconf -fiv
|
|
|
+rm -Rf autom4te.cache
|
|
|
diff --git a/cmd/Makefile.am b/cmd/Makefile.am
|
|
|
index 1a51ddc..bad1af6 100644
|
|
|
--- a/cmd/Makefile.am
|
|
|
+++ b/cmd/Makefile.am
|
|
|
@@ -1,2 +1,2 @@
|
|
|
SUBDIRS = zfs zpool zdb zhack zinject zstreamdump ztest zpios
|
|
|
-SUBDIRS += mount_zfs fsck_zfs zvol_id vdev_id
|
|
|
+SUBDIRS += mount_zfs fsck_zfs zvol_id vdev_id arcstat
|
|
|
diff --git a/cmd/arcstat/Makefile.am b/cmd/arcstat/Makefile.am
|
|
|
new file mode 100644
|
|
|
index 0000000..802b8e1
|
|
|
--- /dev/null
|
|
|
+++ b/cmd/arcstat/Makefile.am
|
|
|
@@ -0,0 +1,2 @@
|
|
|
+bin_SCRIPTS = arcstat.py
|
|
|
+EXTRA_DIST = $(bin_SCRIPTS)
|
|
|
diff --git a/cmd/arcstat/arcstat.py b/cmd/arcstat/arcstat.py
|
|
|
new file mode 100755
|
|
|
index 0000000..e01dd8b
|
|
|
--- /dev/null
|
|
|
+++ b/cmd/arcstat/arcstat.py
|
|
|
@@ -0,0 +1,440 @@
|
|
|
+#!/usr/bin/python
|
|
|
+#
|
|
|
+# Print out ZFS ARC Statistics exported via kstat(1)
|
|
|
+# For a definition of fields, or usage, use arcstat.py -v
|
|
|
+#
|
|
|
+# This script is a fork of the original arcstat.pl (0.1) by
|
|
|
+# Neelakanth Nadgir, originally published on his Sun blog on
|
|
|
+# 09/18/2007
|
|
|
+# http://blogs.sun.com/realneel/entry/zfs_arc_statistics
|
|
|
+#
|
|
|
+# This version aims to improve upon the original by adding features
|
|
|
+# and fixing bugs as needed. This version is maintained by
|
|
|
+# Mike Harsch and is hosted in a public open source repository:
|
|
|
+# http://github.com/mharsch/arcstat
|
|
|
+#
|
|
|
+# Comments, Questions, or Suggestions are always welcome.
|
|
|
+# Contact the maintainer at ( mike at harschsystems dot com )
|
|
|
+#
|
|
|
+# CDDL HEADER START
|
|
|
+#
|
|
|
+# The contents of this file are subject to the terms of the
|
|
|
+# Common Development and Distribution License, Version 1.0 only
|
|
|
+# (the "License"). You may not use this file except in compliance
|
|
|
+# with the License.
|
|
|
+#
|
|
|
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
|
+# or http://www.opensolaris.org/os/licensing.
|
|
|
+# See the License for the specific language governing permissions
|
|
|
+# and limitations under the License.
|
|
|
+#
|
|
|
+# When distributing Covered Code, include this CDDL HEADER in each
|
|
|
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
|
+# If applicable, add the following below this CDDL HEADER, with the
|
|
|
+# fields enclosed by brackets "[]" replaced with your own identifying
|
|
|
+# information: Portions Copyright [yyyy] [name of copyright owner]
|
|
|
+#
|
|
|
+# CDDL HEADER END
|
|
|
+#
|
|
|
+#
|
|
|
+# Fields have a fixed width. Every interval, we fill the "v"
|
|
|
+# dict with its corresponding value (v[field]=value) using calculate().
|
|
|
+# hdr is the list of fields that need to be printed, so we
|
|
|
+# just iterate over this array and print the values using our pretty printer.
|
|
|
+#
|
|
|
+
|
|
|
+
|
|
|
+import sys
|
|
|
+import time
|
|
|
+import getopt
|
|
|
+import re
|
|
|
+import copy
|
|
|
+
|
|
|
+from decimal import Decimal
|
|
|
+from signal import signal, SIGINT
|
|
|
+
|
|
|
+cols = {
|
|
|
+ # HDR: [Size, Scale, Description]
|
|
|
+ "time": [8, -1, "Time"],
|
|
|
+ "hits": [4, 1000, "ARC reads per second"],
|
|
|
+ "miss": [4, 1000, "ARC misses per second"],
|
|
|
+ "read": [4, 1000, "Total ARC accesses per second"],
|
|
|
+ "hit%": [4, 100, "ARC Hit percentage"],
|
|
|
+ "miss%": [5, 100, "ARC miss percentage"],
|
|
|
+ "dhit": [4, 1000, "Demand Data hits per second"],
|
|
|
+ "dmis": [4, 1000, "Demand Data misses per second"],
|
|
|
+ "dh%": [3, 100, "Demand Data hit percentage"],
|
|
|
+ "dm%": [3, 100, "Demand Data miss percentage"],
|
|
|
+ "phit": [4, 1000, "Prefetch hits per second"],
|
|
|
+ "pmis": [4, 1000, "Prefetch misses per second"],
|
|
|
+ "ph%": [3, 100, "Prefetch hits percentage"],
|
|
|
+ "pm%": [3, 100, "Prefetch miss percentage"],
|
|
|
+ "mhit": [4, 1000, "Metadata hits per second"],
|
|
|
+ "mmis": [4, 1000, "Metadata misses per second"],
|
|
|
+ "mread": [4, 1000, "Metadata accesses per second"],
|
|
|
+ "mh%": [3, 100, "Metadata hit percentage"],
|
|
|
+ "mm%": [3, 100, "Metadata miss percentage"],
|
|
|
+ "arcsz": [5, 1024, "ARC Size"],
|
|
|
+ "c": [4, 1024, "ARC Target Size"],
|
|
|
+ "mfu": [4, 1000, "MFU List hits per second"],
|
|
|
+ "mru": [4, 1000, "MRU List hits per second"],
|
|
|
+ "mfug": [4, 1000, "MFU Ghost List hits per second"],
|
|
|
+ "mrug": [4, 1000, "MRU Ghost List hits per second"],
|
|
|
+ "eskip": [5, 1000, "evict_skip per second"],
|
|
|
+ "mtxmis": [6, 1000, "mutex_miss per second"],
|
|
|
+ "rmis": [4, 1000, "recycle_miss per second"],
|
|
|
+ "dread": [5, 1000, "Demand data accesses per second"],
|
|
|
+ "pread": [5, 1000, "Prefetch accesses per second"],
|
|
|
+ "l2hits": [6, 1000, "L2ARC hits per second"],
|
|
|
+ "l2miss": [6, 1000, "L2ARC misses per second"],
|
|
|
+ "l2read": [6, 1000, "Total L2ARC accesses per second"],
|
|
|
+ "l2hit%": [6, 100, "L2ARC access hit percentage"],
|
|
|
+ "l2miss%": [7, 100, "L2ARC access miss percentage"],
|
|
|
+ "l2size": [6, 1024, "Size of the L2ARC"],
|
|
|
+ "l2bytes": [7, 1024, "bytes read per second from the L2ARC"],
|
|
|
+}
|
|
|
+
|
|
|
+v = {}
|
|
|
+hdr = ["time", "read", "miss", "miss%", "dmis", "dm%", "pmis", "pm%", "mmis",
|
|
|
+ "mm%", "arcsz", "c"]
|
|
|
+xhdr = ["time", "mfu", "mru", "mfug", "mrug", "eskip", "mtxmis", "rmis",
|
|
|
+ "dread", "pread", "read"]
|
|
|
+sint = 1 # Default interval is 1 second
|
|
|
+count = 1 # Default count is 1
|
|
|
+hdr_intr = 20 # Print header every 20 lines of output
|
|
|
+opfile = None
|
|
|
+sep = " " # Default separator is 2 spaces
|
|
|
+version = "0.4"
|
|
|
+l2exist = False
|
|
|
+cmd = ("Usage: arcstat [-hvx] [-f fields] [-o file] [-s string] [interval "
|
|
|
+ "[count]]\n")
|
|
|
+cur = {}
|
|
|
+d = {}
|
|
|
+out = None
|
|
|
+kstat = None
|
|
|
+float_pobj = re.compile("^[0-9]+(\.[0-9]+)?$")
|
|
|
+
|
|
|
+
|
|
|
+def detailed_usage():
|
|
|
+ sys.stderr.write("%s\n" % cmd)
|
|
|
+ sys.stderr.write("Field definitions are as follows:\n")
|
|
|
+ for key in cols:
|
|
|
+ sys.stderr.write("%11s : %s\n" % (key, cols[key][2]))
|
|
|
+ sys.stderr.write("\n")
|
|
|
+
|
|
|
+ sys.exit(1)
|
|
|
+
|
|
|
+
|
|
|
+def usage():
|
|
|
+ sys.stderr.write("%s\n" % cmd)
|
|
|
+ sys.stderr.write("\t -h : Print this help message\n")
|
|
|
+ sys.stderr.write("\t -v : List all possible field headers and definitions"
|
|
|
+ "\n")
|
|
|
+ sys.stderr.write("\t -x : Print extended stats\n")
|
|
|
+ sys.stderr.write("\t -f : Specify specific fields to print (see -v)\n")
|
|
|
+ sys.stderr.write("\t -o : Redirect output to the specified file\n")
|
|
|
+ sys.stderr.write("\t -s : Override default field separator with custom "
|
|
|
+ "character or string\n")
|
|
|
+ sys.stderr.write("\nExamples:\n")
|
|
|
+ sys.stderr.write("\tarcstat -o /tmp/a.log 2 10\n")
|
|
|
+ sys.stderr.write("\tarcstat -s \",\" -o /tmp/a.log 2 10\n")
|
|
|
+ sys.stderr.write("\tarcstat -v\n")
|
|
|
+ sys.stderr.write("\tarcstat -f time,hit%,dh%,ph%,mh% 1\n")
|
|
|
+ sys.stderr.write("\n")
|
|
|
+
|
|
|
+ sys.exit(1)
|
|
|
+
|
|
|
+
|
|
|
+def kstat_update():
|
|
|
+ global kstat
|
|
|
+
|
|
|
+ k = [line.strip() for line in open('/proc/spl/kstat/zfs/arcstats')]
|
|
|
+
|
|
|
+ if not k:
|
|
|
+ sys.exit(1)
|
|
|
+
|
|
|
+ del k[0:2]
|
|
|
+ kstat = {}
|
|
|
+
|
|
|
+ for s in k:
|
|
|
+ if not s:
|
|
|
+ continue
|
|
|
+
|
|
|
+ name, unused, value = s.split()
|
|
|
+ kstat[name] = Decimal(value)
|
|
|
+
|
|
|
+
|
|
|
+def snap_stats():
|
|
|
+ global cur
|
|
|
+ global kstat
|
|
|
+
|
|
|
+ prev = copy.deepcopy(cur)
|
|
|
+ kstat_update()
|
|
|
+
|
|
|
+ cur = kstat
|
|
|
+ for key in cur:
|
|
|
+ if re.match(key, "class"):
|
|
|
+ continue
|
|
|
+ if key in prev:
|
|
|
+ d[key] = cur[key] - prev[key]
|
|
|
+ else:
|
|
|
+ d[key] = cur[key]
|
|
|
+
|
|
|
+
|
|
|
+def prettynum(sz, scale, num=0):
|
|
|
+ suffix = [' ', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']
|
|
|
+ index = 0
|
|
|
+ save = 0
|
|
|
+
|
|
|
+ # Special case for the time field (scale == -1)
|
|
|
+ if scale == -1:
|
|
|
+ return "%s" % num
|
|
|
+
|
|
|
+ # Rounding error, return 0
|
|
|
+ elif num > 0 and num < 1:
|
|
|
+ num = 0
|
|
|
+
|
|
|
+ while num > scale and index < 5:
|
|
|
+ save = num
|
|
|
+ num = num / scale
|
|
|
+ index += 1
|
|
|
+
|
|
|
+ if index == 0:
|
|
|
+ return "%*d" % (sz, num)
|
|
|
+
|
|
|
+ if (save / scale) < 10:
|
|
|
+ return "%*.1f%s" % (sz - 1, num, suffix[index])
|
|
|
+ else:
|
|
|
+ return "%*d%s" % (sz - 1, num, suffix[index])
|
|
|
+
|
|
|
+
|
|
|
+def print_values():
|
|
|
+ global hdr
|
|
|
+ global sep
|
|
|
+ global v
|
|
|
+
|
|
|
+ for col in hdr:
|
|
|
+ sys.stdout.write("%s%s" % (
|
|
|
+ prettynum(cols[col][0], cols[col][1], v[col]),
|
|
|
+ sep
|
|
|
+ ))
|
|
|
+ sys.stdout.write("\n")
|
|
|
+
|
|
|
+
|
|
|
+def print_header():
|
|
|
+ global hdr
|
|
|
+ global sep
|
|
|
+
|
|
|
+ for col in hdr:
|
|
|
+ sys.stdout.write("%*s%s" % (cols[col][0], col, sep))
|
|
|
+ sys.stdout.write("\n")
|
|
|
+
|
|
|
+
|
|
|
+def init():
|
|
|
+ global sint
|
|
|
+ global count
|
|
|
+ global hdr
|
|
|
+ global xhdr
|
|
|
+ global opfile
|
|
|
+ global sep
|
|
|
+ global out
|
|
|
+ global l2exist
|
|
|
+
|
|
|
+ desired_cols = None
|
|
|
+ xflag = False
|
|
|
+ hflag = False
|
|
|
+ vflag = False
|
|
|
+ i = 1
|
|
|
+
|
|
|
+ try:
|
|
|
+ opts, args = getopt.getopt(
|
|
|
+ sys.argv[1:],
|
|
|
+ "xo:hvs:f:",
|
|
|
+ [
|
|
|
+ "extended",
|
|
|
+ "outfile",
|
|
|
+ "help",
|
|
|
+ "verbose",
|
|
|
+ "seperator",
|
|
|
+ "columns"
|
|
|
+ ]
|
|
|
+ )
|
|
|
+
|
|
|
+ except getopt.error, msg:
|
|
|
+ sys.stderr.write(msg)
|
|
|
+ usage()
|
|
|
+
|
|
|
+ for opt, arg in opts:
|
|
|
+ if opt in ('-x', '--extended'):
|
|
|
+ xflag = True
|
|
|
+ if opt in ('-o', '--outfile'):
|
|
|
+ opfile = arg
|
|
|
+ i += 1
|
|
|
+ if opt in ('-h', '--help'):
|
|
|
+ hflag = True
|
|
|
+ if opt in ('-v', '--verbose'):
|
|
|
+ vflag = True
|
|
|
+ if opt in ('-s', '--seperator'):
|
|
|
+ sep = arg
|
|
|
+ i += 1
|
|
|
+ if opt in ('-f', '--columns'):
|
|
|
+ desired_cols = arg
|
|
|
+ i += 1
|
|
|
+ i += 1
|
|
|
+
|
|
|
+ argv = sys.argv[i:]
|
|
|
+ sint = Decimal(argv[0]) if argv else sint
|
|
|
+ count = int(argv[1]) if len(argv) > 1 else count
|
|
|
+
|
|
|
+ if len(argv) > 1:
|
|
|
+ sint = Decimal(argv[0])
|
|
|
+ count = int(argv[1])
|
|
|
+
|
|
|
+ elif len(argv) > 0:
|
|
|
+ sint = Decimal(argv[0])
|
|
|
+ count = 0
|
|
|
+
|
|
|
+ if hflag or (xflag and desired_cols):
|
|
|
+ usage()
|
|
|
+
|
|
|
+ if vflag:
|
|
|
+ detailed_usage()
|
|
|
+
|
|
|
+ if xflag:
|
|
|
+ hdr = xhdr
|
|
|
+
|
|
|
+ # check if L2ARC exists
|
|
|
+ snap_stats()
|
|
|
+ l2_size = cur.get("l2_size")
|
|
|
+ if l2_size:
|
|
|
+ l2exist = True
|
|
|
+
|
|
|
+ if desired_cols:
|
|
|
+ hdr = desired_cols.split(",")
|
|
|
+
|
|
|
+ invalid = []
|
|
|
+ incompat = []
|
|
|
+ for ele in hdr:
|
|
|
+ if ele not in cols:
|
|
|
+ invalid.append(ele)
|
|
|
+ elif not l2exist and ele.startswith("l2"):
|
|
|
+ sys.stdout.write("No L2ARC Here\n%s\n" % ele)
|
|
|
+ incompat.append(ele)
|
|
|
+
|
|
|
+ if len(invalid) > 0:
|
|
|
+ sys.stderr.write("Invalid column definition! -- %s\n" % invalid)
|
|
|
+ usage()
|
|
|
+
|
|
|
+ if len(incompat) > 0:
|
|
|
+ sys.stderr.write("Incompatible field specified! -- %s\n" % (
|
|
|
+ incompat,
|
|
|
+ ))
|
|
|
+ usage()
|
|
|
+
|
|
|
+ if opfile:
|
|
|
+ try:
|
|
|
+ out = open(opfile, "w")
|
|
|
+ sys.stdout = out
|
|
|
+
|
|
|
+ except:
|
|
|
+ sys.stderr.write("Cannot open %s for writing\n" % opfile)
|
|
|
+ sys.exit(1)
|
|
|
+
|
|
|
+
|
|
|
+def calculate():
|
|
|
+ global d
|
|
|
+ global v
|
|
|
+ global l2exist
|
|
|
+
|
|
|
+ v = {}
|
|
|
+ v["time"] = time.strftime("%H:%M:%S", time.localtime())
|
|
|
+ v["hits"] = d["hits"] / sint
|
|
|
+ v["miss"] = d["misses"] / sint
|
|
|
+ v["read"] = v["hits"] + v["miss"]
|
|
|
+ v["hit%"] = 100 * v["hits"] / v["read"] if v["read"] > 0 else 0
|
|
|
+ v["miss%"] = 100 - v["hit%"] if v["read"] > 0 else 0
|
|
|
+
|
|
|
+ v["dhit"] = (d["demand_data_hits"] + d["demand_metadata_hits"]) / sint
|
|
|
+ v["dmis"] = (d["demand_data_misses"] + d["demand_metadata_misses"]) / sint
|
|
|
+
|
|
|
+ v["dread"] = v["dhit"] + v["dmis"]
|
|
|
+ v["dh%"] = 100 * v["dhit"] / v["dread"] if v["dread"] > 0 else 0
|
|
|
+ v["dm%"] = 100 - v["dh%"] if v["dread"] > 0 else 0
|
|
|
+
|
|
|
+ v["phit"] = (d["prefetch_data_hits"] + d["prefetch_metadata_hits"]) / sint
|
|
|
+ v["pmis"] = (d["prefetch_data_misses"] +
|
|
|
+ d["prefetch_metadata_misses"]) / sint
|
|
|
+
|
|
|
+ v["pread"] = v["phit"] + v["pmis"]
|
|
|
+ v["ph%"] = 100 * v["phit"] / v["pread"] if v["pread"] > 0 else 0
|
|
|
+ v["pm%"] = 100 - v["ph%"] if v["pread"] > 0 else 0
|
|
|
+
|
|
|
+ v["mhit"] = (d["prefetch_metadata_hits"] +
|
|
|
+ d["demand_metadata_hits"]) / sint
|
|
|
+ v["mmis"] = (d["prefetch_metadata_misses"] +
|
|
|
+ d["demand_metadata_misses"]) / sint
|
|
|
+
|
|
|
+ v["mread"] = v["mhit"] + v["mmis"]
|
|
|
+ v["mh%"] = 100 * v["mhit"] / v["mread"] if v["mread"] > 0 else 0
|
|
|
+ v["mm%"] = 100 - v["mh%"] if v["mread"] > 0 else 0
|
|
|
+
|
|
|
+ v["arcsz"] = cur["size"]
|
|
|
+ v["c"] = cur["c"]
|
|
|
+ v["mfu"] = d["mfu_hits"] / sint
|
|
|
+ v["mru"] = d["mru_hits"] / sint
|
|
|
+ v["mrug"] = d["mru_ghost_hits"] / sint
|
|
|
+ v["mfug"] = d["mfu_ghost_hits"] / sint
|
|
|
+ v["eskip"] = d["evict_skip"] / sint
|
|
|
+ v["rmis"] = d["recycle_miss"] / sint
|
|
|
+ v["mtxmis"] = d["mutex_miss"] / sint
|
|
|
+
|
|
|
+ if l2exist:
|
|
|
+ v["l2hits"] = d["l2_hits"] / sint
|
|
|
+ v["l2miss"] = d["l2_misses"] / sint
|
|
|
+ v["l2read"] = v["l2hits"] + v["l2miss"]
|
|
|
+ v["l2hit%"] = 100 * v["l2hits"] / v["l2read"] if v["l2read"] > 0 else 0
|
|
|
+
|
|
|
+ v["l2miss%"] = 100 - v["l2hit%"] if v["l2read"] > 0 else 0
|
|
|
+ v["l2size"] = cur["l2_size"]
|
|
|
+ v["l2bytes"] = d["l2_read_bytes"] / sint
|
|
|
+
|
|
|
+
|
|
|
+def sighandler(*args):
|
|
|
+ sys.exit(0)
|
|
|
+
|
|
|
+
|
|
|
+def main():
|
|
|
+ global sint
|
|
|
+ global count
|
|
|
+ global hdr_intr
|
|
|
+
|
|
|
+ i = 0
|
|
|
+ count_flag = 0
|
|
|
+
|
|
|
+ init()
|
|
|
+ if count > 0:
|
|
|
+ count_flag = 1
|
|
|
+
|
|
|
+ signal(SIGINT, sighandler)
|
|
|
+ while True:
|
|
|
+ if i == 0:
|
|
|
+ print_header()
|
|
|
+
|
|
|
+ snap_stats()
|
|
|
+ calculate()
|
|
|
+ print_values()
|
|
|
+
|
|
|
+ if count_flag == 1:
|
|
|
+ if count <= 1:
|
|
|
+ break
|
|
|
+ count -= 1
|
|
|
+
|
|
|
+ i = 0 if i == hdr_intr else i + 1
|
|
|
+ time.sleep(sint)
|
|
|
+
|
|
|
+ if out:
|
|
|
+ out.close()
|
|
|
+
|
|
|
+
|
|
|
+if __name__ == '__main__':
|
|
|
+ main()
|
|
|
diff --git a/cmd/mount_zfs/mount_zfs.c b/cmd/mount_zfs/mount_zfs.c
|
|
|
index cd27314..4db33ed 100644
|
|
|
--- a/cmd/mount_zfs/mount_zfs.c
|
|
|
+++ b/cmd/mount_zfs/mount_zfs.c
|
|
|
@@ -131,15 +131,6 @@ parse_option(char *mntopt, unsigned long *mntflags,
|
|
|
if (strncmp(name, opt->name, strlen(name)) == 0) {
|
|
|
*mntflags |= opt->mntmask;
|
|
|
*zfsflags |= opt->zfsmask;
|
|
|
-
|
|
|
- /* MS_USERS implies default user options */
|
|
|
- if (opt->mntmask & (MS_USERS))
|
|
|
- *mntflags |= (MS_NOEXEC|MS_NOSUID|MS_NODEV);
|
|
|
-
|
|
|
- /* MS_OWNER|MS_GROUP imply default owner options */
|
|
|
- if (opt->mntmask & (MS_OWNER | MS_GROUP))
|
|
|
- *mntflags |= (MS_NOSUID|MS_NODEV);
|
|
|
-
|
|
|
error = 0;
|
|
|
goto out;
|
|
|
}
|
|
|
@@ -220,17 +211,57 @@ out:
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
- * If a file or directory in your current working directory is named
|
|
|
- * 'dataset' then mount(8) will prepend your current working directory
|
|
|
- * to dataset. The is no way to prevent this behavior so we simply
|
|
|
- * check for it and strip the prepended patch when it is added.
|
|
|
+ * Return the pool/dataset to mount given the name passed to mount. This
|
|
|
+ * is expected to be of the form pool/dataset, however may also refer to
|
|
|
+ * a block device if that device contains a valid zfs label.
|
|
|
*/
|
|
|
static char *
|
|
|
parse_dataset(char *dataset)
|
|
|
{
|
|
|
char cwd[PATH_MAX];
|
|
|
+ struct stat64 statbuf;
|
|
|
+ int error;
|
|
|
int len;
|
|
|
|
|
|
+ /*
|
|
|
+ * We expect a pool/dataset to be provided, however if we're
|
|
|
+ * given a device which is a member of a zpool we attempt to
|
|
|
+ * extract the pool name stored in the label. Given the pool
|
|
|
+ * name we can mount the root dataset.
|
|
|
+ */
|
|
|
+ error = stat64(dataset, &statbuf);
|
|
|
+ if (error == 0) {
|
|
|
+ nvlist_t *config;
|
|
|
+ char *name;
|
|
|
+ int fd;
|
|
|
+
|
|
|
+ fd = open(dataset, O_RDONLY);
|
|
|
+ if (fd < 0)
|
|
|
+ goto out;
|
|
|
+
|
|
|
+ error = zpool_read_label(fd, &config);
|
|
|
+ (void) close(fd);
|
|
|
+ if (error)
|
|
|
+ goto out;
|
|
|
+
|
|
|
+ error = nvlist_lookup_string(config,
|
|
|
+ ZPOOL_CONFIG_POOL_NAME, &name);
|
|
|
+ if (error) {
|
|
|
+ nvlist_free(config);
|
|
|
+ } else {
|
|
|
+ dataset = strdup(name);
|
|
|
+ nvlist_free(config);
|
|
|
+ return (dataset);
|
|
|
+ }
|
|
|
+ }
|
|
|
+out:
|
|
|
+ /*
|
|
|
+ * If a file or directory in your current working directory is
|
|
|
+ * named 'dataset' then mount(8) will prepend your current working
|
|
|
+ * directory to the dataset. There is no way to prevent this
|
|
|
+ * behavior so we simply check for it and strip the prepended
|
|
|
+ * path when it is added.
|
|
|
+ */
|
|
|
if (getcwd(cwd, PATH_MAX) == NULL)
|
|
|
return (dataset);
|
|
|
|
|
|
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
|
|
|
index 70b4ba2..82491ad 100644
|
|
|
--- a/cmd/zdb/zdb.c
|
|
|
+++ b/cmd/zdb/zdb.c
|
|
|
@@ -86,6 +86,7 @@ extern void dump_intent_log(zilog_t *);
|
|
|
uint64_t *zopt_object = NULL;
|
|
|
int zopt_objects = 0;
|
|
|
libzfs_handle_t *g_zfs;
|
|
|
+uint64_t max_inflight = 200;
|
|
|
|
|
|
/*
|
|
|
* These libumem hooks provide a reasonable set of defaults for the allocator's
|
|
|
@@ -108,13 +109,14 @@ usage(void)
|
|
|
{
|
|
|
(void) fprintf(stderr,
|
|
|
"Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]] "
|
|
|
- "poolname [object...]\n"
|
|
|
- " %s [-divPA] [-e -p path...] dataset [object...]\n"
|
|
|
- " %s -m [-LXFPA] [-t txg] [-e [-p path...]] "
|
|
|
+ "[-U config] [-M inflight I/Os] poolname [object...]\n"
|
|
|
+ " %s [-divPA] [-e -p path...] [-U config] dataset "
|
|
|
+ "[object...]\n"
|
|
|
+ " %s -m [-LXFPA] [-t txg] [-e [-p path...]] [-U config] "
|
|
|
"poolname [vdev [metaslab...]]\n"
|
|
|
" %s -R [-A] [-e [-p path...]] poolname "
|
|
|
"vdev:offset:size[:flags]\n"
|
|
|
- " %s -S [-PA] [-e [-p path...]] poolname\n"
|
|
|
+ " %s -S [-PA] [-e [-p path...]] [-U config] poolname\n"
|
|
|
" %s -l [-uA] device\n"
|
|
|
" %s -C [-A] [-U config]\n\n",
|
|
|
cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
|
|
|
@@ -161,6 +163,8 @@ usage(void)
|
|
|
(void) fprintf(stderr, " -P print numbers in parseable form\n");
|
|
|
(void) fprintf(stderr, " -t <txg> -- highest txg to use when "
|
|
|
"searching for uberblocks\n");
|
|
|
+ (void) fprintf(stderr, " -M <number of inflight I/Os> -- "
|
|
|
+ "specify the maximum number of checksumming I/Os [default is 200]\n");
|
|
|
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
|
|
|
"to make only that option verbose\n");
|
|
|
(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
|
|
|
@@ -544,7 +548,7 @@ static void
|
|
|
dump_metaslab_stats(metaslab_t *msp)
|
|
|
{
|
|
|
char maxbuf[32];
|
|
|
- space_map_t *sm = &msp->ms_map;
|
|
|
+ space_map_t *sm = msp->ms_map;
|
|
|
avl_tree_t *t = sm->sm_pp_root;
|
|
|
int free_pct = sm->sm_space * 100 / sm->sm_size;
|
|
|
|
|
|
@@ -560,7 +564,7 @@ dump_metaslab(metaslab_t *msp)
|
|
|
{
|
|
|
vdev_t *vd = msp->ms_group->mg_vd;
|
|
|
spa_t *spa = vd->vdev_spa;
|
|
|
- space_map_t *sm = &msp->ms_map;
|
|
|
+ space_map_t *sm = msp->ms_map;
|
|
|
space_map_obj_t *smo = &msp->ms_smo;
|
|
|
char freebuf[32];
|
|
|
|
|
|
@@ -992,7 +996,7 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
|
|
|
arc_buf_t *buf;
|
|
|
uint64_t fill = 0;
|
|
|
|
|
|
- err = arc_read_nolock(NULL, spa, bp, arc_getbuf_func, &buf,
|
|
|
+ err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf,
|
|
|
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
|
|
|
if (err)
|
|
|
return (err);
|
|
|
@@ -1323,6 +1327,61 @@ dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid)
|
|
|
print_idstr(gid, "gid");
|
|
|
}
|
|
|
|
|
|
+static void
|
|
|
+dump_znode_sa_xattr(sa_handle_t *hdl)
|
|
|
+{
|
|
|
+ nvlist_t *sa_xattr;
|
|
|
+ nvpair_t *elem = NULL;
|
|
|
+ int sa_xattr_size = 0;
|
|
|
+ int sa_xattr_entries = 0;
|
|
|
+ int error;
|
|
|
+ char *sa_xattr_packed;
|
|
|
+
|
|
|
+ error = sa_size(hdl, sa_attr_table[ZPL_DXATTR], &sa_xattr_size);
|
|
|
+ if (error || sa_xattr_size == 0)
|
|
|
+ return;
|
|
|
+
|
|
|
+ sa_xattr_packed = malloc(sa_xattr_size);
|
|
|
+ if (sa_xattr_packed == NULL)
|
|
|
+ return;
|
|
|
+
|
|
|
+ error = sa_lookup(hdl, sa_attr_table[ZPL_DXATTR],
|
|
|
+ sa_xattr_packed, sa_xattr_size);
|
|
|
+ if (error) {
|
|
|
+ free(sa_xattr_packed);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ error = nvlist_unpack(sa_xattr_packed, sa_xattr_size, &sa_xattr, 0);
|
|
|
+ if (error) {
|
|
|
+ free(sa_xattr_packed);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL)
|
|
|
+ sa_xattr_entries++;
|
|
|
+
|
|
|
+ (void) printf("\tSA xattrs: %d bytes, %d entries\n\n",
|
|
|
+ sa_xattr_size, sa_xattr_entries);
|
|
|
+ while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL) {
|
|
|
+ uchar_t *value;
|
|
|
+ uint_t cnt, idx;
|
|
|
+
|
|
|
+ (void) printf("\t\t%s = ", nvpair_name(elem));
|
|
|
+ nvpair_value_byte_array(elem, &value, &cnt);
|
|
|
+ for (idx = 0 ; idx < cnt ; ++idx) {
|
|
|
+ if (isprint(value[idx]))
|
|
|
+ (void) putchar(value[idx]);
|
|
|
+ else
|
|
|
+ (void) printf("\\%3.3o", value[idx]);
|
|
|
+ }
|
|
|
+ (void) putchar('\n');
|
|
|
+ }
|
|
|
+
|
|
|
+ nvlist_free(sa_xattr);
|
|
|
+ free(sa_xattr_packed);
|
|
|
+}
|
|
|
+
|
|
|
/*ARGSUSED*/
|
|
|
static void
|
|
|
dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
|
|
|
@@ -1423,6 +1482,7 @@ dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
|
|
|
if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev,
|
|
|
sizeof (uint64_t)) == 0)
|
|
|
(void) printf("\trdev 0x%016llx\n", (u_longlong_t)rdev);
|
|
|
+ dump_znode_sa_xattr(hdl);
|
|
|
sa_handle_destroy(hdl);
|
|
|
}
|
|
|
|
|
|
@@ -2005,9 +2065,47 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
|
|
|
bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
|
|
|
}
|
|
|
|
|
|
-/* ARGSUSED */
|
|
|
+static void
|
|
|
+zdb_blkptr_done(zio_t *zio)
|
|
|
+{
|
|
|
+ spa_t *spa = zio->io_spa;
|
|
|
+ blkptr_t *bp = zio->io_bp;
|
|
|
+ int ioerr = zio->io_error;
|
|
|
+ zdb_cb_t *zcb = zio->io_private;
|
|
|
+ zbookmark_t *zb = &zio->io_bookmark;
|
|
|
+
|
|
|
+ zio_data_buf_free(zio->io_data, zio->io_size);
|
|
|
+
|
|
|
+ mutex_enter(&spa->spa_scrub_lock);
|
|
|
+ spa->spa_scrub_inflight--;
|
|
|
+ cv_broadcast(&spa->spa_scrub_io_cv);
|
|
|
+
|
|
|
+ if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
|
|
|
+ char blkbuf[BP_SPRINTF_LEN];
|
|
|
+
|
|
|
+ zcb->zcb_haderrors = 1;
|
|
|
+ zcb->zcb_errors[ioerr]++;
|
|
|
+
|
|
|
+ if (dump_opt['b'] >= 2)
|
|
|
+ sprintf_blkptr(blkbuf, bp);
|
|
|
+ else
|
|
|
+ blkbuf[0] = '\0';
|
|
|
+
|
|
|
+ (void) printf("zdb_blkptr_cb: "
|
|
|
+ "Got error %d reading "
|
|
|
+ "<%llu, %llu, %lld, %llx> %s -- skipping\n",
|
|
|
+ ioerr,
|
|
|
+ (u_longlong_t)zb->zb_objset,
|
|
|
+ (u_longlong_t)zb->zb_object,
|
|
|
+ (u_longlong_t)zb->zb_level,
|
|
|
+ (u_longlong_t)zb->zb_blkid,
|
|
|
+ blkbuf);
|
|
|
+ }
|
|
|
+ mutex_exit(&spa->spa_scrub_lock);
|
|
|
+}
|
|
|
+
|
|
|
static int
|
|
|
-zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
|
|
|
+zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|
|
const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
|
|
|
{
|
|
|
zdb_cb_t *zcb = arg;
|
|
|
@@ -2026,39 +2124,23 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
|
|
|
is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));
|
|
|
|
|
|
if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
|
|
|
- int ioerr;
|
|
|
size_t size = BP_GET_PSIZE(bp);
|
|
|
- void *data = malloc(size);
|
|
|
+ void *data = zio_data_buf_alloc(size);
|
|
|
int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
|
|
|
|
|
|
/* If it's an intent log block, failure is expected. */
|
|
|
if (zb->zb_level == ZB_ZIL_LEVEL)
|
|
|
flags |= ZIO_FLAG_SPECULATIVE;
|
|
|
|
|
|
- ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
|
|
|
- NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb));
|
|
|
-
|
|
|
- free(data);
|
|
|
+ mutex_enter(&spa->spa_scrub_lock);
|
|
|
+ while (spa->spa_scrub_inflight > max_inflight)
|
|
|
+ cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
|
|
|
+ spa->spa_scrub_inflight++;
|
|
|
+ mutex_exit(&spa->spa_scrub_lock);
|
|
|
|
|
|
- if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) {
|
|
|
- zcb->zcb_haderrors = 1;
|
|
|
- zcb->zcb_errors[ioerr]++;
|
|
|
+ zio_nowait(zio_read(NULL, spa, bp, data, size,
|
|
|
+ zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb));
|
|
|
|
|
|
- if (dump_opt['b'] >= 2)
|
|
|
- sprintf_blkptr(blkbuf, bp);
|
|
|
- else
|
|
|
- blkbuf[0] = '\0';
|
|
|
-
|
|
|
- (void) printf("zdb_blkptr_cb: "
|
|
|
- "Got error %d reading "
|
|
|
- "<%llu, %llu, %lld, %llx> %s -- skipping\n",
|
|
|
- ioerr,
|
|
|
- (u_longlong_t)zb->zb_objset,
|
|
|
- (u_longlong_t)zb->zb_object,
|
|
|
- (u_longlong_t)zb->zb_level,
|
|
|
- (u_longlong_t)zb->zb_blkid,
|
|
|
- blkbuf);
|
|
|
- }
|
|
|
}
|
|
|
|
|
|
zcb->zcb_readfails = 0;
|
|
|
@@ -2167,11 +2249,11 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
|
|
|
for (m = 0; m < vd->vdev_ms_count; m++) {
|
|
|
metaslab_t *msp = vd->vdev_ms[m];
|
|
|
mutex_enter(&msp->ms_lock);
|
|
|
- space_map_unload(&msp->ms_map);
|
|
|
- VERIFY(space_map_load(&msp->ms_map,
|
|
|
+ space_map_unload(msp->ms_map);
|
|
|
+ VERIFY(space_map_load(msp->ms_map,
|
|
|
&zdb_space_map_ops, SM_ALLOC, &msp->ms_smo,
|
|
|
spa->spa_meta_objset) == 0);
|
|
|
- msp->ms_map.sm_ppd = vd;
|
|
|
+ msp->ms_map->sm_ppd = vd;
|
|
|
mutex_exit(&msp->ms_lock);
|
|
|
}
|
|
|
}
|
|
|
@@ -2196,7 +2278,7 @@ zdb_leak_fini(spa_t *spa)
|
|
|
for (m = 0; m < vd->vdev_ms_count; m++) {
|
|
|
metaslab_t *msp = vd->vdev_ms[m];
|
|
|
mutex_enter(&msp->ms_lock);
|
|
|
- space_map_unload(&msp->ms_map);
|
|
|
+ space_map_unload(msp->ms_map);
|
|
|
mutex_exit(&msp->ms_lock);
|
|
|
}
|
|
|
}
|
|
|
@@ -2266,6 +2348,18 @@ dump_block_stats(spa_t *spa)
|
|
|
|
|
|
zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
|
|
|
|
|
|
+ /*
|
|
|
+ * If we've traversed the data blocks then we need to wait for those
|
|
|
+ * I/Os to complete. We leverage "The Godfather" zio to wait on
|
|
|
+ * all async I/Os to complete.
|
|
|
+ */
|
|
|
+ if (dump_opt['c']) {
|
|
|
+ (void) zio_wait(spa->spa_async_zio_root);
|
|
|
+ spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
|
|
|
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
|
|
|
+ ZIO_FLAG_GODFATHER);
|
|
|
+ }
|
|
|
+
|
|
|
if (zcb.zcb_haderrors) {
|
|
|
(void) printf("\nError counts:\n\n");
|
|
|
(void) printf("\t%5s %s\n", "errno", "count");
|
|
|
@@ -2418,7 +2512,7 @@ typedef struct zdb_ddt_entry {
|
|
|
/* ARGSUSED */
|
|
|
static int
|
|
|
zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|
|
- arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
|
|
|
+ const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
|
|
|
{
|
|
|
avl_tree_t *t = arg;
|
|
|
avl_index_t where;
|
|
|
@@ -2806,7 +2900,7 @@ zdb_read_block(char *thing, spa_t *spa)
|
|
|
psize = size;
|
|
|
lsize = size;
|
|
|
|
|
|
- pbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
|
|
|
+ pbuf = umem_alloc_aligned(SPA_MAXBLOCKSIZE, 512, UMEM_NOFAIL);
|
|
|
lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
|
|
|
|
|
|
BP_ZERO(bp);
|
|
|
@@ -3020,13 +3114,23 @@ main(int argc, char **argv)
|
|
|
nvlist_t *policy = NULL;
|
|
|
uint64_t max_txg = UINT64_MAX;
|
|
|
int rewind = ZPOOL_NEVER_REWIND;
|
|
|
+ char *spa_config_path_env;
|
|
|
|
|
|
(void) setrlimit(RLIMIT_NOFILE, &rl);
|
|
|
(void) enable_extended_FILE_stdio(-1, -1);
|
|
|
|
|
|
dprintf_setup(&argc, argv);
|
|
|
|
|
|
- while ((c = getopt(argc, argv, "bcdhilmsuCDRSAFLXevp:t:U:P")) != -1) {
|
|
|
+ /*
|
|
|
+ * If there is an environment variable SPA_CONFIG_PATH it overrides
|
|
|
+ * default spa_config_path setting. If -U flag is specified it will
|
|
|
+ * override the environment variable setting once again.
|
|
|
+ */
|
|
|
+ spa_config_path_env = getenv("SPA_CONFIG_PATH");
|
|
|
+ if (spa_config_path_env != NULL)
|
|
|
+ spa_config_path = spa_config_path_env;
|
|
|
+
|
|
|
+ while ((c = getopt(argc, argv, "bcdhilmM:suCDRSAFLXevp:t:U:P")) != -1) {
|
|
|
switch (c) {
|
|
|
case 'b':
|
|
|
case 'c':
|
|
|
@@ -3055,6 +3159,15 @@ main(int argc, char **argv)
|
|
|
case 'v':
|
|
|
verbose++;
|
|
|
break;
|
|
|
+ case 'M':
|
|
|
+ max_inflight = strtoull(optarg, NULL, 0);
|
|
|
+ if (max_inflight == 0) {
|
|
|
+ (void) fprintf(stderr, "maximum number "
|
|
|
+ "of inflight I/Os must be greater "
|
|
|
+ "than 0\n");
|
|
|
+ usage();
|
|
|
+ }
|
|
|
+ break;
|
|
|
case 'p':
|
|
|
if (searchdirs == NULL) {
|
|
|
searchdirs = umem_alloc(sizeof (char *),
|
|
|
diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c
|
|
|
index 818ab91..9671c0c 100644
|
|
|
--- a/cmd/zfs/zfs_main.c
|
|
|
+++ b/cmd/zfs/zfs_main.c
|
|
|
@@ -5383,7 +5383,7 @@ get_one_dataset(zfs_handle_t *zhp, void *data)
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
- * Interate over any nested datasets.
|
|
|
+ * Iterate over any nested datasets.
|
|
|
*/
|
|
|
if (zfs_iter_filesystems(zhp, get_one_dataset, data) != 0) {
|
|
|
zfs_close(zhp);
|
|
|
diff --git a/cmd/zinject/translate.c b/cmd/zinject/translate.c
|
|
|
index 1607866..fc16127 100644
|
|
|
--- a/cmd/zinject/translate.c
|
|
|
+++ b/cmd/zinject/translate.c
|
|
|
@@ -20,6 +20,7 @@
|
|
|
*/
|
|
|
/*
|
|
|
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
|
+ * Copyright (c) 2012 by Delphix. All rights reserved.
|
|
|
*/
|
|
|
|
|
|
#include <libzfs.h>
|
|
|
@@ -476,6 +477,20 @@ translate_device(const char *pool, const char *device, err_type_t label_type,
|
|
|
&record->zi_guid) == 0);
|
|
|
}
|
|
|
|
|
|
+ /*
|
|
|
+ * Device faults can take on three different forms:
|
|
|
+ * 1). delayed or hanging I/O
|
|
|
+ * 2). zfs label faults
|
|
|
+ * 3). generic disk faults
|
|
|
+ */
|
|
|
+ if (record->zi_timer != 0) {
|
|
|
+ record->zi_cmd = ZINJECT_DELAY_IO;
|
|
|
+ } else if (label_type != TYPE_INVAL) {
|
|
|
+ record->zi_cmd = ZINJECT_LABEL_FAULT;
|
|
|
+ } else {
|
|
|
+ record->zi_cmd = ZINJECT_DEVICE_FAULT;
|
|
|
+ }
|
|
|
+
|
|
|
switch (label_type) {
|
|
|
default:
|
|
|
break;
|
|
|
diff --git a/cmd/zinject/zinject.c b/cmd/zinject/zinject.c
|
|
|
index d584ead..13d067d 100644
|
|
|
--- a/cmd/zinject/zinject.c
|
|
|
+++ b/cmd/zinject/zinject.c
|
|
|
@@ -20,6 +20,7 @@
|
|
|
*/
|
|
|
/*
|
|
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
|
+ * Copyright (c) 2012 by Delphix. All rights reserved.
|
|
|
*/
|
|
|
|
|
|
/*
|
|
|
@@ -235,8 +236,8 @@ usage(void)
|
|
|
"\t\t'pad1', or 'pad2'.\n"
|
|
|
"\t\t'errno' can be 'nxio' (the default), 'io', or 'dtl'.\n"
|
|
|
"\n"
|
|
|
- "\tzinject -d device -A <degrade|fault> pool\n"
|
|
|
- "\t\tPerform a specific action on a particular device\n"
|
|
|
+ "\tzinject -d device -A <degrade|fault> -D <delay secs> pool\n"
|
|
|
+ "\t\tPerform a specific action on a particular device.\n"
|
|
|
"\n"
|
|
|
"\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
|
|
|
"\t\tCause the pool to stop writing blocks yet not\n"
|
|
|
@@ -573,6 +574,16 @@ main(int argc, char **argv)
|
|
|
int ret;
|
|
|
int flags = 0;
|
|
|
|
|
|
+ if ((g_zfs = libzfs_init()) == NULL)
|
|
|
+ return (1);
|
|
|
+
|
|
|
+ libzfs_print_on_error(g_zfs, B_TRUE);
|
|
|
+
|
|
|
+ if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
|
|
|
+ (void) fprintf(stderr, "failed to open ZFS device\n");
|
|
|
+ return (1);
|
|
|
+ }
|
|
|
+
|
|
|
if (argc == 1) {
|
|
|
/*
|
|
|
* No arguments. Print the available handlers. If there are no
|
|
|
@@ -589,7 +600,7 @@ main(int argc, char **argv)
|
|
|
}
|
|
|
|
|
|
while ((c = getopt(argc, argv,
|
|
|
- ":aA:b:d:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:")) != -1) {
|
|
|
+ ":aA:b:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:")) != -1) {
|
|
|
switch (c) {
|
|
|
case 'a':
|
|
|
flags |= ZINJECT_FLUSH_ARC;
|
|
|
@@ -615,6 +626,16 @@ main(int argc, char **argv)
|
|
|
case 'd':
|
|
|
device = optarg;
|
|
|
break;
|
|
|
+ case 'D':
|
|
|
+ errno = 0;
|
|
|
+ record.zi_timer = strtoull(optarg, &end, 10);
|
|
|
+ if (errno != 0 || *end != '\0') {
|
|
|
+ (void) fprintf(stderr, "invalid i/o delay "
|
|
|
+ "value: '%s'\n", optarg);
|
|
|
+ usage();
|
|
|
+ return (1);
|
|
|
+ }
|
|
|
+ break;
|
|
|
case 'e':
|
|
|
if (strcasecmp(optarg, "io") == 0) {
|
|
|
error = EIO;
|
|
|
@@ -679,6 +700,7 @@ main(int argc, char **argv)
|
|
|
case 'p':
|
|
|
(void) strlcpy(record.zi_func, optarg,
|
|
|
sizeof (record.zi_func));
|
|
|
+ record.zi_cmd = ZINJECT_PANIC;
|
|
|
break;
|
|
|
case 'q':
|
|
|
quiet = 1;
|
|
|
@@ -752,23 +774,15 @@ main(int argc, char **argv)
|
|
|
argc -= optind;
|
|
|
argv += optind;
|
|
|
|
|
|
- if ((g_zfs = libzfs_init()) == NULL)
|
|
|
- return (1);
|
|
|
-
|
|
|
- libzfs_print_on_error(g_zfs, B_TRUE);
|
|
|
-
|
|
|
- if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
|
|
|
- (void) fprintf(stderr, "failed to open ZFS device\n");
|
|
|
- return (1);
|
|
|
- }
|
|
|
+ if (record.zi_duration != 0)
|
|
|
+ record.zi_cmd = ZINJECT_IGNORED_WRITES;
|
|
|
|
|
|
if (cancel != NULL) {
|
|
|
/*
|
|
|
* '-c' is invalid with any other options.
|
|
|
*/
|
|
|
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
|
|
|
- level != 0 || record.zi_func[0] != '\0' ||
|
|
|
- record.zi_duration != 0) {
|
|
|
+ level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED) {
|
|
|
(void) fprintf(stderr, "cancel (-c) incompatible with "
|
|
|
"any other options\n");
|
|
|
usage();
|
|
|
@@ -800,8 +814,7 @@ main(int argc, char **argv)
|
|
|
* for doing injection, so handle it separately here.
|
|
|
*/
|
|
|
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
|
|
|
- level != 0 || record.zi_func[0] != '\0' ||
|
|
|
- record.zi_duration != 0) {
|
|
|
+ level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED) {
|
|
|
(void) fprintf(stderr, "device (-d) incompatible with "
|
|
|
"data error injection\n");
|
|
|
usage();
|
|
|
@@ -835,7 +848,7 @@ main(int argc, char **argv)
|
|
|
|
|
|
} else if (raw != NULL) {
|
|
|
if (range != NULL || type != TYPE_INVAL || level != 0 ||
|
|
|
- record.zi_func[0] != '\0' || record.zi_duration != 0) {
|
|
|
+ record.zi_cmd != ZINJECT_UNINITIALIZED) {
|
|
|
(void) fprintf(stderr, "raw (-b) format with "
|
|
|
"any other options\n");
|
|
|
usage();
|
|
|
@@ -858,13 +871,14 @@ main(int argc, char **argv)
|
|
|
return (1);
|
|
|
}
|
|
|
|
|
|
+ record.zi_cmd = ZINJECT_DATA_FAULT;
|
|
|
if (translate_raw(raw, &record) != 0)
|
|
|
return (1);
|
|
|
if (!error)
|
|
|
error = EIO;
|
|
|
- } else if (record.zi_func[0] != '\0') {
|
|
|
+ } else if (record.zi_cmd == ZINJECT_PANIC) {
|
|
|
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
|
|
|
- level != 0 || device != NULL || record.zi_duration != 0) {
|
|
|
+ level != 0 || device != NULL) {
|
|
|
(void) fprintf(stderr, "panic (-p) incompatible with "
|
|
|
"other options\n");
|
|
|
usage();
|
|
|
@@ -882,7 +896,7 @@ main(int argc, char **argv)
|
|
|
if (argv[1] != NULL)
|
|
|
record.zi_type = atoi(argv[1]);
|
|
|
dataset[0] = '\0';
|
|
|
- } else if (record.zi_duration != 0) {
|
|
|
+ } else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) {
|
|
|
if (nowrites == 0) {
|
|
|
(void) fprintf(stderr, "-s or -g meaningless "
|
|
|
"without -I (ignore writes)\n");
|
|
|
@@ -936,6 +950,7 @@ main(int argc, char **argv)
|
|
|
return (1);
|
|
|
}
|
|
|
|
|
|
+ record.zi_cmd = ZINJECT_DATA_FAULT;
|
|
|
if (translate_record(type, argv[0], range, level, &record, pool,
|
|
|
dataset) != 0)
|
|
|
return (1);
|
|
|
diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
|
|
|
index 3200698..b96fbe4 100644
|
|
|
--- a/cmd/zpool/zpool_main.c
|
|
|
+++ b/cmd/zpool/zpool_main.c
|
|
|
@@ -63,6 +63,7 @@ static int zpool_do_destroy(int, char **);
|
|
|
|
|
|
static int zpool_do_add(int, char **);
|
|
|
static int zpool_do_remove(int, char **);
|
|
|
+static int zpool_do_labelclear(int, char **);
|
|
|
|
|
|
static int zpool_do_list(int, char **);
|
|
|
static int zpool_do_iostat(int, char **);
|
|
|
@@ -123,6 +124,7 @@ typedef enum {
|
|
|
HELP_HISTORY,
|
|
|
HELP_IMPORT,
|
|
|
HELP_IOSTAT,
|
|
|
+ HELP_LABELCLEAR,
|
|
|
HELP_LIST,
|
|
|
HELP_OFFLINE,
|
|
|
HELP_ONLINE,
|
|
|
@@ -162,6 +164,8 @@ static zpool_command_t command_table[] = {
|
|
|
{ "add", zpool_do_add, HELP_ADD },
|
|
|
{ "remove", zpool_do_remove, HELP_REMOVE },
|
|
|
{ NULL },
|
|
|
+ { "labelclear", zpool_do_labelclear, HELP_LABELCLEAR },
|
|
|
+ { NULL },
|
|
|
{ "list", zpool_do_list, HELP_LIST },
|
|
|
{ "iostat", zpool_do_iostat, HELP_IOSTAT },
|
|
|
{ "status", zpool_do_status, HELP_STATUS },
|
|
|
@@ -233,8 +237,10 @@ get_usage(zpool_help_t idx) {
|
|
|
case HELP_IOSTAT:
|
|
|
return (gettext("\tiostat [-v] [-T d|u] [pool] ... [interval "
|
|
|
"[count]]\n"));
|
|
|
+ case HELP_LABELCLEAR:
|
|
|
+ return (gettext("\tlabelclear [-f] <vdev>\n"));
|
|
|
case HELP_LIST:
|
|
|
- return (gettext("\tlist [-H] [-o property[,...]] "
|
|
|
+ return (gettext("\tlist [-Hv] [-o property[,...]] "
|
|
|
"[-T d|u] [pool] ... [interval [count]]\n"));
|
|
|
case HELP_OFFLINE:
|
|
|
return (gettext("\toffline [-t] <pool> <device> ...\n"));
|
|
|
@@ -246,7 +252,7 @@ get_usage(zpool_help_t idx) {
|
|
|
case HELP_REMOVE:
|
|
|
return (gettext("\tremove <pool> <device> ...\n"));
|
|
|
case HELP_REOPEN:
|
|
|
- return (""); /* Undocumented command */
|
|
|
+ return (gettext("\treopen <pool>\n"));
|
|
|
case HELP_SCRUB:
|
|
|
return (gettext("\tscrub [-s] <pool> ...\n"));
|
|
|
case HELP_STATUS:
|
|
|
@@ -642,6 +648,127 @@ zpool_do_remove(int argc, char **argv)
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
+ * zpool labelclear [-f] <vdev>
|
|
|
+ *
|
|
|
+ * Verifies that the vdev is not active and zeros out the label information
|
|
|
+ * on the device.  -f also clears exported or potentially active vdevs.
|
|
|
+ */
|
|
|
+int
|
|
|
+zpool_do_labelclear(int argc, char **argv)
|
|
|
+{
|
|
|
+ char *vdev, *name;
|
|
|
+ int c, fd = -1, ret = 0;
|
|
|
+ pool_state_t state;
|
|
|
+ boolean_t inuse = B_FALSE;
|
|
|
+ boolean_t force = B_FALSE;
|
|
|
+
|
|
|
+ /* check options */
|
|
|
+ while ((c = getopt(argc, argv, "f")) != -1) {
|
|
|
+ switch (c) {
|
|
|
+ case 'f':
|
|
|
+ force = B_TRUE;
|
|
|
+ break;
|
|
|
+ default:
|
|
|
+ (void) fprintf(stderr, gettext("invalid option '%c'\n"),
|
|
|
+ optopt);
|
|
|
+ usage(B_FALSE);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ argc -= optind;
|
|
|
+ argv += optind;
|
|
|
+
|
|
|
+ /* get vdev name */
|
|
|
+ if (argc < 1) {
|
|
|
+ (void) fprintf(stderr, gettext("missing vdev device name\n"));
|
|
|
+ usage(B_FALSE);
|
|
|
+ }
|
|
|
+
|
|
|
+ vdev = argv[0];
|
|
|
+ if ((fd = open(vdev, O_RDWR)) < 0) {
|
|
|
+ (void) fprintf(stderr, gettext("Unable to open %s\n"), vdev);
|
|
|
+ return (1); /* nothing to clean up yet; report failure, not 0 */
|
|
|
+ }
|
|
|
+
|
|
|
+ name = NULL;
|
|
|
+ if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0) {
|
|
|
+ if (force)
|
|
|
+ goto wipe_label;
|
|
|
+
|
|
|
+ (void) fprintf(stderr,
|
|
|
+ gettext("Unable to determine pool state for %s\n"
|
|
|
+ "Use -f to force the clearing of any label data\n"), vdev);
|
|
|
+ ret = 1;
|
|
|
+ goto errout; /* close fd like every other failure path */
|
|
|
+ }
|
|
|
+
|
|
|
+ if (inuse) {
|
|
|
+ switch (state) {
|
|
|
+ default:
|
|
|
+ case POOL_STATE_ACTIVE:
|
|
|
+ case POOL_STATE_SPARE:
|
|
|
+ case POOL_STATE_L2CACHE:
|
|
|
+ (void) fprintf(stderr,
|
|
|
+ gettext("labelclear operation failed.\n"
|
|
|
+ "\tVdev %s is a member (%s), of pool \"%s\".\n"
|
|
|
+ "\tTo remove label information from this device, "
|
|
|
+ "export or destroy\n\tthe pool, or remove %s from "
|
|
|
+ "the configuration of this pool\n\tand retry the "
|
|
|
+ "labelclear operation.\n"),
|
|
|
+ vdev, zpool_pool_state_to_name(state), name, vdev);
|
|
|
+ ret = 1;
|
|
|
+ goto errout;
|
|
|
+
|
|
|
+ case POOL_STATE_EXPORTED:
|
|
|
+ if (force)
|
|
|
+ break;
|
|
|
+
|
|
|
+ (void) fprintf(stderr,
|
|
|
+ gettext("labelclear operation failed.\n\tVdev "
|
|
|
+ "%s is a member of the exported pool \"%s\".\n"
|
|
|
+ "\tUse \"zpool labelclear -f %s\" to force the "
|
|
|
+ "removal of label\n\tinformation.\n"),
|
|
|
+ vdev, name, vdev);
|
|
|
+ ret = 1;
|
|
|
+ goto errout;
|
|
|
+
|
|
|
+ case POOL_STATE_POTENTIALLY_ACTIVE:
|
|
|
+ if (force)
|
|
|
+ break;
|
|
|
+
|
|
|
+ (void) fprintf(stderr,
|
|
|
+ gettext("labelclear operation failed.\n"
|
|
|
+ "\tVdev %s is a member of the pool \"%s\".\n"
|
|
|
+ "\tThis pool is unknown to this system, but may "
|
|
|
+ "be active on\n\tanother system. Use "
|
|
|
+ "\'zpool labelclear -f %s\' to force the\n"
|
|
|
+ "\tremoval of label information.\n"),
|
|
|
+ vdev, name, vdev);
|
|
|
+ ret = 1;
|
|
|
+ goto errout;
|
|
|
+
|
|
|
+ case POOL_STATE_DESTROYED:
|
|
|
+ /* inuse should never be set for a destroyed pool... */
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+wipe_label:
|
|
|
+ if (zpool_clear_label(fd) != 0) {
|
|
|
+ (void) fprintf(stderr,
|
|
|
+ gettext("Label clear failed on vdev %s\n"), vdev);
|
|
|
+ ret = 1;
|
|
|
+ }
|
|
|
+
|
|
|
+errout:
|
|
|
+ close(fd);
|
|
|
+ if (name != NULL)
|
|
|
+ free(name);
|
|
|
+
|
|
|
+ return (ret); /* 0 on success, 1 on failure */
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
* zpool create [-fnd] [-o property=value] ...
|
|
|
* [-O file-system-property=value] ...
|
|
|
* [-R root] [-m mountpoint] <pool> <dev> ...
|
|
|
@@ -2932,7 +3059,7 @@ int
|
|
|
zpool_do_list(int argc, char **argv)
|
|
|
{
|
|
|
int c;
|
|
|
- int ret;
|
|
|
+ int ret = 0;
|
|
|
list_cbdata_t cb = { 0 };
|
|
|
static char default_props[] =
|
|
|
"name,size,allocated,free,capacity,dedupratio,"
|
|
|
@@ -3612,22 +3739,37 @@ zpool_do_reguid(int argc, char **argv)
|
|
|
* zpool reopen <pool>
|
|
|
*
|
|
|
* Reopen the pool so that the kernel can update the sizes of all vdevs.
|
|
|
- *
|
|
|
- * NOTE: This command is currently undocumented. If the command is ever
|
|
|
- * exposed then the appropriate usage() messages will need to be made.
|
|
|
*/
|
|
|
int
|
|
|
zpool_do_reopen(int argc, char **argv)
|
|
|
{
|
|
|
+ int c;
|
|
|
int ret = 0;
|
|
|
zpool_handle_t *zhp;
|
|
|
char *pool;
|
|
|
|
|
|
+ /* check options */
|
|
|
+ while ((c = getopt(argc, argv, "")) != -1) {
|
|
|
+ switch (c) {
|
|
|
+ case '?':
|
|
|
+ (void) fprintf(stderr, gettext("invalid option '%c'\n"),
|
|
|
+ optopt);
|
|
|
+ usage(B_FALSE);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
argc--;
|
|
|
argv++;
|
|
|
|
|
|
- if (argc != 1)
|
|
|
- return (2);
|
|
|
+ if (argc < 1) {
|
|
|
+ (void) fprintf(stderr, gettext("missing pool name\n"));
|
|
|
+ usage(B_FALSE);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (argc > 1) {
|
|
|
+ (void) fprintf(stderr, gettext("too many arguments\n"));
|
|
|
+ usage(B_FALSE);
|
|
|
+ }
|
|
|
|
|
|
pool = argv[0];
|
|
|
if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL)
|
|
|
@@ -5228,6 +5370,7 @@ zpool_do_events_next(ev_opts_t *opts)
|
|
|
zpool_do_events_nvprint(nvl, 8);
|
|
|
printf(gettext("\n"));
|
|
|
}
|
|
|
+ (void) fflush(stdout);
|
|
|
|
|
|
nvlist_free(nvl);
|
|
|
}
|
|
|
diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c
|
|
|
index bb495b6..93a5f1e 100644
|
|
|
--- a/cmd/ztest/ztest.c
|
|
|
+++ b/cmd/ztest/ztest.c
|
|
|
@@ -1039,7 +1039,7 @@ ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value,
|
|
|
ztest_record_enospc(FTAG);
|
|
|
return (error);
|
|
|
}
|
|
|
- ASSERT3U(error, ==, 0);
|
|
|
+ ASSERT0(error);
|
|
|
|
|
|
setpoint = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
|
|
|
VERIFY3U(dsl_prop_get(osname, propname, sizeof (curval),
|
|
|
@@ -1073,7 +1073,7 @@ ztest_spa_prop_set_uint64(zpool_prop_t prop, uint64_t value)
|
|
|
ztest_record_enospc(FTAG);
|
|
|
return (error);
|
|
|
}
|
|
|
- ASSERT3U(error, ==, 0);
|
|
|
+ ASSERT0(error);
|
|
|
|
|
|
return (error);
|
|
|
}
|
|
|
@@ -1776,7 +1776,7 @@ ztest_replay_setattr(ztest_ds_t *zd, lr_setattr_t *lr, boolean_t byteswap)
|
|
|
|
|
|
ASSERT3U(lr->lr_size, >=, sizeof (*bbt));
|
|
|
ASSERT3U(lr->lr_size, <=, db->db_size);
|
|
|
- VERIFY3U(dmu_set_bonus(db, lr->lr_size, tx), ==, 0);
|
|
|
+ VERIFY0(dmu_set_bonus(db, lr->lr_size, tx));
|
|
|
bbt = ztest_bt_bonus(db);
|
|
|
|
|
|
ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode, txg, crtxg);
|
|
|
@@ -2581,7 +2581,7 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
|
|
|
zs->zs_vdev_aux = 0;
|
|
|
for (;;) {
|
|
|
int c;
|
|
|
- (void) snprintf(path, sizeof (path), ztest_aux_template,
|
|
|
+ (void) snprintf(path, MAXPATHLEN, ztest_aux_template,
|
|
|
ztest_opts.zo_dir, ztest_opts.zo_pool, aux,
|
|
|
zs->zs_vdev_aux);
|
|
|
for (c = 0; c < sav->sav_count; c++)
|
|
|
@@ -3200,7 +3200,7 @@ ztest_objset_destroy_cb(const char *name, void *arg)
|
|
|
error = dmu_object_info(os, ZTEST_DIROBJ, &doi);
|
|
|
if (error != ENOENT) {
|
|
|
/* We could have crashed in the middle of destroying it */
|
|
|
- ASSERT3U(error, ==, 0);
|
|
|
+ ASSERT0(error);
|
|
|
ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER);
|
|
|
ASSERT3S(doi.doi_physical_blocks_512, >=, 0);
|
|
|
}
|
|
|
@@ -3673,10 +3673,10 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id)
|
|
|
*/
|
|
|
error = dmu_read(os, packobj, packoff, packsize, packbuf,
|
|
|
DMU_READ_PREFETCH);
|
|
|
- ASSERT3U(error, ==, 0);
|
|
|
+ ASSERT0(error);
|
|
|
error = dmu_read(os, bigobj, bigoff, bigsize, bigbuf,
|
|
|
DMU_READ_PREFETCH);
|
|
|
- ASSERT3U(error, ==, 0);
|
|
|
+ ASSERT0(error);
|
|
|
|
|
|
/*
|
|
|
* Get a tx for the mods to both packobj and bigobj.
|
|
|
@@ -3999,10 +3999,10 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
|
|
|
if (i != 0 || ztest_random(2) != 0) {
|
|
|
error = dmu_read(os, packobj, packoff,
|
|
|
packsize, packbuf, DMU_READ_PREFETCH);
|
|
|
- ASSERT3U(error, ==, 0);
|
|
|
+ ASSERT0(error);
|
|
|
error = dmu_read(os, bigobj, bigoff, bigsize,
|
|
|
bigbuf, DMU_READ_PREFETCH);
|
|
|
- ASSERT3U(error, ==, 0);
|
|
|
+ ASSERT0(error);
|
|
|
}
|
|
|
compare_and_update_pbbufs(s, packbuf, bigbuf, bigsize,
|
|
|
n, chunksize, txg);
|
|
|
@@ -4287,7 +4287,7 @@ ztest_zap(ztest_ds_t *zd, uint64_t id)
|
|
|
if (error == ENOENT)
|
|
|
goto out;
|
|
|
|
|
|
- ASSERT3U(error, ==, 0);
|
|
|
+ ASSERT0(error);
|
|
|
|
|
|
tx = dmu_tx_create(os);
|
|
|
dmu_tx_hold_zap(tx, object, B_TRUE, NULL);
|
|
|
@@ -4494,7 +4494,7 @@ ztest_commit_callback(void *arg, int error)
|
|
|
data->zcd_called = B_TRUE;
|
|
|
|
|
|
if (error == ECANCELED) {
|
|
|
- ASSERT3U(data->zcd_txg, ==, 0);
|
|
|
+ ASSERT0(data->zcd_txg);
|
|
|
ASSERT(!data->zcd_added);
|
|
|
|
|
|
/*
|
|
|
@@ -4704,7 +4704,7 @@ ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id)
|
|
|
(void) ztest_spa_prop_set_uint64(ZPOOL_PROP_DEDUPDITTO,
|
|
|
ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN));
|
|
|
|
|
|
- VERIFY3U(spa_prop_get(ztest_spa, &props), ==, 0);
|
|
|
+ VERIFY0(spa_prop_get(ztest_spa, &props));
|
|
|
|
|
|
if (ztest_opts.zo_verbose >= 6)
|
|
|
dump_nvlist(props, 4);
|
|
|
@@ -5557,7 +5557,7 @@ ztest_dataset_open(int d)
|
|
|
}
|
|
|
ASSERT(error == 0 || error == EEXIST);
|
|
|
|
|
|
- VERIFY3U(dmu_objset_hold(name, zd, &os), ==, 0);
|
|
|
+ VERIFY0(dmu_objset_hold(name, zd, &os));
|
|
|
(void) rw_exit(&ztest_name_lock);
|
|
|
|
|
|
ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os);
|
|
|
@@ -5757,7 +5757,7 @@ ztest_run(ztest_shared_t *zs)
|
|
|
|
|
|
/* Verify that at least one commit cb was called in a timely fashion */
|
|
|
if (zc_cb_counter >= ZTEST_COMMIT_CB_MIN_REG)
|
|
|
- VERIFY3U(zc_min_txg_delay, ==, 0);
|
|
|
+ VERIFY0(zc_min_txg_delay);
|
|
|
|
|
|
spa_close(spa, FTAG);
|
|
|
|
|
|
diff --git a/config/Rules.am b/config/Rules.am
|
|
|
index bd4f0ea..e3fa5b5 100644
|
|
|
--- a/config/Rules.am
|
|
|
+++ b/config/Rules.am
|
|
|
@@ -3,6 +3,6 @@ DEFAULT_INCLUDES = -include ${top_builddir}/zfs_config.h
|
|
|
AM_LIBTOOLFLAGS = --silent
|
|
|
AM_CFLAGS = -Wall -Wstrict-prototypes
|
|
|
AM_CFLAGS += -fno-strict-aliasing ${NO_UNUSED_BUT_SET_VARIABLE} ${DEBUG_CFLAGS}
|
|
|
-AM_CFLAGS += -D_GNU_SOURCE -D__EXTENSIONS__ -D_REENTRANT
|
|
|
-AM_CFLAGS += -D_POSIX_PTHREAD_SEMANTICS -D_FILE_OFFSET_BITS=64
|
|
|
-AM_CFLAGS += -D_LARGEFILE64_SOURCE -DTEXT_DOMAIN=\"zfs-linux-user\"
|
|
|
+AM_CPPFLAGS = -D_GNU_SOURCE -D__EXTENSIONS__ -D_REENTRANT
|
|
|
+AM_CPPFLAGS += -D_POSIX_PTHREAD_SEMANTICS -D_FILE_OFFSET_BITS=64
|
|
|
+AM_CPPFLAGS += -D_LARGEFILE64_SOURCE -DTEXT_DOMAIN=\"zfs-linux-user\"
|
|
|
diff --git a/config/deb.am b/config/deb.am
|
|
|
index 078dd0c..78b01f0 100644
|
|
|
--- a/config/deb.am
|
|
|
+++ b/config/deb.am
|
|
|
@@ -20,7 +20,7 @@ if CONFIG_KERNEL
|
|
|
version=${VERSION}-${RELEASE}; \
|
|
|
arch=`$(RPM) -qp $${name}-kmod-$${version}.src.rpm --qf %{arch} | tail -1`; \
|
|
|
pkg1=kmod-$${name}*$${version}.$${arch}.rpm; \
|
|
|
- fakeroot $(ALIEN) --scripts --to-deb $$pkg1; \
|
|
|
+ fakeroot $(ALIEN) --bump=0 --scripts --to-deb $$pkg1; \
|
|
|
$(RM) $$pkg1
|
|
|
endif
|
|
|
|
|
|
@@ -33,7 +33,7 @@ if CONFIG_USER
|
|
|
pkg2=$${name}-devel-$${version}.$${arch}.rpm; \
|
|
|
pkg3=$${name}-test-$${version}.$${arch}.rpm; \
|
|
|
pkg4=$${name}-dracut-$${version}.$${arch}.rpm; \
|
|
|
- fakeroot $(ALIEN) --scripts --to-deb $$pkg1 $$pkg2 $$pkg3 $$pkg4; \
|
|
|
+ fakeroot $(ALIEN) --bump=0 --scripts --to-deb $$pkg1 $$pkg2 $$pkg3 $$pkg4; \
|
|
|
$(RM) $$pkg1 $$pkg2 $$pkg3 $$pkg4
|
|
|
endif
|
|
|
|
|
|
diff --git a/config/kernel-bdev-block-device-operations.m4 b/config/kernel-bdev-block-device-operations.m4
|
|
|
index 8b5e0a3..faacc19 100644
|
|
|
--- a/config/kernel-bdev-block-device-operations.m4
|
|
|
+++ b/config/kernel-bdev-block-device-operations.m4
|
|
|
@@ -10,7 +10,6 @@ AC_DEFUN([ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS], [
|
|
|
|
|
|
int blk_open(struct block_device *bdev, fmode_t mode)
|
|
|
{ return 0; }
|
|
|
- int blk_release(struct gendisk *g, fmode_t mode) { return 0; }
|
|
|
int blk_ioctl(struct block_device *bdev, fmode_t mode,
|
|
|
unsigned x, unsigned long y) { return 0; }
|
|
|
int blk_compat_ioctl(struct block_device * bdev, fmode_t mode,
|
|
|
@@ -19,7 +18,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS], [
|
|
|
static const struct block_device_operations
|
|
|
bops __attribute__ ((unused)) = {
|
|
|
.open = blk_open,
|
|
|
- .release = blk_release,
|
|
|
+ .release = NULL,
|
|
|
.ioctl = blk_ioctl,
|
|
|
.compat_ioctl = blk_compat_ioctl,
|
|
|
};
|
|
|
diff --git a/config/kernel-block-device-operations-release-void.m4 b/config/kernel-block-device-operations-release-void.m4
|
|
|
new file mode 100644
|
|
|
index 0000000..a73f858
|
|
|
--- /dev/null
|
|
|
+++ b/config/kernel-block-device-operations-release-void.m4
|
|
|
@@ -0,0 +1,29 @@
|
|
|
+dnl #
|
|
|
+dnl # 3.10.x API change
|
|
|
+dnl #
|
|
|
+AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [
|
|
|
+ AC_MSG_CHECKING([whether block_device_operations.release is void])
|
|
|
+ tmp_flags="$EXTRA_KCFLAGS"
|
|
|
+ EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
|
|
|
+ ZFS_LINUX_TRY_COMPILE([
|
|
|
+ #include <linux/blkdev.h>
|
|
|
+
|
|
|
+ void blk_release(struct gendisk *g, fmode_t mode) { return; }
|
|
|
+
|
|
|
+ static const struct block_device_operations
|
|
|
+ bops __attribute__ ((unused)) = {
|
|
|
+ .open = NULL,
|
|
|
+ .release = blk_release,
|
|
|
+ .ioctl = NULL,
|
|
|
+ .compat_ioctl = NULL,
|
|
|
+ };
|
|
|
+ ],[
|
|
|
+ ],[
|
|
|
+ AC_MSG_RESULT(void)
|
|
|
+ AC_DEFINE(HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID, 1,
|
|
|
+ [struct block_device_operations.release returns void])
|
|
|
+ ],[
|
|
|
+ AC_MSG_RESULT(int)
|
|
|
+ ])
|
|
|
+ EXTRA_KCFLAGS="$tmp_flags"
|
|
|
+])
|
|
|
diff --git a/config/kernel-lseek-execute.m4 b/config/kernel-lseek-execute.m4
|
|
|
new file mode 100644
|
|
|
index 0000000..8c4032b
|
|
|
--- /dev/null
|
|
|
+++ b/config/kernel-lseek-execute.m4
|
|
|
@@ -0,0 +1,23 @@
|
|
|
+dnl #
|
|
|
+dnl # 3.11 API change
|
|
|
+dnl # lseek_execute helper exported
|
|
|
+dnl #
|
|
|
+AC_DEFUN([ZFS_AC_KERNEL_LSEEK_EXECUTE],
|
|
|
+ [AC_MSG_CHECKING([whether lseek_execute() is available])
|
|
|
+ ZFS_LINUX_TRY_COMPILE_SYMBOL([
|
|
|
+ #include <linux/fs.h>
|
|
|
+ ], [
|
|
|
+ struct file *fp __attribute__ ((unused)) = NULL;
|
|
|
+ struct inode *ip __attribute__ ((unused)) = NULL;
|
|
|
+ loff_t offset __attribute__ ((unused)) = 0;
|
|
|
+ loff_t maxsize __attribute__ ((unused)) = 0;
|
|
|
+
|
|
|
+ lseek_execute(fp, ip, offset, maxsize);
|
|
|
+ ], [lseek_execute], [fs/read_write.c], [
|
|
|
+ AC_MSG_RESULT(yes)
|
|
|
+ AC_DEFINE(HAVE_LSEEK_EXECUTE, 1,
|
|
|
+ [lseek_execute() is available])
|
|
|
+ ], [
|
|
|
+ AC_MSG_RESULT(no)
|
|
|
+ ])
|
|
|
+])
|
|
|
diff --git a/config/kernel-shrink.m4 b/config/kernel-shrink.m4
|
|
|
index 479fb3a..1c211ed 100644
|
|
|
--- a/config/kernel-shrink.m4
|
|
|
+++ b/config/kernel-shrink.m4
|
|
|
@@ -28,6 +28,45 @@ AC_DEFUN([ZFS_AC_KERNEL_SHRINK], [
|
|
|
])
|
|
|
])
|
|
|
|
|
|
+dnl #
|
|
|
+dnl # 3.3 API change
|
|
|
+dnl # The super_block structure was changed to use an hlist_node instead
|
|
|
+dnl # of a list_head for the .s_instance linkage.
|
|
|
+dnl #
|
|
|
+dnl # This was done in part to resolve a race in the iterate_supers_type()
|
|
|
+dnl # function which was introduced in Linux 3.0 kernel. The iterator
|
|
|
+dnl # was supposed to provide a safe way to call an arbitrary function on
|
|
|
+dnl # all super blocks of a specific type. Unfortunately, because a
|
|
|
+dnl # list_head was used it was possible for iterate_supers_type() to
|
|
|
+dnl # get stuck spinning on a super block which was just deactivated.
|
|
|
+dnl #
|
|
|
+dnl # This can occur because when the list head is removed from the
|
|
|
+dnl # fs_supers list it is reinitialized to point to itself. If the
|
|
|
+dnl # iterate_supers_type() function happened to be processing the
|
|
|
+dnl # removed list_head it will get stuck spinning on that list_head.
|
|
|
+dnl #
|
|
|
+dnl # To resolve the issue for existing 3.0 - 3.2 kernels we detect when
|
|
|
+dnl # a list_head is used. Then to prevent the spinning from occurring
|
|
|
+dnl # the .next pointer is set to the fs_supers list_head which ensures
|
|
|
+dnl # the iterate_supers_type() function will always terminate.
|
|
|
+dnl #
|
|
|
+AC_DEFUN([ZFS_AC_KERNEL_S_INSTANCES_LIST_HEAD], [
|
|
|
+ AC_MSG_CHECKING([whether super_block has s_instances list_head])
|
|
|
+ ZFS_LINUX_TRY_COMPILE([
|
|
|
+ #include <linux/fs.h>
|
|
|
+ ],[
|
|
|
+ struct super_block sb __attribute__ ((unused));
|
|
|
+
|
|
|
+ INIT_LIST_HEAD(&sb.s_instances);
|
|
|
+ ],[
|
|
|
+ AC_MSG_RESULT(yes)
|
|
|
+ AC_DEFINE(HAVE_S_INSTANCES_LIST_HEAD, 1,
|
|
|
+ [struct super_block has s_instances list_head])
|
|
|
+ ],[
|
|
|
+ AC_MSG_RESULT(no)
|
|
|
+ ])
|
|
|
+])
|
|
|
+
|
|
|
AC_DEFUN([ZFS_AC_KERNEL_NR_CACHED_OBJECTS], [
|
|
|
AC_MSG_CHECKING([whether sops->nr_cached_objects() exists])
|
|
|
ZFS_LINUX_TRY_COMPILE([
|
|
|
diff --git a/config/kernel.m4 b/config/kernel.m4
|
|
|
index 88590be..8742bc5 100644
|
|
|
--- a/config/kernel.m4
|
|
|
+++ b/config/kernel.m4
|
|
|
@@ -7,6 +7,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
|
|
|
ZFS_AC_TEST_MODULE
|
|
|
ZFS_AC_KERNEL_CONFIG
|
|
|
ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS
|
|
|
+ ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
|
|
|
ZFS_AC_KERNEL_TYPE_FMODE_T
|
|
|
ZFS_AC_KERNEL_KOBJ_NAME_LEN
|
|
|
ZFS_AC_KERNEL_3ARG_BLKDEV_GET
|
|
|
@@ -71,12 +72,14 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
|
|
|
ZFS_AC_KERNEL_CALLBACK_SECURITY_INODE_INIT_SECURITY
|
|
|
ZFS_AC_KERNEL_MOUNT_NODEV
|
|
|
ZFS_AC_KERNEL_SHRINK
|
|
|
+ ZFS_AC_KERNEL_S_INSTANCES_LIST_HEAD
|
|
|
ZFS_AC_KERNEL_S_D_OP
|
|
|
ZFS_AC_KERNEL_BDI
|
|
|
ZFS_AC_KERNEL_BDI_SETUP_AND_REGISTER
|
|
|
ZFS_AC_KERNEL_SET_NLINK
|
|
|
ZFS_AC_KERNEL_ELEVATOR_CHANGE
|
|
|
ZFS_AC_KERNEL_5ARG_SGET
|
|
|
+ ZFS_AC_KERNEL_LSEEK_EXECUTE
|
|
|
|
|
|
AS_IF([test "$LINUX_OBJ" != "$LINUX"], [
|
|
|
KERNELMAKE_PARAMS="$KERNELMAKE_PARAMS O=$LINUX_OBJ"
|
|
|
@@ -109,7 +112,7 @@ AC_DEFUN([ZFS_AC_MODULE_SYMVERS], [
|
|
|
AS_IF([test ! -f "$LINUX_OBJ/$LINUX_SYMBOLS"], [
|
|
|
AC_MSG_ERROR([
|
|
|
*** Please make sure the kernel devel package for your distribution
|
|
|
- *** is installed. If your building with a custom kernel make sure the
|
|
|
+ *** is installed. If you are building with a custom kernel, make sure the
|
|
|
*** kernel is configured, built, and the '--with-linux=PATH' configure
|
|
|
*** option refers to the location of the kernel source.])
|
|
|
])
|
|
|
@@ -163,7 +166,7 @@ AC_DEFUN([ZFS_AC_KERNEL], [
|
|
|
AS_IF([test ! -d "$kernelsrc"], [
|
|
|
AC_MSG_ERROR([
|
|
|
*** Please make sure the kernel devel package for your distribution
|
|
|
- *** is installed then try again. If that fails you can specify the
|
|
|
+ *** is installed and then try again. If that fails, you can specify the
|
|
|
*** location of the kernel source with the '--with-linux=PATH' option.])
|
|
|
])
|
|
|
|
|
|
@@ -229,54 +232,6 @@ AC_DEFUN([ZFS_AC_KERNEL], [
|
|
|
ZFS_AC_MODULE_SYMVERS
|
|
|
])
|
|
|
|
|
|
-dnl #
|
|
|
-dnl # Detect name used for the additional SPL Module.symvers file. If one
|
|
|
-dnl # does not exist this is likely because the SPL has been configured
|
|
|
-dnl # but not built. The '--with-spl-timeout' option can be passed
|
|
|
-dnl # to pause here, waiting for the file to appear from a concurrently
|
|
|
-dnl # building SPL package. If the file does not appear in time, a good
|
|
|
-dnl # guess is made as to what this file will be named based on what it
|
|
|
-dnl # is named in the kernel build products. This file will first be
|
|
|
-dnl # used at link time so if the guess is wrong the build will fail
|
|
|
-dnl # then. This unfortunately means the ZFS package does not contain a
|
|
|
-dnl # reliable mechanism to detect symbols exported by the SPL at
|
|
|
-dnl # configure time.
|
|
|
-dnl #
|
|
|
-AC_DEFUN([ZFS_AC_SPL_MODULE_SYMVERS], [
|
|
|
- AC_ARG_WITH([spl-timeout],
|
|
|
- AS_HELP_STRING([--with-spl-timeout=SECS],
|
|
|
- [Wait SECS for symvers file to appear @<:@default=0@:>@]),
|
|
|
- [timeout="$withval"], [timeout=0])
|
|
|
-
|
|
|
- AC_MSG_CHECKING([spl file name for module symbols])
|
|
|
- SPL_SYMBOLS=NONE
|
|
|
-
|
|
|
- while true; do
|
|
|
- AS_IF([test -r $SPL_OBJ/Module.symvers], [
|
|
|
- SPL_SYMBOLS=Module.symvers
|
|
|
- ], [test -r $SPL_OBJ/Modules.symvers], [
|
|
|
- SPL_SYMBOLS=Modules.symvers
|
|
|
- ], [test -r $SPL_OBJ/module/Module.symvers], [
|
|
|
- SPL_SYMBOLS=Module.symvers
|
|
|
- ], [test -r $SPL_OBJ/module/Modules.symvers], [
|
|
|
- SPL_SYMBOLS=Modules.symvers
|
|
|
- ])
|
|
|
-
|
|
|
- AS_IF([test $SPL_SYMBOLS != NONE -o $timeout -le 0], [
|
|
|
- break;
|
|
|
- ], [
|
|
|
- sleep 1
|
|
|
- timeout=$((timeout-1))
|
|
|
- ])
|
|
|
- done
|
|
|
-
|
|
|
- AS_IF([test "$SPL_SYMBOLS" = NONE], [
|
|
|
- SPL_SYMBOLS=$LINUX_SYMBOLS
|
|
|
- ])
|
|
|
-
|
|
|
- AC_MSG_RESULT([$SPL_SYMBOLS])
|
|
|
- AC_SUBST(SPL_SYMBOLS)
|
|
|
-])
|
|
|
|
|
|
dnl #
|
|
|
dnl # Detect the SPL module to be built against
|
|
|
@@ -292,6 +247,11 @@ AC_DEFUN([ZFS_AC_SPL], [
|
|
|
[Path to spl build objects]),
|
|
|
[splbuild="$withval"])
|
|
|
|
|
|
+ AC_ARG_WITH([spl-timeout],
|
|
|
+ AS_HELP_STRING([--with-spl-timeout=SECS],
|
|
|
+ [Wait SECS for SPL header and symver file @<:@default=0@:>@]),
|
|
|
+ [timeout="$withval"], [timeout=0])
|
|
|
+
|
|
|
dnl #
|
|
|
dnl # The existence of spl.release.in is used to identify a valid
|
|
|
dnl # source directory. In order of preference:
|
|
|
@@ -338,16 +298,30 @@ AC_DEFUN([ZFS_AC_SPL], [
|
|
|
dnl # directory are the same, however the objects may also reside
|
|
|
dnl # is a subdirectory named after the kernel version.
|
|
|
dnl #
|
|
|
+ dnl # This file is supposed to be available after DKMS finishes
|
|
|
+ dnl # building the SPL kernel module for the target kernel. The
|
|
|
+ dnl # '--with-spl-timeout' option can be passed to pause here,
|
|
|
+ dnl # waiting for the file to appear from a concurrently building
|
|
|
+ dnl # SPL package.
|
|
|
+ dnl #
|
|
|
AC_MSG_CHECKING([spl build directory])
|
|
|
- AS_IF([test -z "$splbuild"], [
|
|
|
- AS_IF([ test -e "${splsrc}/${LINUX_VERSION}/spl_config.h" ], [
|
|
|
- splbuild="${splsrc}/${LINUX_VERSION}"
|
|
|
- ], [ test -e "${splsrc}/spl_config.h" ], [
|
|
|
- splbuild="${splsrc}"
|
|
|
+ while true; do
|
|
|
+ AS_IF([test -z "$splbuild"], [
|
|
|
+ AS_IF([ test -e "${splsrc}/${LINUX_VERSION}/spl_config.h" ], [
|
|
|
+ splbuild="${splsrc}/${LINUX_VERSION}"
|
|
|
+ ], [ test -e "${splsrc}/spl_config.h" ], [
|
|
|
+ splbuild="${splsrc}"
|
|
|
+ ], [
|
|
|
+ splbuild="[Not found]"
|
|
|
+ ])
|
|
|
+ ])
|
|
|
+ AS_IF([test -e "$splbuild/spl_config.h" -o $timeout -le 0], [
|
|
|
+ break;
|
|
|
], [
|
|
|
- splbuild="[Not found]"
|
|
|
+ sleep 1
|
|
|
+ timeout=$((timeout-1))
|
|
|
])
|
|
|
- ])
|
|
|
+ done
|
|
|
|
|
|
AC_MSG_RESULT([$splbuild])
|
|
|
AS_IF([ ! test -e "$splbuild/spl_config.h"], [
|
|
|
@@ -385,7 +359,47 @@ AC_DEFUN([ZFS_AC_SPL], [
|
|
|
AC_SUBST(SPL_OBJ)
|
|
|
AC_SUBST(SPL_VERSION)
|
|
|
|
|
|
- ZFS_AC_SPL_MODULE_SYMVERS
|
|
|
+ dnl #
|
|
|
+ dnl # Detect the name used for the SPL Module.symvers file. If one
|
|
|
+ dnl # does not exist this is likely because the SPL has been configured
|
|
|
+ dnl # but not built. The '--with-spl-timeout' option can be passed
|
|
|
+ dnl # to pause here, waiting for the file to appear from a concurrently
|
|
|
+ dnl # building SPL package. If the file does not appear in time, a good
|
|
|
+ dnl # guess is made as to what this file will be named based on what it
|
|
|
+ dnl # is named in the kernel build products. This file will first be
|
|
|
+ dnl # used at link time so if the guess is wrong the build will fail
|
|
|
+ dnl # then. This unfortunately means the ZFS package does not contain a
|
|
|
+ dnl # reliable mechanism to detect symbols exported by the SPL at
|
|
|
+ dnl # configure time.
|
|
|
+ dnl #
|
|
|
+ AC_MSG_CHECKING([spl file name for module symbols])
|
|
|
+ SPL_SYMBOLS=NONE
|
|
|
+
|
|
|
+ while true; do
|
|
|
+ AS_IF([test -r $SPL_OBJ/Module.symvers], [
|
|
|
+ SPL_SYMBOLS=Module.symvers
|
|
|
+ ], [test -r $SPL_OBJ/Modules.symvers], [
|
|
|
+ SPL_SYMBOLS=Modules.symvers
|
|
|
+ ], [test -r $SPL_OBJ/module/Module.symvers], [
|
|
|
+ SPL_SYMBOLS=Module.symvers
|
|
|
+ ], [test -r $SPL_OBJ/module/Modules.symvers], [
|
|
|
+ SPL_SYMBOLS=Modules.symvers
|
|
|
+ ])
|
|
|
+
|
|
|
+ AS_IF([test $SPL_SYMBOLS != NONE -o $timeout -le 0], [
|
|
|
+ break;
|
|
|
+ ], [
|
|
|
+ sleep 1
|
|
|
+ timeout=$((timeout-1))
|
|
|
+ ])
|
|
|
+ done
|
|
|
+
|
|
|
+ AS_IF([test "$SPL_SYMBOLS" = NONE], [
|
|
|
+ SPL_SYMBOLS=$LINUX_SYMBOLS
|
|
|
+ ])
|
|
|
+
|
|
|
+ AC_MSG_RESULT([$SPL_SYMBOLS])
|
|
|
+ AC_SUBST(SPL_SYMBOLS)
|
|
|
])
|
|
|
|
|
|
dnl #
|
|
|
diff --git a/config/zfs-build.m4 b/config/zfs-build.m4
|
|
|
index cccd87f..005185b 100644
|
|
|
--- a/config/zfs-build.m4
|
|
|
+++ b/config/zfs-build.m4
|
|
|
@@ -138,7 +138,7 @@ AC_DEFUN([ZFS_AC_RPM], [
|
|
|
AC_MSG_RESULT([$HAVE_RPMBUILD])
|
|
|
])
|
|
|
|
|
|
- RPM_DEFINE_COMMON=
|
|
|
+ RPM_DEFINE_COMMON='--define "$(DEBUG_ZFS) 1" --define "$(DEBUG_DMU_TX) 1"'
|
|
|
RPM_DEFINE_UTIL=
|
|
|
RPM_DEFINE_KMOD='--define "kernels $(LINUX_VERSION)"'
|
|
|
RPM_DEFINE_DKMS=
|
|
|
diff --git a/config/zfs-meta.m4 b/config/zfs-meta.m4
|
|
|
index 2deda71..20a61cf 100644
|
|
|
--- a/config/zfs-meta.m4
|
|
|
+++ b/config/zfs-meta.m4
|
|
|
@@ -1,14 +1,35 @@
|
|
|
-###############################################################################
|
|
|
-# Written by Chris Dunlap <cdunlap@llnl.gov>.
|
|
|
-# Modified by Brian Behlendorf <behlendorf1@llnl.gov>.
|
|
|
-###############################################################################
|
|
|
-# ZFS_AC_META: Read metadata from the META file. When building from a
|
|
|
-# git repository the ZFS_META_RELEASE field will be overwritten if there
|
|
|
-# is an annotated tag matching the form ZFS_META_NAME-ZFS_META_VERSION-*.
|
|
|
-# This allows for working builds to be uniquely identified using the git
|
|
|
-# commit hash.
|
|
|
-###############################################################################
|
|
|
-
|
|
|
+dnl #
|
|
|
+dnl # DESCRIPTION:
|
|
|
+dnl # Read meta data from the META file. When building from a git repository
|
|
|
+dnl # the ZFS_META_RELEASE field will be overwritten if there is an annotated
|
|
|
+dnl # tag matching the form ZFS_META_NAME-ZFS_META_VERSION-*. This allows
|
|
|
+dnl # for working builds to be uniquely identified using the git commit hash.
|
|
|
+dnl #
|
|
|
+dnl # The META file format is as follows:
|
|
|
+dnl # ^[ ]*KEY:[ \t]+VALUE$
|
|
|
+dnl #
|
|
|
+dnl # In other words:
|
|
|
+dnl # - KEY is separated from VALUE by a colon and one or more spaces/tabs.
|
|
|
+dnl # - KEY and VALUE are case sensitive.
|
|
|
+dnl # - Leading spaces are ignored.
|
|
|
+dnl # - First match wins for duplicate keys.
|
|
|
+dnl #
|
|
|
+dnl # A line can be commented out by preceding it with a '#' (or technically
|
|
|
+dnl # any non-space character since that will prevent the regex from
|
|
|
+dnl # matching).
|
|
|
+dnl #
|
|
|
+dnl # WARNING:
|
|
|
+dnl # Placing a colon followed by a space or tab (ie, ":[ \t]+") within the
|
|
|
+dnl # VALUE will prematurely terminate the string since that sequence is
|
|
|
+dnl # used as the awk field separator.
|
|
|
+dnl #
|
|
|
+dnl # KEYS:
|
|
|
+dnl # The following META keys are recognized:
|
|
|
+dnl # Name, Version, Release, Date, Author, LT_Current, LT_Revision, LT_Age
|
|
|
+dnl #
|
|
|
+dnl # Written by Chris Dunlap <cdunlap@llnl.gov>.
|
|
|
+dnl # Modified by Brian Behlendorf <behlendorf1@llnl.gov>.
|
|
|
+dnl #
|
|
|
AC_DEFUN([ZFS_AC_META], [
|
|
|
|
|
|
AH_BOTTOM([
|
|
|
@@ -21,6 +42,7 @@ AC_DEFUN([ZFS_AC_META], [
|
|
|
#undef STDC_HEADERS
|
|
|
#undef VERSION])
|
|
|
|
|
|
+ AC_PROG_AWK
|
|
|
AC_MSG_CHECKING([metadata])
|
|
|
|
|
|
META="$srcdir/META"
|
|
|
@@ -28,7 +50,7 @@ AC_DEFUN([ZFS_AC_META], [
|
|
|
if test -f "$META"; then
|
|
|
_zfs_ac_meta_type="META file"
|
|
|
|
|
|
- ZFS_META_NAME=_ZFS_AC_META_GETVAL([(?:NAME|PROJECT|PACKAGE)]);
|
|
|
+ ZFS_META_NAME=_ZFS_AC_META_GETVAL([(Name|Project|Package)]);
|
|
|
if test -n "$ZFS_META_NAME"; then
|
|
|
AC_DEFINE_UNQUOTED([ZFS_META_NAME], ["$ZFS_META_NAME"],
|
|
|
[Define the project name.]
|
|
|
@@ -36,7 +58,7 @@ AC_DEFUN([ZFS_AC_META], [
|
|
|
AC_SUBST([ZFS_META_NAME])
|
|
|
fi
|
|
|
|
|
|
- ZFS_META_VERSION=_ZFS_AC_META_GETVAL([VERSION]);
|
|
|
+ ZFS_META_VERSION=_ZFS_AC_META_GETVAL([Version]);
|
|
|
if test -n "$ZFS_META_VERSION"; then
|
|
|
AC_DEFINE_UNQUOTED([ZFS_META_VERSION], ["$ZFS_META_VERSION"],
|
|
|
[Define the project version.]
|
|
|
@@ -44,8 +66,8 @@ AC_DEFUN([ZFS_AC_META], [
|
|
|
AC_SUBST([ZFS_META_VERSION])
|
|
|
fi
|
|
|
|
|
|
- ZFS_META_RELEASE=_ZFS_AC_META_GETVAL([RELEASE]);
|
|
|
- if git rev-parse --git-dir > /dev/null 2>&1; then
|
|
|
+ ZFS_META_RELEASE=_ZFS_AC_META_GETVAL([Release]);
|
|
|
+ if test ! -f ".nogitrelease" && git rev-parse --git-dir > /dev/null 2>&1; then
|
|
|
_match="${ZFS_META_NAME}-${ZFS_META_VERSION}*"
|
|
|
_alias=$(git describe --match=${_match} 2>/dev/null)
|
|
|
_release=$(echo ${_alias}|cut -f3- -d'-'|sed 's/-/_/g')
|
|
|
@@ -65,7 +87,7 @@ AC_DEFUN([ZFS_AC_META], [
|
|
|
AC_SUBST([RELEASE])
|
|
|
fi
|
|
|
|
|
|
- ZFS_META_LICENSE=_ZFS_AC_META_GETVAL([LICENSE]);
|
|
|
+ ZFS_META_LICENSE=_ZFS_AC_META_GETVAL([License]);
|
|
|
if test -n "$ZFS_META_LICENSE"; then
|
|
|
AC_DEFINE_UNQUOTED([ZFS_META_LICENSE], ["$ZFS_META_LICENSE"],
|
|
|
[Define the project license.]
|
|
|
@@ -84,7 +106,7 @@ AC_DEFUN([ZFS_AC_META], [
|
|
|
AC_SUBST([ZFS_META_ALIAS])
|
|
|
fi
|
|
|
|
|
|
- ZFS_META_DATA=_ZFS_AC_META_GETVAL([DATE]);
|
|
|
+ ZFS_META_DATA=_ZFS_AC_META_GETVAL([Date]);
|
|
|
if test -n "$ZFS_META_DATA"; then
|
|
|
AC_DEFINE_UNQUOTED([ZFS_META_DATA], ["$ZFS_META_DATA"],
|
|
|
[Define the project release date.]
|
|
|
@@ -92,7 +114,7 @@ AC_DEFUN([ZFS_AC_META], [
|
|
|
AC_SUBST([ZFS_META_DATA])
|
|
|
fi
|
|
|
|
|
|
- ZFS_META_AUTHOR=_ZFS_AC_META_GETVAL([AUTHOR]);
|
|
|
+ ZFS_META_AUTHOR=_ZFS_AC_META_GETVAL([Author]);
|
|
|
if test -n "$ZFS_META_AUTHOR"; then
|
|
|
AC_DEFINE_UNQUOTED([ZFS_META_AUTHOR], ["$ZFS_META_AUTHOR"],
|
|
|
[Define the project author.]
|
|
|
@@ -101,9 +123,9 @@ AC_DEFUN([ZFS_AC_META], [
|
|
|
fi
|
|
|
|
|
|
m4_pattern_allow([^LT_(CURRENT|REVISION|AGE)$])
|
|
|
- ZFS_META_LT_CURRENT=_ZFS_AC_META_GETVAL([LT_CURRENT]);
|
|
|
- ZFS_META_LT_REVISION=_ZFS_AC_META_GETVAL([LT_REVISION]);
|
|
|
- ZFS_META_LT_AGE=_ZFS_AC_META_GETVAL([LT_AGE]);
|
|
|
+ ZFS_META_LT_CURRENT=_ZFS_AC_META_GETVAL([LT_Current]);
|
|
|
+ ZFS_META_LT_REVISION=_ZFS_AC_META_GETVAL([LT_Revision]);
|
|
|
+ ZFS_META_LT_AGE=_ZFS_AC_META_GETVAL([LT_Age]);
|
|
|
if test -n "$ZFS_META_LT_CURRENT" \
|
|
|
-o -n "$ZFS_META_LT_REVISION" \
|
|
|
-o -n "$ZFS_META_LT_AGE"; then
|
|
|
@@ -134,15 +156,18 @@ AC_DEFUN([ZFS_AC_META], [
|
|
|
]
|
|
|
)
|
|
|
|
|
|
-AC_DEFUN([_ZFS_AC_META_GETVAL],
|
|
|
- [`perl -n\
|
|
|
- -e "BEGIN { \\$key=shift @ARGV; }"\
|
|
|
- -e "next unless s/^\s*\\$key@<:@:=@:>@//i;"\
|
|
|
- -e "s/^((?:@<:@^'\"#@:>@*(?:(@<:@'\"@:>@)@<:@^\2@:>@*\2)*)*)#.*/\\@S|@1/;"\
|
|
|
- -e "s/^\s+//;"\
|
|
|
- -e "s/\s+$//;"\
|
|
|
- -e "s/^(@<:@'\"@:>@)(.*)\1/\\@S|@2/;"\
|
|
|
- -e "\\$val=\\$_;"\
|
|
|
- -e "END { print \\$val if defined \\$val; }"\
|
|
|
- '$1' $META`]dnl
|
|
|
+dnl # _ZFS_AC_META_GETVAL (KEY_NAME_OR_REGEX)
|
|
|
+dnl #
|
|
|
+dnl # Returns the META VALUE associated with the given KEY_NAME_OR_REGEX expr.
|
|
|
+dnl #
|
|
|
+dnl # Despite their resemblance to line noise,
|
|
|
+dnl # the "@<:@" and "@:>@" constructs are quadrigraphs for "[" and "]".
|
|
|
+dnl # <www.gnu.org/software/autoconf/manual/autoconf.html#Quadrigraphs>
|
|
|
+dnl #
|
|
|
+dnl # The "$[]1" and "$[]2" constructs prevent M4 parameter expansion
|
|
|
+dnl # so a literal $1 and $2 will be passed to the resulting awk script,
|
|
|
+dnl # whereas the "$1" will undergo M4 parameter expansion for the META key.
|
|
|
+dnl #
|
|
|
+AC_DEFUN([_ZFS_AC_META_GETVAL],
|
|
|
+ [`$AWK -F ':@<:@ \t@:>@+' '$[]1 ~ /^ *$1$/ { print $[]2; exit }' $META`]dnl
|
|
|
)
|
|
|
diff --git a/configure.ac b/configure.ac
|
|
|
index 413114b..58e2158 100644
|
|
|
--- a/configure.ac
|
|
|
+++ b/configure.ac
|
|
|
@@ -34,9 +34,10 @@ AC_INIT
|
|
|
AC_LANG(C)
|
|
|
ZFS_AC_META
|
|
|
AC_CONFIG_AUX_DIR([config])
|
|
|
+AC_CONFIG_MACRO_DIR([config])
|
|
|
AC_CANONICAL_SYSTEM
|
|
|
AM_MAINTAINER_MODE
|
|
|
-AM_SILENT_RULES
|
|
|
+m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
|
|
AM_INIT_AUTOMAKE([$ZFS_META_NAME], [$ZFS_META_VERSION])
|
|
|
AC_CONFIG_HEADERS([zfs_config.h], [
|
|
|
(mv zfs_config.h zfs_config.h.tmp &&
|
|
|
@@ -101,6 +102,7 @@ AC_CONFIG_FILES([
|
|
|
cmd/fsck_zfs/Makefile
|
|
|
cmd/zvol_id/Makefile
|
|
|
cmd/vdev_id/Makefile
|
|
|
+ cmd/arcstat/Makefile
|
|
|
module/Makefile
|
|
|
module/avl/Makefile
|
|
|
module/nvpair/Makefile
|
|
|
diff --git a/etc/init.d/zfs.fedora.in b/etc/init.d/zfs.fedora.in
|
|
|
index 04f2077..3cece9b 100644
|
|
|
--- a/etc/init.d/zfs.fedora.in
|
|
|
+++ b/etc/init.d/zfs.fedora.in
|
|
|
@@ -25,6 +25,11 @@
|
|
|
|
|
|
export PATH=/usr/local/sbin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin
|
|
|
|
|
|
+if [ -z "$init" ]; then
|
|
|
+ # Not interactive
|
|
|
+ grep -Eqi 'zfs=off|zfs=no' /proc/cmdline && exit 3
|
|
|
+fi
|
|
|
+
|
|
|
# Source function library & LSB routines
|
|
|
. /etc/rc.d/init.d/functions
|
|
|
|
|
|
diff --git a/etc/init.d/zfs.gentoo.in b/etc/init.d/zfs.gentoo.in
|
|
|
index df883cf..0034e02 100644
|
|
|
--- a/etc/init.d/zfs.gentoo.in
|
|
|
+++ b/etc/init.d/zfs.gentoo.in
|
|
|
@@ -3,11 +3,23 @@
|
|
|
# Released under the 2-clause BSD license.
|
|
|
# $Header: /var/cvsroot/gentoo-x86/sys-fs/zfs/files/zfs,v 0.9 2011/04/30 10:13:43 devsk Exp $
|
|
|
|
|
|
+if [ -z "$init" ]; then
|
|
|
+ # Not interactive
|
|
|
+ grep -Eqi 'zfs=off|zfs=no' /proc/cmdline && exit 3
|
|
|
+fi
|
|
|
+
|
|
|
depend()
|
|
|
{
|
|
|
+ # Try to allow people to mix and match fstab with ZFS in a way that makes sense.
|
|
|
+ if [ "$(mountinfo -s /)" = 'zfs' ]
|
|
|
+ then
|
|
|
+ before localmount
|
|
|
+ else
|
|
|
+ after localmount
|
|
|
+ fi
|
|
|
+
|
|
|
# bootmisc will log to /var which may be a different zfs than root.
|
|
|
- before net bootmisc
|
|
|
- after udev localmount
|
|
|
+ before bootmisc logger
|
|
|
keyword -lxc -openvz -prefix -vserver
|
|
|
}
|
|
|
|
|
|
diff --git a/etc/init.d/zfs.lsb.in b/etc/init.d/zfs.lsb.in
|
|
|
index 3d04206..0d0ffb4 100644
|
|
|
--- a/etc/init.d/zfs.lsb.in
|
|
|
+++ b/etc/init.d/zfs.lsb.in
|
|
|
@@ -36,6 +36,11 @@ ZPOOL_CACHE="@sysconfdir@/zfs/zpool.cache"
|
|
|
[ -x "$ZPOOL" ] || exit 1
|
|
|
[ -x "$ZFS" ] || exit 2
|
|
|
|
|
|
+if [ -z "$init" ]; then
|
|
|
+ # Not interactive
|
|
|
+ grep -Eqi 'zfs=off|zfs=no' /proc/cmdline && exit 3
|
|
|
+fi
|
|
|
+
|
|
|
start()
|
|
|
{
|
|
|
[ -f "$LOCKFILE" ] && return 3
|
|
|
diff --git a/etc/init.d/zfs.lunar.in b/etc/init.d/zfs.lunar.in
|
|
|
index 97384dc..3cf79ce 100644
|
|
|
--- a/etc/init.d/zfs.lunar.in
|
|
|
+++ b/etc/init.d/zfs.lunar.in
|
|
|
@@ -14,6 +14,11 @@ ZFS="@sbindir@/zfs"
|
|
|
ZPOOL="@sbindir@/zpool"
|
|
|
ZPOOL_CACHE="@sysconfdir@/zfs/zpool.cache"
|
|
|
|
|
|
+if [ -z "$init" ]; then
|
|
|
+ # Not interactive
|
|
|
+ grep -Eqi 'zfs=off|zfs=no' /proc/cmdline && exit 3
|
|
|
+fi
|
|
|
+
|
|
|
case $1 in
|
|
|
start) echo "$1ing ZFS filesystems"
|
|
|
|
|
|
diff --git a/etc/init.d/zfs.redhat.in b/etc/init.d/zfs.redhat.in
|
|
|
index ae797c1..fb5187f 100644
|
|
|
--- a/etc/init.d/zfs.redhat.in
|
|
|
+++ b/etc/init.d/zfs.redhat.in
|
|
|
@@ -25,6 +25,11 @@
|
|
|
|
|
|
export PATH=/usr/local/sbin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin
|
|
|
|
|
|
+if [ -z "$init" ]; then
|
|
|
+ # Not interactive
|
|
|
+ grep -Eqi 'zfs=off|zfs=no' /proc/cmdline && exit 3
|
|
|
+fi
|
|
|
+
|
|
|
# Source function library & LSB routines
|
|
|
. /etc/rc.d/init.d/functions
|
|
|
|
|
|
diff --git a/include/libzfs.h b/include/libzfs.h
|
|
|
index 82cb66a..3472b76 100644
|
|
|
--- a/include/libzfs.h
|
|
|
+++ b/include/libzfs.h
|
|
|
@@ -212,6 +212,7 @@ extern void zpool_close(zpool_handle_t *);
|
|
|
extern const char *zpool_get_name(zpool_handle_t *);
|
|
|
extern int zpool_get_state(zpool_handle_t *);
|
|
|
extern char *zpool_state_to_name(vdev_state_t, vdev_aux_t);
|
|
|
+extern const char *zpool_pool_state_to_name(pool_state_t);
|
|
|
extern void zpool_free_handles(libzfs_handle_t *);
|
|
|
|
|
|
/*
|
|
|
diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h
|
|
|
index 47f569b..ec9926f 100644
|
|
|
--- a/include/linux/blkdev_compat.h
|
|
|
+++ b/include/linux/blkdev_compat.h
|
|
|
@@ -485,6 +485,6 @@ blk_queue_discard_granularity(struct request_queue *q, unsigned int dg)
|
|
|
* user space processes which don't pass this value will get EBUSY. This is
|
|
|
* currently required for the correct operation of hot spares.
|
|
|
*/
|
|
|
-#define VDEV_HOLDER ((void *)0x2f5401de7)
|
|
|
+#define VDEV_HOLDER ((void *)0x2401de7)
|
|
|
|
|
|
#endif /* _ZFS_BLKDEV_H */
|
|
|
diff --git a/include/linux/vfs_compat.h b/include/linux/vfs_compat.h
|
|
|
index c9fa76e..17fa3ff 100644
|
|
|
--- a/include/linux/vfs_compat.h
|
|
|
+++ b/include/linux/vfs_compat.h
|
|
|
@@ -149,4 +149,29 @@ typedef int zpl_umode_t;
|
|
|
#define zpl_sget(type, cmp, set, fl, mtd) sget(type, cmp, set, mtd)
|
|
|
#endif /* HAVE_5ARG_SGET */
|
|
|
|
|
|
+#define ZFS_IOC_GETFLAGS FS_IOC_GETFLAGS
|
|
|
+#define ZFS_IOC_SETFLAGS FS_IOC_SETFLAGS
|
|
|
+
|
|
|
+#if defined(SEEK_HOLE) && defined(SEEK_DATA) && !defined(HAVE_LSEEK_EXECUTE)
|
|
|
+static inline loff_t
|
|
|
+lseek_execute(struct file *filp, struct inode *inode,
|
|
|
+ loff_t offset, loff_t maxsize)
|
|
|
+{
|
|
|
+ if (offset < 0 && !(filp->f_mode & FMODE_UNSIGNED_OFFSET))
|
|
|
+ return (-EINVAL);
|
|
|
+
|
|
|
+ if (offset > maxsize)
|
|
|
+ return (-EINVAL);
|
|
|
+
|
|
|
+ if (offset != filp->f_pos) {
|
|
|
+ spin_lock(&filp->f_lock);
|
|
|
+ filp->f_pos = offset;
|
|
|
+ filp->f_version = 0;
|
|
|
+ spin_unlock(&filp->f_lock);
|
|
|
+ }
|
|
|
+
|
|
|
+ return (offset);
|
|
|
+}
|
|
|
+#endif /* SEEK_HOLE && SEEK_DATA && !HAVE_LSEEK_EXECUTE */
|
|
|
+
|
|
|
#endif /* _ZFS_VFS_H */
|
|
|
diff --git a/include/sys/arc.h b/include/sys/arc.h
|
|
|
index dbc91ea..396144a 100644
|
|
|
--- a/include/sys/arc.h
|
|
|
+++ b/include/sys/arc.h
|
|
|
@@ -20,6 +20,7 @@
|
|
|
*/
|
|
|
/*
|
|
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
|
+ * Copyright (c) 2013 by Delphix. All rights reserved.
|
|
|
*/
|
|
|
|
|
|
#ifndef _SYS_ARC_H
|
|
|
@@ -59,7 +60,6 @@ struct arc_buf {
|
|
|
arc_buf_hdr_t *b_hdr;
|
|
|
arc_buf_t *b_next;
|
|
|
kmutex_t b_evict_lock;
|
|
|
- krwlock_t b_data_lock;
|
|
|
void *b_data;
|
|
|
arc_evict_func_t *b_efunc;
|
|
|
void *b_private;
|
|
|
@@ -103,8 +103,6 @@ void arc_buf_add_ref(arc_buf_t *buf, void *tag);
|
|
|
int arc_buf_remove_ref(arc_buf_t *buf, void *tag);
|
|
|
int arc_buf_size(arc_buf_t *buf);
|
|
|
void arc_release(arc_buf_t *buf, void *tag);
|
|
|
-int arc_release_bp(arc_buf_t *buf, void *tag, blkptr_t *bp, spa_t *spa,
|
|
|
- zbookmark_t *zb);
|
|
|
int arc_released(arc_buf_t *buf);
|
|
|
int arc_has_callback(arc_buf_t *buf);
|
|
|
void arc_buf_freeze(arc_buf_t *buf);
|
|
|
@@ -114,10 +112,7 @@ boolean_t arc_buf_eviction_needed(arc_buf_t *buf);
|
|
|
int arc_referenced(arc_buf_t *buf);
|
|
|
#endif
|
|
|
|
|
|
-int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_buf_t *pbuf,
|
|
|
- arc_done_func_t *done, void *private, int priority, int zio_flags,
|
|
|
- uint32_t *arc_flags, const zbookmark_t *zb);
|
|
|
-int arc_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bp,
|
|
|
+int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
|
|
|
arc_done_func_t *done, void *private, int priority, int flags,
|
|
|
uint32_t *arc_flags, const zbookmark_t *zb);
|
|
|
zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
|
|
|
@@ -127,6 +122,7 @@ zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
|
|
|
|
|
|
arc_prune_t *arc_add_prune_callback(arc_prune_func_t *func, void *private);
|
|
|
void arc_remove_prune_callback(arc_prune_t *p);
|
|
|
+void arc_freed(spa_t *spa, const blkptr_t *bp);
|
|
|
|
|
|
void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private);
|
|
|
int arc_buf_evict(arc_buf_t *buf);
|
|
|
diff --git a/include/sys/dmu.h b/include/sys/dmu.h
|
|
|
index eb6a465..1785725 100644
|
|
|
--- a/include/sys/dmu.h
|
|
|
+++ b/include/sys/dmu.h
|
|
|
@@ -410,7 +410,7 @@ void dmu_write_policy(objset_t *os, struct dnode *dn, int level, int wp,
|
|
|
* read db_data, dmu_buf_will_dirty() before modifying it, and the
|
|
|
* object must be held in an assigned transaction before calling
|
|
|
* dmu_buf_will_dirty. You may use dmu_buf_set_user() on the bonus
|
|
|
- * buffer as well. You must release your hold with dmu_buf_rele().
|
|
|
+ * buffer as well. You must release what you hold with dmu_buf_rele().
|
|
|
*/
|
|
|
int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **);
|
|
|
int dmu_bonus_max(void);
|
|
|
@@ -432,8 +432,8 @@ int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
|
|
|
* Obtain the DMU buffer from the specified object which contains the
|
|
|
* specified offset. dmu_buf_hold() puts a "hold" on the buffer, so
|
|
|
* that it will remain in memory. You must release the hold with
|
|
|
- * dmu_buf_rele(). You musn't access the dmu_buf_t after releasing your
|
|
|
- * hold. You must have a hold on any dmu_buf_t* you pass to the DMU.
|
|
|
+ * dmu_buf_rele(). You must not access the dmu_buf_t after releasing
|
|
|
+ * what you hold. You must have a hold on any dmu_buf_t* you pass to the DMU.
|
|
|
*
|
|
|
* You must call dmu_buf_read, dmu_buf_will_dirty, or dmu_buf_will_fill
|
|
|
* on the returned buffer before reading or writing the buffer's
|
|
|
diff --git a/include/sys/dmu_traverse.h b/include/sys/dmu_traverse.h
|
|
|
index 3cbf42f..bc1590b 100644
|
|
|
--- a/include/sys/dmu_traverse.h
|
|
|
+++ b/include/sys/dmu_traverse.h
|
|
|
@@ -40,8 +40,7 @@ struct zilog;
|
|
|
struct arc_buf;
|
|
|
|
|
|
typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|
|
- struct arc_buf *pbuf, const zbookmark_t *zb, const struct dnode_phys *dnp,
|
|
|
- void *arg);
|
|
|
+ const zbookmark_t *zb, const struct dnode_phys *dnp, void *arg);
|
|
|
|
|
|
#define TRAVERSE_PRE (1<<0)
|
|
|
#define TRAVERSE_POST (1<<1)
|
|
|
diff --git a/include/sys/dnode.h b/include/sys/dnode.h
|
|
|
index 9ad4be3..9f9134d 100644
|
|
|
--- a/include/sys/dnode.h
|
|
|
+++ b/include/sys/dnode.h
|
|
|
@@ -20,6 +20,7 @@
|
|
|
*/
|
|
|
/*
|
|
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
|
+ * Copyright (c) 2012 by Delphix. All rights reserved.
|
|
|
*/
|
|
|
|
|
|
#ifndef _SYS_DNODE_H
|
|
|
@@ -276,7 +277,6 @@ void dnode_byteswap(dnode_phys_t *dnp);
|
|
|
void dnode_buf_byteswap(void *buf, size_t size);
|
|
|
void dnode_verify(dnode_t *dn);
|
|
|
int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx);
|
|
|
-uint64_t dnode_current_max_length(dnode_t *dn);
|
|
|
void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx);
|
|
|
void dnode_clear_range(dnode_t *dn, uint64_t blkid,
|
|
|
uint64_t nblks, dmu_tx_t *tx);
|
|
|
diff --git a/include/sys/dsl_pool.h b/include/sys/dsl_pool.h
|
|
|
index ff5df14..4a4bf76 100644
|
|
|
--- a/include/sys/dsl_pool.h
|
|
|
+++ b/include/sys/dsl_pool.h
|
|
|
@@ -41,6 +41,8 @@
|
|
|
extern "C" {
|
|
|
#endif
|
|
|
|
|
|
+extern int zfs_txg_synctime_ms;
|
|
|
+
|
|
|
struct objset;
|
|
|
struct dsl_dir;
|
|
|
struct dsl_dataset;
|
|
|
@@ -87,6 +89,7 @@ typedef struct dsl_pool {
|
|
|
uint64_t dp_root_dir_obj;
|
|
|
struct taskq *dp_iput_taskq;
|
|
|
kstat_t *dp_txg_kstat;
|
|
|
+ kstat_t *dp_tx_assign_kstat;
|
|
|
|
|
|
/* No lock needed - sync context only */
|
|
|
blkptr_t dp_meta_rootbp;
|
|
|
@@ -109,6 +112,8 @@ typedef struct dsl_pool {
|
|
|
uint64_t dp_mos_uncompressed_delta;
|
|
|
uint64_t dp_txg_history_size;
|
|
|
list_t dp_txg_history;
|
|
|
+ uint64_t dp_tx_assign_size;
|
|
|
+ kstat_named_t *dp_tx_assign_buckets;
|
|
|
|
|
|
|
|
|
/* Has its own locking */
|
|
|
@@ -145,12 +150,6 @@ void dsl_pool_willuse_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
|
|
|
void dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp);
|
|
|
void dsl_free_sync(zio_t *pio, dsl_pool_t *dp, uint64_t txg,
|
|
|
const blkptr_t *bpp);
|
|
|
-int dsl_read(zio_t *pio, spa_t *spa, const blkptr_t *bpp, arc_buf_t *pbuf,
|
|
|
- arc_done_func_t *done, void *private, int priority, int zio_flags,
|
|
|
- uint32_t *arc_flags, const zbookmark_t *zb);
|
|
|
-int dsl_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bpp,
|
|
|
- arc_done_func_t *done, void *private, int priority, int zio_flags,
|
|
|
- uint32_t *arc_flags, const zbookmark_t *zb);
|
|
|
void dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx);
|
|
|
void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx);
|
|
|
void dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx);
|
|
|
@@ -166,6 +165,8 @@ extern int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj,
|
|
|
extern void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp);
|
|
|
int dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **);
|
|
|
|
|
|
+void dsl_pool_tx_assign_add_usecs(dsl_pool_t *dp, uint64_t usecs);
|
|
|
+
|
|
|
txg_history_t *dsl_pool_txg_history_add(dsl_pool_t *dp, uint64_t txg);
|
|
|
txg_history_t *dsl_pool_txg_history_get(dsl_pool_t *dp, uint64_t txg);
|
|
|
void dsl_pool_txg_history_put(txg_history_t *th);
|
|
|
diff --git a/include/sys/fm/fs/zfs.h b/include/sys/fm/fs/zfs.h
|
|
|
index d5c6004..741b99e 100644
|
|
|
--- a/include/sys/fm/fs/zfs.h
|
|
|
+++ b/include/sys/fm/fs/zfs.h
|
|
|
@@ -73,6 +73,8 @@ extern "C" {
|
|
|
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU "vdev_fru"
|
|
|
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE "vdev_state"
|
|
|
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_ASHIFT "vdev_ashift"
|
|
|
+#define FM_EREPORT_PAYLOAD_ZFS_VDEV_COMP_TS "vdev_complete_ts"
|
|
|
+#define FM_EREPORT_PAYLOAD_ZFS_VDEV_DELTA_TS "vdev_delta_ts"
|
|
|
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID "parent_guid"
|
|
|
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE "parent_type"
|
|
|
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_PATH "parent_path"
|
|
|
@@ -88,6 +90,9 @@ extern "C" {
|
|
|
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_STAGE "zio_stage"
|
|
|
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_PIPELINE "zio_pipeline"
|
|
|
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_DELAY "zio_delay"
|
|
|
+#define FM_EREPORT_PAYLOAD_ZFS_ZIO_TIMESTAMP "zio_timestamp"
|
|
|
+#define FM_EREPORT_PAYLOAD_ZFS_ZIO_DEADLINE "zio_deadline"
|
|
|
+#define FM_EREPORT_PAYLOAD_ZFS_ZIO_DELTA "zio_delta"
|
|
|
#define FM_EREPORT_PAYLOAD_ZFS_PREV_STATE "prev_state"
|
|
|
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED "cksum_expected"
|
|
|
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL "cksum_actual"
|
|
|
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
|
|
|
index 8c949e7..26c24fc 100644
|
|
|
--- a/include/sys/fs/zfs.h
|
|
|
+++ b/include/sys/fs/zfs.h
|
|
|
@@ -63,7 +63,7 @@ typedef enum {
|
|
|
* Dataset properties are identified by these constants and must be added to
|
|
|
* the end of this list to ensure that external consumers are not affected
|
|
|
* by the change. If you make any changes to this list, be sure to update
|
|
|
- * the property table in usr/src/common/zfs/zfs_prop.c.
|
|
|
+ * the property table in module/zcommon/zfs_prop.c.
|
|
|
*/
|
|
|
typedef enum {
|
|
|
ZFS_PROP_TYPE,
|
|
|
@@ -146,7 +146,7 @@ extern const char *zfs_userquota_prop_prefixes[ZFS_NUM_USERQUOTA_PROPS];
|
|
|
* Pool properties are identified by these constants and must be added to the
|
|
|
* end of this list to ensure that external consumers are not affected
|
|
|
* by the change. If you make any changes to this list, be sure to update
|
|
|
- * the property table in usr/src/common/zfs/zpool_prop.c.
|
|
|
+ * the property table in module/zcommon/zpool_prop.c.
|
|
|
*/
|
|
|
typedef enum {
|
|
|
ZPOOL_PROP_NAME,
|
|
|
diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h
|
|
|
index 6583594..a36baed 100644
|
|
|
--- a/include/sys/metaslab_impl.h
|
|
|
+++ b/include/sys/metaslab_impl.h
|
|
|
@@ -21,7 +21,10 @@
|
|
|
/*
|
|
|
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
|
|
* Use is subject to license terms.
|
|
|
- * Copyright (c) 2011 by Delphix. All rights reserved.
|
|
|
+ */
|
|
|
+
|
|
|
+/*
|
|
|
+ * Copyright (c) 2012 by Delphix. All rights reserved.
|
|
|
*/
|
|
|
|
|
|
#ifndef _SYS_METASLAB_IMPL_H
|
|
|
@@ -64,20 +67,38 @@ struct metaslab_group {
|
|
|
};
|
|
|
|
|
|
/*
|
|
|
- * Each metaslab's free space is tracked in space map object in the MOS,
|
|
|
- * which is only updated in syncing context. Each time we sync a txg,
|
|
|
+ * Each metaslab maintains an in-core free map (ms_map) that contains the
|
|
|
+ * current list of free segments. As blocks are allocated, the allocated
|
|
|
+ * segment is removed from the ms_map and added to a per txg allocation map.
|
|
|
+ * As blocks are freed, they are added to the per txg free map. These per
|
|
|
+ * txg maps allow us to process all allocations and frees in syncing context
|
|
|
+ * where it is safe to update the on-disk space maps.
|
|
|
+ *
|
|
|
+ * Each metaslab's free space is tracked in a space map object in the MOS,
|
|
|
+ * which is only updated in syncing context. Each time we sync a txg,
|
|
|
* we append the allocs and frees from that txg to the space map object.
|
|
|
* When the txg is done syncing, metaslab_sync_done() updates ms_smo
|
|
|
- * to ms_smo_syncing. Everything in ms_smo is always safe to allocate.
|
|
|
+ * to ms_smo_syncing. Everything in ms_smo is always safe to allocate.
|
|
|
+ *
|
|
|
+ * To load the in-core free map we read the space map object from disk.
|
|
|
+ * This object contains a series of alloc and free records that are
|
|
|
+ * combined to make up the list of all free segments in this metaslab. These
|
|
|
+ * segments are represented in-core by the ms_map and are stored in an
|
|
|
+ * AVL tree.
|
|
|
+ *
|
|
|
+ * As the space map object grows (as a result of the appends) it will
|
|
|
+ * eventually become space-inefficient. When the space map object is
|
|
|
+ * zfs_condense_pct/100 times the size of the minimal on-disk representation,
|
|
|
+ * we rewrite it in its minimized form.
|
|
|
*/
|
|
|
struct metaslab {
|
|
|
kmutex_t ms_lock; /* metaslab lock */
|
|
|
space_map_obj_t ms_smo; /* synced space map object */
|
|
|
space_map_obj_t ms_smo_syncing; /* syncing space map object */
|
|
|
- space_map_t ms_allocmap[TXG_SIZE]; /* allocated this txg */
|
|
|
- space_map_t ms_freemap[TXG_SIZE]; /* freed this txg */
|
|
|
- space_map_t ms_defermap[TXG_DEFER_SIZE]; /* deferred frees */
|
|
|
- space_map_t ms_map; /* in-core free space map */
|
|
|
+ space_map_t *ms_allocmap[TXG_SIZE]; /* allocated this txg */
|
|
|
+ space_map_t *ms_freemap[TXG_SIZE]; /* freed this txg */
|
|
|
+ space_map_t *ms_defermap[TXG_DEFER_SIZE]; /* deferred frees */
|
|
|
+ space_map_t *ms_map; /* in-core free space map */
|
|
|
int64_t ms_deferspace; /* sum of ms_defermap[] space */
|
|
|
uint64_t ms_weight; /* weight vs. others in group */
|
|
|
metaslab_group_t *ms_group; /* metaslab group */
|
|
|
diff --git a/include/sys/nvpair.h b/include/sys/nvpair.h
|
|
|
index cc399fd..c502568 100644
|
|
|
--- a/include/sys/nvpair.h
|
|
|
+++ b/include/sys/nvpair.h
|
|
|
@@ -144,6 +144,7 @@ extern nv_alloc_t *nv_alloc_nosleep;
|
|
|
|
|
|
#if defined(_KERNEL) && !defined(_BOOT)
|
|
|
extern nv_alloc_t *nv_alloc_sleep;
|
|
|
+extern nv_alloc_t *nv_alloc_pushpage;
|
|
|
#endif
|
|
|
|
|
|
int nv_alloc_init(nv_alloc_t *, const nv_alloc_ops_t *, /* args */ ...);
|
|
|
diff --git a/include/sys/spa.h b/include/sys/spa.h
|
|
|
index 8211722..8f2af8a 100644
|
|
|
--- a/include/sys/spa.h
|
|
|
+++ b/include/sys/spa.h
|
|
|
@@ -486,13 +486,7 @@ extern int spa_scan_stop(spa_t *spa);
|
|
|
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
|
|
|
extern void spa_sync_allpools(void);
|
|
|
|
|
|
-/*
|
|
|
- * DEFERRED_FREE must be large enough that regular blocks are not
|
|
|
- * deferred. XXX so can't we change it back to 1?
|
|
|
- */
|
|
|
-#define SYNC_PASS_DEFERRED_FREE 2 /* defer frees after this pass */
|
|
|
-#define SYNC_PASS_DONT_COMPRESS 4 /* don't compress after this pass */
|
|
|
-#define SYNC_PASS_REWRITE 1 /* rewrite new bps after this pass */
|
|
|
+extern int zfs_sync_pass_deferred_free;
|
|
|
|
|
|
/* spa namespace global mutex */
|
|
|
extern kmutex_t spa_namespace_lock;
|
|
|
@@ -570,6 +564,7 @@ extern int spa_offline_log(spa_t *spa);
|
|
|
|
|
|
/* Log claim callback */
|
|
|
extern void spa_claim_notify(zio_t *zio);
|
|
|
+extern void spa_deadman(void *);
|
|
|
|
|
|
/* Accessor functions */
|
|
|
extern boolean_t spa_shutting_down(spa_t *spa);
|
|
|
@@ -604,6 +599,7 @@ extern boolean_t spa_suspended(spa_t *spa);
|
|
|
extern uint64_t spa_bootfs(spa_t *spa);
|
|
|
extern uint64_t spa_delegation(spa_t *spa);
|
|
|
extern objset_t *spa_meta_objset(spa_t *spa);
|
|
|
+extern uint64_t spa_deadman_synctime(spa_t *spa);
|
|
|
|
|
|
/* Miscellaneous support routines */
|
|
|
extern void spa_activate_mos_feature(spa_t *spa, const char *feature);
|
|
|
diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h
|
|
|
index 65edc97..47dfe43 100644
|
|
|
--- a/include/sys/spa_impl.h
|
|
|
+++ b/include/sys/spa_impl.h
|
|
|
@@ -80,16 +80,16 @@ typedef struct spa_config_dirent {
|
|
|
char *scd_path;
|
|
|
} spa_config_dirent_t;
|
|
|
|
|
|
-enum zio_taskq_type {
|
|
|
+typedef enum zio_taskq_type {
|
|
|
ZIO_TASKQ_ISSUE = 0,
|
|
|
ZIO_TASKQ_ISSUE_HIGH,
|
|
|
ZIO_TASKQ_INTERRUPT,
|
|
|
ZIO_TASKQ_INTERRUPT_HIGH,
|
|
|
ZIO_TASKQ_TYPES
|
|
|
-};
|
|
|
+} zio_taskq_type_t;
|
|
|
|
|
|
/*
|
|
|
- * State machine for the zpool-pooname process. The states transitions
|
|
|
+ * State machine for the zpool-poolname process. The state transitions
|
|
|
* are done as follows:
|
|
|
*
|
|
|
* From To Routine
|
|
|
@@ -107,6 +107,11 @@ typedef enum spa_proc_state {
|
|
|
SPA_PROC_GONE /* spa_thread() is exiting, spa_proc = &p0 */
|
|
|
} spa_proc_state_t;
|
|
|
|
|
|
+typedef struct spa_taskqs {
|
|
|
+ uint_t stqs_count;
|
|
|
+ taskq_t **stqs_taskq;
|
|
|
+} spa_taskqs_t;
|
|
|
+
|
|
|
struct spa {
|
|
|
/*
|
|
|
* Fields protected by spa_namespace_lock.
|
|
|
@@ -125,7 +130,7 @@ struct spa {
|
|
|
uint8_t spa_sync_on; /* sync threads are running */
|
|
|
spa_load_state_t spa_load_state; /* current load operation */
|
|
|
uint64_t spa_import_flags; /* import specific flags */
|
|
|
- taskq_t *spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES];
|
|
|
+ spa_taskqs_t spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES];
|
|
|
dsl_pool_t *spa_dsl_pool;
|
|
|
boolean_t spa_is_initializing; /* true while opening pool */
|
|
|
metaslab_class_t *spa_normal_class; /* normal data class */
|
|
|
@@ -227,6 +232,10 @@ struct spa {
|
|
|
uint64_t spa_feat_for_write_obj; /* required to write to pool */
|
|
|
uint64_t spa_feat_for_read_obj; /* required to read from pool */
|
|
|
uint64_t spa_feat_desc_obj; /* Feature descriptions */
|
|
|
+ taskqid_t spa_deadman_tqid; /* Task id */
|
|
|
+ uint64_t spa_deadman_calls; /* number of deadman calls */
|
|
|
+ uint64_t spa_sync_starttime; /* starting time of spa_sync */
|
|
|
+ uint64_t spa_deadman_synctime; /* deadman expiration timer */
|
|
|
/*
|
|
|
* spa_refcnt & spa_config_lock must be the last elements
|
|
|
* because refcount_t changes size based on compilation options.
|
|
|
@@ -239,6 +248,12 @@ struct spa {
|
|
|
|
|
|
extern char *spa_config_path;
|
|
|
|
|
|
+extern void spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
|
|
|
+ task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent);
|
|
|
+extern void spa_taskq_dispatch_sync(spa_t *, zio_type_t t, zio_taskq_type_t q,
|
|
|
+ task_func_t *func, void *arg, uint_t flags);
|
|
|
+
|
|
|
+
|
|
|
#ifdef __cplusplus
|
|
|
}
|
|
|
#endif
|
|
|
diff --git a/include/sys/space_map.h b/include/sys/space_map.h
|
|
|
index 3322997..2da80d2 100644
|
|
|
--- a/include/sys/space_map.h
|
|
|
+++ b/include/sys/space_map.h
|
|
|
@@ -23,6 +23,10 @@
|
|
|
* Use is subject to license terms.
|
|
|
*/
|
|
|
|
|
|
+/*
|
|
|
+ * Copyright (c) 2012 by Delphix. All rights reserved.
|
|
|
+ */
|
|
|
+
|
|
|
#ifndef _SYS_SPACE_MAP_H
|
|
|
#define _SYS_SPACE_MAP_H
|
|
|
|
|
|
@@ -36,17 +40,17 @@ extern "C" {
|
|
|
typedef const struct space_map_ops space_map_ops_t;
|
|
|
|
|
|
typedef struct space_map {
|
|
|
- avl_tree_t sm_root; /* AVL tree of map segments */
|
|
|
+ avl_tree_t sm_root; /* offset-ordered segment AVL tree */
|
|
|
uint64_t sm_space; /* sum of all segments in the map */
|
|
|
uint64_t sm_start; /* start of map */
|
|
|
uint64_t sm_size; /* size of map */
|
|
|
uint8_t sm_shift; /* unit shift */
|
|
|
- uint8_t sm_pad[3]; /* unused */
|
|
|
uint8_t sm_loaded; /* map loaded? */
|
|
|
uint8_t sm_loading; /* map loading? */
|
|
|
+ uint8_t sm_condensing; /* map condensing? */
|
|
|
kcondvar_t sm_load_cv; /* map load completion */
|
|
|
space_map_ops_t *sm_ops; /* space map block picker ops vector */
|
|
|
- avl_tree_t *sm_pp_root; /* picker-private AVL tree */
|
|
|
+ avl_tree_t *sm_pp_root; /* size-ordered, picker-private tree */
|
|
|
void *sm_ppd; /* picker-private data */
|
|
|
kmutex_t *sm_lock; /* pointer to lock that protects map */
|
|
|
} space_map_t;
|
|
|
@@ -136,6 +140,8 @@ struct space_map_ops {
|
|
|
|
|
|
typedef void space_map_func_t(space_map_t *sm, uint64_t start, uint64_t size);
|
|
|
|
|
|
+extern void space_map_init(void);
|
|
|
+extern void space_map_fini(void);
|
|
|
extern void space_map_create(space_map_t *sm, uint64_t start, uint64_t size,
|
|
|
uint8_t shift, kmutex_t *lp);
|
|
|
extern void space_map_destroy(space_map_t *sm);
|
|
|
@@ -143,6 +149,7 @@ extern void space_map_add(space_map_t *sm, uint64_t start, uint64_t size);
|
|
|
extern void space_map_remove(space_map_t *sm, uint64_t start, uint64_t size);
|
|
|
extern boolean_t space_map_contains(space_map_t *sm,
|
|
|
uint64_t start, uint64_t size);
|
|
|
+extern void space_map_swap(space_map_t **msrc, space_map_t **mdest);
|
|
|
extern void space_map_vacate(space_map_t *sm,
|
|
|
space_map_func_t *func, space_map_t *mdest);
|
|
|
extern void space_map_walk(space_map_t *sm,
|
|
|
diff --git a/include/sys/vdev.h b/include/sys/vdev.h
|
|
|
index 8f297a9..f49086a 100644
|
|
|
--- a/include/sys/vdev.h
|
|
|
+++ b/include/sys/vdev.h
|
|
|
@@ -78,6 +78,7 @@ extern void vdev_metaslab_fini(vdev_t *vd);
|
|
|
extern void vdev_metaslab_set_size(vdev_t *);
|
|
|
extern void vdev_expand(vdev_t *vd, uint64_t txg);
|
|
|
extern void vdev_split(vdev_t *vd);
|
|
|
+extern void vdev_deadman(vdev_t *vd);
|
|
|
|
|
|
|
|
|
extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs);
|
|
|
diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h
|
|
|
index 964ee24..711408e 100644
|
|
|
--- a/include/sys/vdev_impl.h
|
|
|
+++ b/include/sys/vdev_impl.h
|
|
|
@@ -105,6 +105,8 @@ struct vdev_queue {
|
|
|
avl_tree_t vq_read_tree;
|
|
|
avl_tree_t vq_write_tree;
|
|
|
avl_tree_t vq_pending_tree;
|
|
|
+ uint64_t vq_io_complete_ts;
|
|
|
+ uint64_t vq_io_delta_ts;
|
|
|
list_t vq_io_list;
|
|
|
kmutex_t vq_lock;
|
|
|
};
|
|
|
diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h
|
|
|
index 53080f3..4b34259 100644
|
|
|
--- a/include/sys/zfs_context.h
|
|
|
+++ b/include/sys/zfs_context.h
|
|
|
@@ -25,6 +25,7 @@
|
|
|
/*
|
|
|
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
|
|
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
|
|
|
+ * Copyright (c) 2012 by Delphix. All rights reserved.
|
|
|
*/
|
|
|
|
|
|
#ifndef _SYS_ZFS_CONTEXT_H
|
|
|
@@ -71,7 +72,6 @@
|
|
|
#define _SYS_RWLOCK_H
|
|
|
#define _SYS_CONDVAR_H
|
|
|
#define _SYS_SYSTM_H
|
|
|
-#define _SYS_DEBUG_H
|
|
|
#define _SYS_T_LOCK_H
|
|
|
#define _SYS_VNODE_H
|
|
|
#define _SYS_VFS_H
|
|
|
@@ -111,6 +111,7 @@
|
|
|
#include <sys/u8_textprep.h>
|
|
|
#include <sys/fm/fs/zfs.h>
|
|
|
#include <sys/sunddi.h>
|
|
|
+#include <sys/debug.h>
|
|
|
|
|
|
/*
|
|
|
* Stack
|
|
|
@@ -400,13 +401,17 @@ extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
|
|
|
#define taskq_create_sysdc(a, b, d, e, p, dc, f) \
|
|
|
(taskq_create(a, b, maxclsyspri, d, e, f))
|
|
|
extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
|
|
|
+extern taskqid_t taskq_dispatch_delay(taskq_t *, task_func_t, void *, uint_t,
|
|
|
+ clock_t);
|
|
|
extern void taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t,
|
|
|
taskq_ent_t *);
|
|
|
extern int taskq_empty_ent(taskq_ent_t *);
|
|
|
extern void taskq_init_ent(taskq_ent_t *);
|
|
|
extern void taskq_destroy(taskq_t *);
|
|
|
extern void taskq_wait(taskq_t *);
|
|
|
+extern void taskq_wait_id(taskq_t *, taskqid_t);
|
|
|
extern int taskq_member(taskq_t *, kthread_t *);
|
|
|
+extern int taskq_cancel_id(taskq_t *, taskqid_t);
|
|
|
extern void system_taskq_init(void);
|
|
|
extern void system_taskq_fini(void);
|
|
|
|
|
|
@@ -523,6 +528,11 @@ extern vnode_t *rootdir;
|
|
|
|
|
|
extern void delay(clock_t ticks);
|
|
|
|
|
|
+#define SEC_TO_TICK(sec) ((sec) * hz)
|
|
|
+#define MSEC_TO_TICK(msec) ((msec) / (MILLISEC / hz))
|
|
|
+#define USEC_TO_TICK(usec) ((usec) / (MICROSEC / hz))
|
|
|
+#define NSEC_TO_TICK(usec) ((usec) / (NANOSEC / hz))
|
|
|
+
|
|
|
#define gethrestime_sec() time(NULL)
|
|
|
#define gethrestime(t) \
|
|
|
do {\
|
|
|
diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h
|
|
|
index 740d8ed..c0cb470 100644
|
|
|
--- a/include/sys/zfs_ioctl.h
|
|
|
+++ b/include/sys/zfs_ioctl.h
|
|
|
@@ -236,6 +236,8 @@ typedef struct zinject_record {
|
|
|
uint32_t zi_iotype;
|
|
|
int32_t zi_duration;
|
|
|
uint64_t zi_timer;
|
|
|
+ uint32_t zi_cmd;
|
|
|
+ uint32_t zi_pad;
|
|
|
} zinject_record_t;
|
|
|
|
|
|
#define ZINJECT_NULL 0x1
|
|
|
@@ -245,6 +247,16 @@ typedef struct zinject_record {
|
|
|
#define ZEVENT_NONBLOCK 0x1
|
|
|
#define ZEVENT_SIZE 1024
|
|
|
|
|
|
+typedef enum zinject_type {
|
|
|
+ ZINJECT_UNINITIALIZED,
|
|
|
+ ZINJECT_DATA_FAULT,
|
|
|
+ ZINJECT_DEVICE_FAULT,
|
|
|
+ ZINJECT_LABEL_FAULT,
|
|
|
+ ZINJECT_IGNORED_WRITES,
|
|
|
+ ZINJECT_PANIC,
|
|
|
+ ZINJECT_DELAY_IO,
|
|
|
+} zinject_type_t;
|
|
|
+
|
|
|
typedef struct zfs_share {
|
|
|
uint64_t z_exportdata;
|
|
|
uint64_t z_sharedata;
|
|
|
diff --git a/include/sys/zfs_vnops.h b/include/sys/zfs_vnops.h
|
|
|
index 5da5eaf..75f7c12 100644
|
|
|
--- a/include/sys/zfs_vnops.h
|
|
|
+++ b/include/sys/zfs_vnops.h
|
|
|
@@ -38,6 +38,7 @@ extern "C" {
|
|
|
|
|
|
extern int zfs_open(struct inode *ip, int mode, int flag, cred_t *cr);
|
|
|
extern int zfs_close(struct inode *ip, int flag, cred_t *cr);
|
|
|
+extern int zfs_holey(struct inode *ip, int cmd, loff_t *off);
|
|
|
extern int zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr);
|
|
|
extern int zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr);
|
|
|
extern int zfs_access(struct inode *ip, int mode, int flag, cred_t *cr);
|
|
|
diff --git a/include/sys/zio.h b/include/sys/zio.h
|
|
|
index 64efde0..0353033 100644
|
|
|
--- a/include/sys/zio.h
|
|
|
+++ b/include/sys/zio.h
|
|
|
@@ -21,8 +21,6 @@
|
|
|
|
|
|
/*
|
|
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
|
- */
|
|
|
-/*
|
|
|
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
|
|
* Copyright (c) 2012 by Delphix. All rights reserved.
|
|
|
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
|
|
@@ -411,7 +409,10 @@ struct zio {
|
|
|
const zio_vsd_ops_t *io_vsd_ops;
|
|
|
|
|
|
uint64_t io_offset;
|
|
|
- uint64_t io_deadline;
|
|
|
+ uint64_t io_deadline; /* expires at timestamp + deadline */
|
|
|
+ uint64_t io_timestamp; /* submitted at (ticks) */
|
|
|
+ uint64_t io_delta; /* vdev queue service delta (ticks) */
|
|
|
+ uint64_t io_delay; /* vdev disk service delta (ticks) */
|
|
|
avl_node_t io_offset_node;
|
|
|
avl_node_t io_deadline_node;
|
|
|
avl_tree_t *io_vdev_tree;
|
|
|
@@ -423,7 +424,6 @@ struct zio {
|
|
|
enum zio_flag io_orig_flags;
|
|
|
enum zio_stage io_orig_stage;
|
|
|
enum zio_stage io_orig_pipeline;
|
|
|
- uint64_t io_delay;
|
|
|
int io_error;
|
|
|
int io_child_error[ZIO_CHILD_TYPES];
|
|
|
uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES];
|
|
|
@@ -560,6 +560,7 @@ extern int zio_handle_fault_injection(zio_t *zio, int error);
|
|
|
extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error);
|
|
|
extern int zio_handle_label_injection(zio_t *zio, int error);
|
|
|
extern void zio_handle_ignored_writes(zio_t *zio);
|
|
|
+extern uint64_t zio_handle_io_delay(zio_t *zio);
|
|
|
|
|
|
/*
|
|
|
* Checksum ereport functions
|
|
|
diff --git a/include/sys/zio_impl.h b/include/sys/zio_impl.h
|
|
|
index d90bd8b..2d062d0 100644
|
|
|
--- a/include/sys/zio_impl.h
|
|
|
+++ b/include/sys/zio_impl.h
|
|
|
@@ -23,6 +23,10 @@
|
|
|
* Use is subject to license terms.
|
|
|
*/
|
|
|
|
|
|
+/*
|
|
|
+ * Copyright (c) 2012 by Delphix. All rights reserved.
|
|
|
+ */
|
|
|
+
|
|
|
#ifndef _ZIO_IMPL_H
|
|
|
#define _ZIO_IMPL_H
|
|
|
|
|
|
@@ -143,6 +147,7 @@ enum zio_stage {
|
|
|
#define ZIO_FREE_PIPELINE \
|
|
|
(ZIO_INTERLOCK_STAGES | \
|
|
|
ZIO_STAGE_FREE_BP_INIT | \
|
|
|
+ ZIO_STAGE_ISSUE_ASYNC | \
|
|
|
ZIO_STAGE_DVA_FREE)
|
|
|
|
|
|
#define ZIO_DDT_FREE_PIPELINE \
|
|
|
diff --git a/lib/libshare/smb.c b/lib/libshare/smb.c
|
|
|
index e34d142..a545bfb 100644
|
|
|
--- a/lib/libshare/smb.c
|
|
|
+++ b/lib/libshare/smb.c
|
|
|
@@ -423,7 +423,15 @@ static const sa_share_ops_t smb_shareops = {
|
|
|
static boolean_t
|
|
|
smb_available(void)
|
|
|
{
|
|
|
- /* TODO: Sanity check NET_CMD_PATH and SHARE_DIR */
|
|
|
+ struct stat statbuf;
|
|
|
+
|
|
|
+ if (lstat(SHARE_DIR, &statbuf) != 0 ||
|
|
|
+ !S_ISDIR(statbuf.st_mode))
|
|
|
+ return B_FALSE;
|
|
|
+
|
|
|
+ if (access(NET_CMD_PATH, F_OK) != 0)
|
|
|
+ return B_FALSE;
|
|
|
+
|
|
|
return B_TRUE;
|
|
|
}
|
|
|
|
|
|
diff --git a/lib/libspl/include/assert.h b/lib/libspl/include/assert.h
|
|
|
index 7f145b8..3704165 100644
|
|
|
--- a/lib/libspl/include/assert.h
|
|
|
+++ b/lib/libspl/include/assert.h
|
|
|
@@ -80,16 +80,19 @@ extern void __assert(const char *, const char *, int);
|
|
|
#define VERIFY3S(x, y, z) VERIFY3_IMPL(x, y, z, int64_t)
|
|
|
#define VERIFY3U(x, y, z) VERIFY3_IMPL(x, y, z, uint64_t)
|
|
|
#define VERIFY3P(x, y, z) VERIFY3_IMPL(x, y, z, uintptr_t)
|
|
|
+#define VERIFY0(x) VERIFY3_IMPL(x, ==, 0, uint64_t)
|
|
|
|
|
|
#ifdef NDEBUG
|
|
|
#define ASSERT3S(x, y, z) ((void)0)
|
|
|
#define ASSERT3U(x, y, z) ((void)0)
|
|
|
#define ASSERT3P(x, y, z) ((void)0)
|
|
|
+#define ASSERT0(x) ((void)0)
|
|
|
#define ASSERTV(x)
|
|
|
#else
|
|
|
#define ASSERT3S(x, y, z) VERIFY3S(x, y, z)
|
|
|
#define ASSERT3U(x, y, z) VERIFY3U(x, y, z)
|
|
|
#define ASSERT3P(x, y, z) VERIFY3P(x, y, z)
|
|
|
+#define ASSERT0(x) VERIFY0(x)
|
|
|
#define ASSERTV(x) x
|
|
|
#endif /* NDEBUG */
|
|
|
|
|
|
diff --git a/lib/libspl/include/sys/mount.h b/lib/libspl/include/sys/mount.h
|
|
|
index 145e785..7b1e06b 100644
|
|
|
--- a/lib/libspl/include/sys/mount.h
|
|
|
+++ b/lib/libspl/include/sys/mount.h
|
|
|
@@ -51,10 +51,10 @@
|
|
|
#define MS_DIRSYNC S_WRITE
|
|
|
#endif
|
|
|
|
|
|
-#define MS_USERS 0x40000000
|
|
|
-#define MS_OWNER 0x10000000
|
|
|
-#define MS_GROUP 0x08000000
|
|
|
-#define MS_COMMENT 0x02000000
|
|
|
+#define MS_USERS (MS_NOEXEC|MS_NOSUID|MS_NODEV)
|
|
|
+#define MS_OWNER (MS_NOSUID|MS_NODEV)
|
|
|
+#define MS_GROUP (MS_NOSUID|MS_NODEV)
|
|
|
+#define MS_COMMENT 0
|
|
|
|
|
|
/*
|
|
|
* Older glibc <sys/mount.h> headers did not define all the available
|
|
|
diff --git a/lib/libspl/include/umem.h b/lib/libspl/include/umem.h
|
|
|
index 87db1f4..f102f66 100644
|
|
|
--- a/lib/libspl/include/umem.h
|
|
|
+++ b/lib/libspl/include/umem.h
|
|
|
@@ -28,10 +28,10 @@
|
|
|
#define _LIBSPL_UMEM_H
|
|
|
|
|
|
/* XXX: We should use the real portable umem library if it is detected
|
|
|
- * at configure time. However, if the library is not available we can
|
|
|
+ * at configure time. However, if the library is not available, we can
|
|
|
* use a trivial malloc based implementation. This obviously impacts
|
|
|
- * performance but unless you using a full userspace build of zpool for
|
|
|
- * something other than ztest your likely not going to notice or care.
|
|
|
+ * performance, but unless you are using a full userspace build of zpool for
|
|
|
+ * something other than ztest, you are likely not going to notice or care.
|
|
|
*
|
|
|
* https://labs.omniti.com/trac/portableumem
|
|
|
*/
|
|
|
diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c
|
|
|
index 3d80224..9e79bd9 100644
|
|
|
--- a/lib/libzfs/libzfs_import.c
|
|
|
+++ b/lib/libzfs/libzfs_import.c
|
|
|
@@ -523,13 +523,12 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
|
|
|
* version
|
|
|
* pool guid
|
|
|
* name
|
|
|
- * pool txg (if available)
|
|
|
* comment (if available)
|
|
|
* pool state
|
|
|
* hostid (if available)
|
|
|
* hostname (if available)
|
|
|
*/
|
|
|
- uint64_t state, version, pool_txg;
|
|
|
+ uint64_t state, version;
|
|
|
char *comment = NULL;
|
|
|
|
|
|
version = fnvlist_lookup_uint64(tmp,
|
|
|
@@ -545,11 +544,6 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
|
|
|
fnvlist_add_string(config,
|
|
|
ZPOOL_CONFIG_POOL_NAME, name);
|
|
|
|
|
|
- if (nvlist_lookup_uint64(tmp,
|
|
|
- ZPOOL_CONFIG_POOL_TXG, &pool_txg) == 0)
|
|
|
- fnvlist_add_uint64(config,
|
|
|
- ZPOOL_CONFIG_POOL_TXG, pool_txg);
|
|
|
-
|
|
|
if (nvlist_lookup_string(tmp,
|
|
|
ZPOOL_CONFIG_COMMENT, &comment) == 0)
|
|
|
fnvlist_add_string(config,
|
|
|
@@ -868,7 +862,7 @@ zpool_read_label(int fd, nvlist_t **config)
|
|
|
|
|
|
*config = NULL;
|
|
|
|
|
|
- if (fstat64(fd, &statbuf) == -1)
|
|
|
+ if (fstat64_blk(fd, &statbuf) == -1)
|
|
|
return (0);
|
|
|
size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
|
|
|
|
|
|
@@ -906,6 +900,36 @@ zpool_read_label(int fd, nvlist_t **config)
|
|
|
return (0);
|
|
|
}
|
|
|
|
|
|
+/*
|
|
|
+ * Given a file descriptor, clear (zero) the label information. This function
|
|
|
+ * is used in the appliance stack as part of the ZFS sysevent module and
|
|
|
+ * to implement the "zpool labelclear" command.
|
|
|
+ */
|
|
|
+int
|
|
|
+zpool_clear_label(int fd)
|
|
|
+{
|
|
|
+ struct stat64 statbuf;
|
|
|
+ int l;
|
|
|
+ vdev_label_t *label;
|
|
|
+ uint64_t size;
|
|
|
+
|
|
|
+ if (fstat64_blk(fd, &statbuf) == -1)
|
|
|
+ return (0);
|
|
|
+ size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
|
|
|
+
|
|
|
+ if ((label = calloc(sizeof (vdev_label_t), 1)) == NULL)
|
|
|
+ return (-1);
|
|
|
+
|
|
|
+ for (l = 0; l < VDEV_LABELS; l++) {
|
|
|
+ if (pwrite64(fd, label, sizeof (vdev_label_t),
|
|
|
+ label_offset(size, l)) != sizeof (vdev_label_t))
|
|
|
+ return (-1);
|
|
|
+ }
|
|
|
+
|
|
|
+ free(label);
|
|
|
+ return (0);
|
|
|
+}
|
|
|
+
|
|
|
#ifdef HAVE_LIBBLKID
|
|
|
/*
|
|
|
* Use libblkid to quickly search for zfs devices
|
|
|
diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
|
|
|
index eca1dc3..a6cacd3 100644
|
|
|
--- a/lib/libzfs/libzfs_pool.c
|
|
|
+++ b/lib/libzfs/libzfs_pool.c
|
|
|
@@ -205,6 +205,36 @@ zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
+ * Map POOL STATE to printed strings.
|
|
|
+ */
|
|
|
+const char *
|
|
|
+zpool_pool_state_to_name(pool_state_t state)
|
|
|
+{
|
|
|
+ switch (state) {
|
|
|
+ default:
|
|
|
+ break;
|
|
|
+ case POOL_STATE_ACTIVE:
|
|
|
+ return (gettext("ACTIVE"));
|
|
|
+ case POOL_STATE_EXPORTED:
|
|
|
+ return (gettext("EXPORTED"));
|
|
|
+ case POOL_STATE_DESTROYED:
|
|
|
+ return (gettext("DESTROYED"));
|
|
|
+ case POOL_STATE_SPARE:
|
|
|
+ return (gettext("SPARE"));
|
|
|
+ case POOL_STATE_L2CACHE:
|
|
|
+ return (gettext("L2CACHE"));
|
|
|
+ case POOL_STATE_UNINITIALIZED:
|
|
|
+ return (gettext("UNINITIALIZED"));
|
|
|
+ case POOL_STATE_UNAVAIL:
|
|
|
+ return (gettext("UNAVAIL"));
|
|
|
+ case POOL_STATE_POTENTIALLY_ACTIVE:
|
|
|
+ return (gettext("POTENTIALLY_ACTIVE"));
|
|
|
+ }
|
|
|
+
|
|
|
+ return (gettext("UNKNOWN"));
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
* Get a zpool property value for 'prop' and return the value in
|
|
|
* a pre-allocated buffer.
|
|
|
*/
|
|
|
diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c
|
|
|
index a4e1255..5bb88e9 100644
|
|
|
--- a/lib/libzfs/libzfs_util.c
|
|
|
+++ b/lib/libzfs/libzfs_util.c
|
|
|
@@ -704,6 +704,8 @@ libzfs_init(void)
|
|
|
if ((hdl->libzfs_mnttab = fopen(MNTTAB, "r")) == NULL) {
|
|
|
#endif
|
|
|
(void) close(hdl->libzfs_fd);
|
|
|
+ (void) fprintf(stderr,
|
|
|
+ gettext("mtab is not present at %s.\n"), MNTTAB);
|
|
|
free(hdl);
|
|
|
return (NULL);
|
|
|
}
|
|
|
diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c
|
|
|
index c1ce82d..f7aeeb4 100644
|
|
|
--- a/lib/libzpool/kernel.c
|
|
|
+++ b/lib/libzpool/kernel.c
|
|
|
@@ -522,7 +522,7 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
|
|
|
{
|
|
|
int fd;
|
|
|
vnode_t *vp;
|
|
|
- int old_umask;
|
|
|
+ int old_umask = 0;
|
|
|
char *realpath;
|
|
|
struct stat64 st;
|
|
|
int err;
|
|
|
diff --git a/lib/libzpool/taskq.c b/lib/libzpool/taskq.c
|
|
|
index 803f7dc..96c0d5c 100644
|
|
|
--- a/lib/libzpool/taskq.c
|
|
|
+++ b/lib/libzpool/taskq.c
|
|
|
@@ -147,6 +147,13 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags)
|
|
|
return (1);
|
|
|
}
|
|
|
|
|
|
+taskqid_t
|
|
|
+taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags,
|
|
|
+ clock_t expire_time)
|
|
|
+{
|
|
|
+ return (0);
|
|
|
+}
|
|
|
+
|
|
|
int
|
|
|
taskq_empty_ent(taskq_ent_t *t)
|
|
|
{
|
|
|
@@ -204,6 +211,12 @@ taskq_wait(taskq_t *tq)
|
|
|
mutex_exit(&tq->tq_lock);
|
|
|
}
|
|
|
|
|
|
+void
|
|
|
+taskq_wait_id(taskq_t *tq, taskqid_t id)
|
|
|
+{
|
|
|
+ taskq_wait(tq);
|
|
|
+}
|
|
|
+
|
|
|
static void
|
|
|
taskq_thread(void *arg)
|
|
|
{
|
|
|
@@ -339,6 +352,12 @@ taskq_member(taskq_t *tq, kthread_t *t)
|
|
|
return (0);
|
|
|
}
|
|
|
|
|
|
+int
|
|
|
+taskq_cancel_id(taskq_t *tq, taskqid_t id)
|
|
|
+{
|
|
|
+ return (ENOENT);
|
|
|
+}
|
|
|
+
|
|
|
void
|
|
|
system_taskq_init(void)
|
|
|
{
|
|
|
diff --git a/man/man8/zdb.8 b/man/man8/zdb.8
|
|
|
index 3ce3265..364cf30 100644
|
|
|
--- a/man/man8/zdb.8
|
|
|
+++ b/man/man8/zdb.8
|
|
|
@@ -11,6 +11,7 @@
|
|
|
.\"
|
|
|
.\"
|
|
|
.\" Copyright 2012, Richard Lowe.
|
|
|
+.\" Copyright (c) 2012 by Delphix. All rights reserved.
|
|
|
.\"
|
|
|
.TH "ZDB" "8" "February 15, 2012" "" ""
|
|
|
|
|
|
@@ -19,21 +20,23 @@
|
|
|
|
|
|
.SH "SYNOPSIS"
|
|
|
\fBzdb\fR [-CumdibcsDvhLXFPA] [-e [-p \fIpath\fR...]] [-t \fItxg\fR]
|
|
|
- \fIpoolname\fR [\fIobject\fR ...]
|
|
|
+ [-U \fIcache\fR] [-M \fIinflight I/Os\fR] [\fIpoolname\fR
|
|
|
+ [\fIobject\fR ...]]
|
|
|
|
|
|
.P
|
|
|
-\fBzdb\fR [-divPA] [-e [-p \fIpath\fR...]] \fIdataset\fR [\fIobject\fR ...]
|
|
|
+\fBzdb\fR [-divPA] [-e [-p \fIpath\fR...]] [-U \fIcache\fR]
|
|
|
+ \fIdataset\fR [\fIobject\fR ...]
|
|
|
|
|
|
.P
|
|
|
-\fBzdb\fR -m [-LXFPA] [-t \fItxg\fR] [-e [-p \fIpath\fR...]] \fIpoolname\fR
|
|
|
- [\fIvdev\fR [\fImetaslab\fR ...]]
|
|
|
+\fBzdb\fR -m [-LXFPA] [-t \fItxg\fR] [-e [-p \fIpath\fR...]] [-U \fIcache\fR]
|
|
|
+ \fIpoolname\fR [\fIvdev\fR [\fImetaslab\fR ...]]
|
|
|
|
|
|
.P
|
|
|
-\fBzdb\fR -R [-A] [-e [-p \fIpath\fR...]] \fIpoolname\fR
|
|
|
+\fBzdb\fR -R [-A] [-e [-p \fIpath\fR...]] [-U \fIcache\fR] \fIpoolname\fR
|
|
|
\fIvdev\fR:\fIoffset\fR:\fIsize\fR[:\fIflags\fR]
|
|
|
|
|
|
.P
|
|
|
-\fBzdb\fR -S [-AP] [-e [-p \fIpath\fR...]] \fIpoolname\fR
|
|
|
+\fBzdb\fR -S [-AP] [-e [-p \fIpath\fR...]] [-U \fIcache\fR] \fIpoolname\fR
|
|
|
|
|
|
.P
|
|
|
\fBzdb\fR -l [-uA] \fIdevice\fR
|
|
|
@@ -357,6 +360,18 @@ transactions.
|
|
|
.sp
|
|
|
.ne 2
|
|
|
.na
|
|
|
+\fB-M \fIinflight I/Os\fR \fR
|
|
|
+.ad
|
|
|
+.sp .6
|
|
|
+.RS 4n
|
|
|
+Limit the number of outstanding checksum I/Os to the specified value. The
|
|
|
+default value is 200. This option affects the performance of the \fB-c\fR
|
|
|
+option.
|
|
|
+.RE
|
|
|
+
|
|
|
+.sp
|
|
|
+.ne 2
|
|
|
+.na
|
|
|
\fB-P\fR
|
|
|
.ad
|
|
|
.sp .6
|
|
|
@@ -384,8 +399,7 @@ and their associated transaction numbers.
|
|
|
.ad
|
|
|
.sp .6
|
|
|
.RS 4n
|
|
|
-Use a cache file other than \fB/etc/zfs/zpool.cache\fR. This option is only
|
|
|
-valid with \fB-C\fR
|
|
|
+Use a cache file other than \fB/etc/zfs/zpool.cache\fR.
|
|
|
.RE
|
|
|
|
|
|
.sp
|
|
|
diff --git a/man/man8/zfs.8 b/man/man8/zfs.8
|
|
|
index 3bf14e9..b91158e 100644
|
|
|
--- a/man/man8/zfs.8
|
|
|
+++ b/man/man8/zfs.8
|
|
|
@@ -1042,7 +1042,17 @@ Because \fBSMB\fR shares requires a resource name, a unique resource name is con
|
|
|
.sp
|
|
|
If the \fBsharesmb\fR property is set to \fBoff\fR, the file systems are unshared.
|
|
|
.sp
|
|
|
-In Linux, the share is created with the acl "Everyone:F" by default, meaning that everyone have read access. This however isn't the full truth: Any access control on the underlaying filesystem supersedes this.
|
|
|
+In Linux, the share is created with the ACL (Access Control List) "Everyone:F" ("F" stands for "full permissions", i.e. read and write permissions) and no guest access (which means Samba must be able to authenticate a real user, system passwd/shadow, LDAP or smbpasswd based) by default. This means that any additional access control (disallowing user-specific access, etc.) must be done on the underlying filesystem.
|
|
|
+.sp
|
|
|
+.in +2
|
|
|
+Example to mount a SMB filesystem shared through ZFS (share/tmp):
|
|
|
+.mk
|
|
|
+Note that a user and his/her password \fBmust\fR be given!
|
|
|
+.sp
|
|
|
+.in +2
|
|
|
+smbmount //127.0.0.1/share_tmp /mnt/tmp -o user=workgroup/turbo,password=obrut,uid=1000
|
|
|
+.in -2
|
|
|
+.in -2
|
|
|
.sp
|
|
|
.ne 2
|
|
|
.mk
|
|
|
@@ -1052,7 +1062,9 @@ In Linux, the share is created with the acl "Everyone:F" by default, meaning tha
|
|
|
.in +2
|
|
|
* Samba will need to listen to 'localhost' (127.0.0.1) for the zfs utilities to communitate with samba. This is the default behavior for most Linux distributions.
|
|
|
.sp
|
|
|
-* See the \fBUSERSHARE\fR section of the \fBsmb.conf\fR(5) man page for all configuration options.
|
|
|
+* Samba must be able to authenticate a user. This can be done in a number of ways, depending on whether the system password file, LDAP or the Samba-specific smbpasswd file is used. How to do this is outside the scope of this manual. Please refer to the \fBsmb.conf\fR(5) man page for more information.
|
|
|
+.sp
|
|
|
+* See the \fBUSERSHARE\fR section of the \fBsmb.conf\fR(5) man page for all configuration options in case you need to modify any options to the share afterwards. Do note that any changes done with the 'net' command will be undone if the share is ever unshared (such as at a reboot etc). In the future, ZoL will be able to set specific options directly using sharesmb=<option>.
|
|
|
.sp
|
|
|
.in -2
|
|
|
.RE
|
|
|
diff --git a/man/man8/zpool.8 b/man/man8/zpool.8
|
|
|
index c16cd68..b4b0f46 100644
|
|
|
--- a/man/man8/zpool.8
|
|
|
+++ b/man/man8/zpool.8
|
|
|
@@ -94,7 +94,12 @@ zpool \- configures ZFS storage pools
|
|
|
|
|
|
.LP
|
|
|
.nf
|
|
|
-\fBzpool list\fR [\fB-Hv\fR] [\fB-o\fR \fIproperty\fR[,...]] [\fIpool\fR] ...
|
|
|
+\fBzpool labelclear\fR [\fB-f\fR] \fIdevice\fR
|
|
|
+.fi
|
|
|
+
|
|
|
+.LP
|
|
|
+.nf
|
|
|
+\fBzpool list\fR [\fB-T\fR u | d ] [\fB-Hv\fR] [\fB-o\fR \fIproperty\fR[,...]] [\fIpool\fR] ... [\fIinterval\fR[\fIcount\fR]]
|
|
|
.fi
|
|
|
|
|
|
.LP
|
|
|
@@ -114,6 +119,11 @@ zpool \- configures ZFS storage pools
|
|
|
|
|
|
.LP
|
|
|
.nf
|
|
|
+\fBzpool reopen\fR \fIpool\fR
|
|
|
+.fi
|
|
|
+
|
|
|
+.LP
|
|
|
+.nf
|
|
|
\fBzpool remove\fR \fIpool\fR \fIdevice\fR ...
|
|
|
.fi
|
|
|
|
|
|
@@ -134,6 +144,11 @@ zpool \- configures ZFS storage pools
|
|
|
|
|
|
.LP
|
|
|
.nf
|
|
|
+\fBzpool split\fR [\fB-n\fR] [\fB-R\fR \fIaltroot\fR] [\fB-o\fR \fIproperty=value\fR] \fIpool\fR \fInewpool\fR
|
|
|
+.fi
|
|
|
+
|
|
|
+.LP
|
|
|
+.nf
|
|
|
\fBzpool status\fR [\fB-xv\fR] [\fIpool\fR] ...
|
|
|
.fi
|
|
|
|
|
|
@@ -479,6 +494,7 @@ Percentage of pool space used. This property can also be referred to by its shor
|
|
|
\fB\fBexpandsize\fR\fR
|
|
|
.ad
|
|
|
.RS 20n
|
|
|
+.rt
|
|
|
Amount of uninitialized space within the pool or device that can be used to
|
|
|
increase the total capacity of the pool. Uninitialized space consists of
|
|
|
any space on an EFI labeled vdev which has not been brought online
|
|
|
@@ -487,19 +503,23 @@ any space on an EFI labeled vdev which has not been brought online
|
|
|
|
|
|
.sp
|
|
|
.ne 2
|
|
|
+.mk
|
|
|
.na
|
|
|
\fB\fBfree\fR\fR
|
|
|
.ad
|
|
|
.RS 20n
|
|
|
+.rt
|
|
|
The amount of free space available in the pool.
|
|
|
.RE
|
|
|
|
|
|
.sp
|
|
|
.ne 2
|
|
|
+.mk
|
|
|
.na
|
|
|
\fB\fBfreeing\fR\fR
|
|
|
.ad
|
|
|
.RS 20n
|
|
|
+.rt
|
|
|
After a file system or snapshot is destroyed, the space it was using is
|
|
|
returned to the pool asynchronously. \fB\fBfreeing\fR\fR is the amount of
|
|
|
space remaining to be reclaimed. Over time \fB\fBfreeing\fR\fR will decrease
|
|
|
@@ -508,6 +528,7 @@ while \fB\fBfree\fR\fR increases.
|
|
|
|
|
|
.sp
|
|
|
.ne 2
|
|
|
+.mk
|
|
|
.na
|
|
|
\fB\fBhealth\fR\fR
|
|
|
.ad
|
|
|
@@ -540,10 +561,12 @@ Total size of the storage pool.
|
|
|
|
|
|
.sp
|
|
|
.ne 2
|
|
|
+.mk
|
|
|
.na
|
|
|
\fB\fBunsupported@\fR\fIfeature_guid\fR\fR
|
|
|
.ad
|
|
|
.RS 20n
|
|
|
+.rt
|
|
|
Information about unsupported features that are enabled on the pool. See
|
|
|
\fBzpool-features\fR(5) for details.
|
|
|
.RE
|
|
|
@@ -1412,11 +1435,33 @@ Verbose statistics. Reports usage statistics for individual \fIvdevs\fR within t
|
|
|
.ne 2
|
|
|
.mk
|
|
|
.na
|
|
|
-\fB\fBzpool list\fR [\fB-Hv\fR] [\fB-o\fR \fIprops\fR[,...]] [\fIpool\fR] ...\fR
|
|
|
+\fB\fBzpool labelclear\fR [\fB-f\fR] \fIdevice\fR
|
|
|
+.ad
|
|
|
+.sp .6
|
|
|
+.RS 4n
|
|
|
+Removes ZFS label information from the specified device. The device must not be part of an active pool configuration.
|
|
|
+.sp
|
|
|
+.ne 2
|
|
|
+.mk
|
|
|
+.na
|
|
|
+\fB\fB-f\fR\fR
|
|
|
+.ad
|
|
|
+.RS 12n
|
|
|
+.rt
|
|
|
+Treat exported or foreign devices as inactive.
|
|
|
+.RE
|
|
|
+
|
|
|
+.RE
|
|
|
+
|
|
|
+.sp
|
|
|
+.ne 2
|
|
|
+.mk
|
|
|
+.na
|
|
|
+\fB\fBzpool list\fR [\fB-T\fR \fBu\fR | \fBd\fR] [\fB-Hv\fR] [\fB-o\fR \fIprops\fR[,...]] [\fIpool\fR] ... [\fIinterval\fR[\fIcount\fR]]\fR
|
|
|
.ad
|
|
|
.sp .6
|
|
|
.RS 4n
|
|
|
-Lists the given pools along with a health status and space usage. When given no arguments, all pools in the system are listed.
|
|
|
+Lists the given pools along with a health status and space usage. If no \fIpools\fR are specified, all pools in the system are listed. When given an \fIinterval\fR, the information is printed every \fIinterval\fR seconds until \fBCtrl-C\fR is pressed. If \fIcount\fR is specified, the command exits after \fIcount\fR reports are printed.
|
|
|
.sp
|
|
|
.ne 2
|
|
|
.mk
|
|
|
@@ -1428,6 +1473,18 @@ Lists the given pools along with a health status and space usage. When given no
|
|
|
Scripted mode. Do not display headers, and separate fields by a single tab instead of arbitrary space.
|
|
|
.RE
|
|
|
|
|
|
+.ne 2
|
|
|
+.mk
|
|
|
+.na
|
|
|
+\fB\fB-T\fR \fBu\fR | \fBd\fR\fR
|
|
|
+.ad
|
|
|
+.RS 12n
|
|
|
+.rt
|
|
|
+Display a time stamp.
|
|
|
+.sp
|
|
|
+Specify \fBu\fR for a printed representation of the internal representation of time. See \fBtime\fR(2). Specify \fBd\fR for standard date format. See \fBdate\fR(1).
|
|
|
+.RE
|
|
|
+
|
|
|
.sp
|
|
|
.ne 2
|
|
|
.mk
|
|
|
@@ -1508,8 +1565,18 @@ Expand the device to use all available space. If the device is part of a mirror
|
|
|
.ad
|
|
|
.sp .6
|
|
|
.RS 4n
|
|
|
-Generates a new unique identifier for the pool. You must ensure that all devices in this pool are online and
|
|
|
-healthy before performing this action.
|
|
|
+Generates a new unique identifier for the pool. You must ensure that all
|
|
|
+devices in this pool are online and healthy before performing this action.
|
|
|
+.RE
|
|
|
+
|
|
|
+.sp
|
|
|
+.ne 2
|
|
|
+.na
|
|
|
+\fB\fBzpool reopen\fR \fIpool\fR
|
|
|
+.ad
|
|
|
+.sp .6
|
|
|
+.RS 4n
|
|
|
+Reopen all the vdevs associated with the pool.
|
|
|
.RE
|
|
|
|
|
|
.sp
|
|
|
@@ -1589,6 +1656,51 @@ Sets the given property on the specified pool. See the "Properties" section for
|
|
|
.ne 2
|
|
|
.mk
|
|
|
.na
|
|
|
+\fBzpool split\fR [\fB-n\fR] [\fB-R\fR \fIaltroot\fR] [\fB-o\fR \fIproperty=value\fR] \fIpool\fR \fInewpool\fR
|
|
|
+.ad
|
|
|
+.sp .6
|
|
|
+.RS 4n
|
|
|
+Split devices off \fIpool\fR creating \fInewpool\fR. All \fBvdev\fRs in \fIpool\fR must be mirrors. At the time of the split, \fInewpool\fR will be a replica of \fIpool\fR.
|
|
|
+
|
|
|
+.sp
|
|
|
+.ne 2
|
|
|
+.mk
|
|
|
+.na
|
|
|
+\fB\fB-n\fR \fR
|
|
|
+.ad
|
|
|
+.sp .6
|
|
|
+.RS 4n
|
|
|
+Do dry run, do not actually perform the split. Print out the expected configuration of \fInewpool\fR.
|
|
|
+.RE
|
|
|
+
|
|
|
+.sp
|
|
|
+.ne 2
|
|
|
+.mk
|
|
|
+.na
|
|
|
+\fB\fB-R\fR \fIaltroot\fR \fR
|
|
|
+.ad
|
|
|
+.sp .6
|
|
|
+.RS 4n
|
|
|
+Set \fIaltroot\fR for \fInewpool\fR and automatically import it. This can be useful to avoid mountpoint collisions if \fInewpool\fR is imported on the same filesystem as \fIpool\fR.
|
|
|
+.RE
|
|
|
+
|
|
|
+.sp
|
|
|
+.ne 2
|
|
|
+.mk
|
|
|
+.na
|
|
|
+\fB\fB-o\fR \fIproperty=value\fR \fR
|
|
|
+.ad
|
|
|
+.sp .6
|
|
|
+.RS 4n
|
|
|
+Sets the specified property for \fInewpool\fR. See the "Properties" section for more information on the available pool properties.
|
|
|
+.RE
|
|
|
+
|
|
|
+.RE
|
|
|
+
|
|
|
+.sp
|
|
|
+.ne 2
|
|
|
+.mk
|
|
|
+.na
|
|
|
\fB\fBzpool status\fR [\fB-xv\fR] [\fIpool\fR] ...\fR
|
|
|
.ad
|
|
|
.sp .6
|
|
|
diff --git a/module/nvpair/fnvpair.c b/module/nvpair/fnvpair.c
|
|
|
index fa28afc..1758371 100644
|
|
|
--- a/module/nvpair/fnvpair.c
|
|
|
+++ b/module/nvpair/fnvpair.c
|
|
|
@@ -50,7 +50,7 @@ nvlist_t *
|
|
|
fnvlist_alloc(void)
|
|
|
{
|
|
|
nvlist_t *nvl;
|
|
|
- VERIFY3U(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP), ==, 0);
|
|
|
+ VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP));
|
|
|
return (nvl);
|
|
|
}
|
|
|
|
|
|
@@ -64,7 +64,7 @@ size_t
|
|
|
fnvlist_size(nvlist_t *nvl)
|
|
|
{
|
|
|
size_t size;
|
|
|
- VERIFY3U(nvlist_size(nvl, &size, NV_ENCODE_NATIVE), ==, 0);
|
|
|
+ VERIFY0(nvlist_size(nvl, &size, NV_ENCODE_NATIVE));
|
|
|
return (size);
|
|
|
}
|
|
|
|
|
|
@@ -96,7 +96,7 @@ nvlist_t *
|
|
|
fnvlist_unpack(char *buf, size_t buflen)
|
|
|
{
|
|
|
nvlist_t *rv;
|
|
|
- VERIFY3U(nvlist_unpack(buf, buflen, &rv, KM_SLEEP), ==, 0);
|
|
|
+ VERIFY0(nvlist_unpack(buf, buflen, &rv, KM_SLEEP));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -104,195 +104,195 @@ nvlist_t *
|
|
|
fnvlist_dup(nvlist_t *nvl)
|
|
|
{
|
|
|
nvlist_t *rv;
|
|
|
- VERIFY3U(nvlist_dup(nvl, &rv, KM_SLEEP), ==, 0);
|
|
|
+ VERIFY0(nvlist_dup(nvl, &rv, KM_SLEEP));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_merge(nvlist_t *dst, nvlist_t *src)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_merge(dst, src, KM_SLEEP), ==, 0);
|
|
|
+ VERIFY0(nvlist_merge(dst, src, KM_SLEEP));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_boolean(nvlist_t *nvl, const char *name)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_boolean(nvl, name), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_boolean(nvl, name));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_boolean_value(nvlist_t *nvl, const char *name, boolean_t val)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_boolean_value(nvl, name, val), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_boolean_value(nvl, name, val));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_byte(nvlist_t *nvl, const char *name, uchar_t val)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_byte(nvl, name, val), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_byte(nvl, name, val));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_int8(nvlist_t *nvl, const char *name, int8_t val)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_int8(nvl, name, val), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_int8(nvl, name, val));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_uint8(nvlist_t *nvl, const char *name, uint8_t val)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_uint8(nvl, name, val), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_uint8(nvl, name, val));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_int16(nvlist_t *nvl, const char *name, int16_t val)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_int16(nvl, name, val), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_int16(nvl, name, val));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_uint16(nvlist_t *nvl, const char *name, uint16_t val)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_uint16(nvl, name, val), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_uint16(nvl, name, val));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_int32(nvlist_t *nvl, const char *name, int32_t val)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_int32(nvl, name, val), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_int32(nvl, name, val));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_uint32(nvlist_t *nvl, const char *name, uint32_t val)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_uint32(nvl, name, val), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_uint32(nvl, name, val));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_int64(nvlist_t *nvl, const char *name, int64_t val)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_int64(nvl, name, val), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_int64(nvl, name, val));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_uint64(nvlist_t *nvl, const char *name, uint64_t val)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_uint64(nvl, name, val), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_uint64(nvl, name, val));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_string(nvlist_t *nvl, const char *name, const char *val)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_string(nvl, name, val), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_string(nvl, name, val));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_nvlist(nvlist_t *nvl, const char *name, nvlist_t *val)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_nvlist(nvl, name, val), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_nvlist(nvl, name, val));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_nvpair(nvlist_t *nvl, nvpair_t *pair)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_nvpair(nvl, pair), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_nvpair(nvl, pair));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_boolean_array(nvlist_t *nvl, const char *name,
|
|
|
boolean_t *val, uint_t n)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_boolean_array(nvl, name, val, n), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_boolean_array(nvl, name, val, n));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_byte_array(nvlist_t *nvl, const char *name, uchar_t *val, uint_t n)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_byte_array(nvl, name, val, n), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_byte_array(nvl, name, val, n));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_int8_array(nvlist_t *nvl, const char *name, int8_t *val, uint_t n)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_int8_array(nvl, name, val, n), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_int8_array(nvl, name, val, n));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_uint8_array(nvlist_t *nvl, const char *name, uint8_t *val, uint_t n)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_uint8_array(nvl, name, val, n), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_uint8_array(nvl, name, val, n));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_int16_array(nvlist_t *nvl, const char *name, int16_t *val, uint_t n)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_int16_array(nvl, name, val, n), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_int16_array(nvl, name, val, n));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_uint16_array(nvlist_t *nvl, const char *name,
|
|
|
uint16_t *val, uint_t n)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_uint16_array(nvl, name, val, n), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_uint16_array(nvl, name, val, n));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_int32_array(nvlist_t *nvl, const char *name, int32_t *val, uint_t n)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_int32_array(nvl, name, val, n), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_int32_array(nvl, name, val, n));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_uint32_array(nvlist_t *nvl, const char *name,
|
|
|
uint32_t *val, uint_t n)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_uint32_array(nvl, name, val, n), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_uint32_array(nvl, name, val, n));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_int64_array(nvlist_t *nvl, const char *name, int64_t *val, uint_t n)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_int64_array(nvl, name, val, n), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_int64_array(nvl, name, val, n));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_uint64_array(nvlist_t *nvl, const char *name,
|
|
|
uint64_t *val, uint_t n)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_uint64_array(nvl, name, val, n), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_uint64_array(nvl, name, val, n));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_string_array(nvlist_t *nvl, const char *name,
|
|
|
char * const *val, uint_t n)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_string_array(nvl, name, val, n), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_string_array(nvl, name, val, n));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_add_nvlist_array(nvlist_t *nvl, const char *name,
|
|
|
nvlist_t **val, uint_t n)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_add_nvlist_array(nvl, name, val, n), ==, 0);
|
|
|
+ VERIFY0(nvlist_add_nvlist_array(nvl, name, val, n));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_remove(nvlist_t *nvl, const char *name)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_remove_all(nvl, name), ==, 0);
|
|
|
+ VERIFY0(nvlist_remove_all(nvl, name));
|
|
|
}
|
|
|
|
|
|
void
|
|
|
fnvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *pair)
|
|
|
{
|
|
|
- VERIFY3U(nvlist_remove_nvpair(nvl, pair), ==, 0);
|
|
|
+ VERIFY0(nvlist_remove_nvpair(nvl, pair));
|
|
|
}
|
|
|
|
|
|
nvpair_t *
|
|
|
fnvlist_lookup_nvpair(nvlist_t *nvl, const char *name)
|
|
|
{
|
|
|
nvpair_t *rv;
|
|
|
- VERIFY3U(nvlist_lookup_nvpair(nvl, name, &rv), ==, 0);
|
|
|
+ VERIFY0(nvlist_lookup_nvpair(nvl, name, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -307,7 +307,7 @@ boolean_t
|
|
|
fnvlist_lookup_boolean_value(nvlist_t *nvl, const char *name)
|
|
|
{
|
|
|
boolean_t rv;
|
|
|
- VERIFY3U(nvlist_lookup_boolean_value(nvl, name, &rv), ==, 0);
|
|
|
+ VERIFY0(nvlist_lookup_boolean_value(nvl, name, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -315,7 +315,7 @@ uchar_t
|
|
|
fnvlist_lookup_byte(nvlist_t *nvl, const char *name)
|
|
|
{
|
|
|
uchar_t rv;
|
|
|
- VERIFY3U(nvlist_lookup_byte(nvl, name, &rv), ==, 0);
|
|
|
+ VERIFY0(nvlist_lookup_byte(nvl, name, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -323,7 +323,7 @@ int8_t
|
|
|
fnvlist_lookup_int8(nvlist_t *nvl, const char *name)
|
|
|
{
|
|
|
int8_t rv;
|
|
|
- VERIFY3U(nvlist_lookup_int8(nvl, name, &rv), ==, 0);
|
|
|
+ VERIFY0(nvlist_lookup_int8(nvl, name, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -331,7 +331,7 @@ int16_t
|
|
|
fnvlist_lookup_int16(nvlist_t *nvl, const char *name)
|
|
|
{
|
|
|
int16_t rv;
|
|
|
- VERIFY3U(nvlist_lookup_int16(nvl, name, &rv), ==, 0);
|
|
|
+ VERIFY0(nvlist_lookup_int16(nvl, name, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -339,7 +339,7 @@ int32_t
|
|
|
fnvlist_lookup_int32(nvlist_t *nvl, const char *name)
|
|
|
{
|
|
|
int32_t rv;
|
|
|
- VERIFY3U(nvlist_lookup_int32(nvl, name, &rv), ==, 0);
|
|
|
+ VERIFY0(nvlist_lookup_int32(nvl, name, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -347,7 +347,7 @@ int64_t
|
|
|
fnvlist_lookup_int64(nvlist_t *nvl, const char *name)
|
|
|
{
|
|
|
int64_t rv;
|
|
|
- VERIFY3U(nvlist_lookup_int64(nvl, name, &rv), ==, 0);
|
|
|
+ VERIFY0(nvlist_lookup_int64(nvl, name, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -355,7 +355,7 @@ uint8_t
|
|
|
fnvlist_lookup_uint8(nvlist_t *nvl, const char *name)
|
|
|
{
|
|
|
uint8_t rv;
|
|
|
- VERIFY3U(nvlist_lookup_uint8(nvl, name, &rv), ==, 0);
|
|
|
+ VERIFY0(nvlist_lookup_uint8(nvl, name, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -363,7 +363,7 @@ uint16_t
|
|
|
fnvlist_lookup_uint16(nvlist_t *nvl, const char *name)
|
|
|
{
|
|
|
uint16_t rv;
|
|
|
- VERIFY3U(nvlist_lookup_uint16(nvl, name, &rv), ==, 0);
|
|
|
+ VERIFY0(nvlist_lookup_uint16(nvl, name, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -371,7 +371,7 @@ uint32_t
|
|
|
fnvlist_lookup_uint32(nvlist_t *nvl, const char *name)
|
|
|
{
|
|
|
uint32_t rv;
|
|
|
- VERIFY3U(nvlist_lookup_uint32(nvl, name, &rv), ==, 0);
|
|
|
+ VERIFY0(nvlist_lookup_uint32(nvl, name, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -379,7 +379,7 @@ uint64_t
|
|
|
fnvlist_lookup_uint64(nvlist_t *nvl, const char *name)
|
|
|
{
|
|
|
uint64_t rv;
|
|
|
- VERIFY3U(nvlist_lookup_uint64(nvl, name, &rv), ==, 0);
|
|
|
+ VERIFY0(nvlist_lookup_uint64(nvl, name, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -387,7 +387,7 @@ char *
|
|
|
fnvlist_lookup_string(nvlist_t *nvl, const char *name)
|
|
|
{
|
|
|
char *rv;
|
|
|
- VERIFY3U(nvlist_lookup_string(nvl, name, &rv), ==, 0);
|
|
|
+ VERIFY0(nvlist_lookup_string(nvl, name, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -395,7 +395,7 @@ nvlist_t *
|
|
|
fnvlist_lookup_nvlist(nvlist_t *nvl, const char *name)
|
|
|
{
|
|
|
nvlist_t *rv;
|
|
|
- VERIFY3U(nvlist_lookup_nvlist(nvl, name, &rv), ==, 0);
|
|
|
+ VERIFY0(nvlist_lookup_nvlist(nvl, name, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -403,7 +403,7 @@ boolean_t
|
|
|
fnvpair_value_boolean_value(nvpair_t *nvp)
|
|
|
{
|
|
|
boolean_t rv;
|
|
|
- VERIFY3U(nvpair_value_boolean_value(nvp, &rv), ==, 0);
|
|
|
+ VERIFY0(nvpair_value_boolean_value(nvp, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -411,7 +411,7 @@ uchar_t
|
|
|
fnvpair_value_byte(nvpair_t *nvp)
|
|
|
{
|
|
|
uchar_t rv;
|
|
|
- VERIFY3U(nvpair_value_byte(nvp, &rv), ==, 0);
|
|
|
+ VERIFY0(nvpair_value_byte(nvp, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -419,7 +419,7 @@ int8_t
|
|
|
fnvpair_value_int8(nvpair_t *nvp)
|
|
|
{
|
|
|
int8_t rv;
|
|
|
- VERIFY3U(nvpair_value_int8(nvp, &rv), ==, 0);
|
|
|
+ VERIFY0(nvpair_value_int8(nvp, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -427,7 +427,7 @@ int16_t
|
|
|
fnvpair_value_int16(nvpair_t *nvp)
|
|
|
{
|
|
|
int16_t rv;
|
|
|
- VERIFY3U(nvpair_value_int16(nvp, &rv), ==, 0);
|
|
|
+ VERIFY0(nvpair_value_int16(nvp, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -435,7 +435,7 @@ int32_t
|
|
|
fnvpair_value_int32(nvpair_t *nvp)
|
|
|
{
|
|
|
int32_t rv;
|
|
|
- VERIFY3U(nvpair_value_int32(nvp, &rv), ==, 0);
|
|
|
+ VERIFY0(nvpair_value_int32(nvp, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -443,7 +443,7 @@ int64_t
|
|
|
fnvpair_value_int64(nvpair_t *nvp)
|
|
|
{
|
|
|
int64_t rv;
|
|
|
- VERIFY3U(nvpair_value_int64(nvp, &rv), ==, 0);
|
|
|
+ VERIFY0(nvpair_value_int64(nvp, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -451,7 +451,7 @@ uint8_t
|
|
|
fnvpair_value_uint8(nvpair_t *nvp)
|
|
|
{
|
|
|
uint8_t rv;
|
|
|
- VERIFY3U(nvpair_value_uint8(nvp, &rv), ==, 0);
|
|
|
+ VERIFY0(nvpair_value_uint8(nvp, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -459,7 +459,7 @@ uint16_t
|
|
|
fnvpair_value_uint16(nvpair_t *nvp)
|
|
|
{
|
|
|
uint16_t rv;
|
|
|
- VERIFY3U(nvpair_value_uint16(nvp, &rv), ==, 0);
|
|
|
+ VERIFY0(nvpair_value_uint16(nvp, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -467,7 +467,7 @@ uint32_t
|
|
|
fnvpair_value_uint32(nvpair_t *nvp)
|
|
|
{
|
|
|
uint32_t rv;
|
|
|
- VERIFY3U(nvpair_value_uint32(nvp, &rv), ==, 0);
|
|
|
+ VERIFY0(nvpair_value_uint32(nvp, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -475,7 +475,7 @@ uint64_t
|
|
|
fnvpair_value_uint64(nvpair_t *nvp)
|
|
|
{
|
|
|
uint64_t rv;
|
|
|
- VERIFY3U(nvpair_value_uint64(nvp, &rv), ==, 0);
|
|
|
+ VERIFY0(nvpair_value_uint64(nvp, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -483,7 +483,7 @@ char *
|
|
|
fnvpair_value_string(nvpair_t *nvp)
|
|
|
{
|
|
|
char *rv;
|
|
|
- VERIFY3U(nvpair_value_string(nvp, &rv), ==, 0);
|
|
|
+ VERIFY0(nvpair_value_string(nvp, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
@@ -491,7 +491,7 @@ nvlist_t *
|
|
|
fnvpair_value_nvlist(nvpair_t *nvp)
|
|
|
{
|
|
|
nvlist_t *rv;
|
|
|
- VERIFY3U(nvpair_value_nvlist(nvp, &rv), ==, 0);
|
|
|
+ VERIFY0(nvpair_value_nvlist(nvp, &rv));
|
|
|
return (rv);
|
|
|
}
|
|
|
|
|
|
diff --git a/module/nvpair/nvpair.c b/module/nvpair/nvpair.c
|
|
|
index 5c68984..36f4e4d 100644
|
|
|
--- a/module/nvpair/nvpair.c
|
|
|
+++ b/module/nvpair/nvpair.c
|
|
|
@@ -269,12 +269,25 @@ nvlist_nvflag(nvlist_t *nvl)
|
|
|
int
|
|
|
nvlist_alloc(nvlist_t **nvlp, uint_t nvflag, int kmflag)
|
|
|
{
|
|
|
+ nv_alloc_t *nva = nv_alloc_nosleep;
|
|
|
+
|
|
|
#if defined(_KERNEL) && !defined(_BOOT)
|
|
|
- return (nvlist_xalloc(nvlp, nvflag,
|
|
|
- (kmflag == KM_SLEEP ? nv_alloc_sleep : nv_alloc_nosleep)));
|
|
|
-#else
|
|
|
- return (nvlist_xalloc(nvlp, nvflag, nv_alloc_nosleep));
|
|
|
+ switch (kmflag) {
|
|
|
+ case KM_SLEEP:
|
|
|
+ nva = nv_alloc_sleep;
|
|
|
+ break;
|
|
|
+ case KM_PUSHPAGE:
|
|
|
+ nva = nv_alloc_pushpage;
|
|
|
+ break;
|
|
|
+ case KM_NOSLEEP:
|
|
|
+ nva = nv_alloc_nosleep;
|
|
|
+ break;
|
|
|
+ default:
|
|
|
+ return (EINVAL);
|
|
|
+ }
|
|
|
#endif
|
|
|
+
|
|
|
+ return (nvlist_xalloc(nvlp, nvflag, nva));
|
|
|
}
|
|
|
|
|
|
int
|
|
|
diff --git a/module/nvpair/nvpair_alloc_spl.c b/module/nvpair/nvpair_alloc_spl.c
|
|
|
index 63d57a1..be6e8f0 100644
|
|
|
--- a/module/nvpair/nvpair_alloc_spl.c
|
|
|
+++ b/module/nvpair/nvpair_alloc_spl.c
|
|
|
@@ -34,6 +34,12 @@ nv_alloc_sleep_spl(nv_alloc_t *nva, size_t size)
|
|
|
}
|
|
|
|
|
|
static void *
|
|
|
+nv_alloc_pushpage_spl(nv_alloc_t *nva, size_t size)
|
|
|
+{
|
|
|
+ return (kmem_alloc(size, KM_PUSHPAGE | KM_NODEBUG));
|
|
|
+}
|
|
|
+
|
|
|
+static void *
|
|
|
nv_alloc_nosleep_spl(nv_alloc_t *nva, size_t size)
|
|
|
{
|
|
|
return (kmem_alloc(size, KM_NOSLEEP));
|
|
|
@@ -53,6 +59,14 @@ const nv_alloc_ops_t spl_sleep_ops_def = {
|
|
|
NULL /* nv_ao_reset() */
|
|
|
};
|
|
|
|
|
|
+const nv_alloc_ops_t spl_pushpage_ops_def = {
|
|
|
+ NULL, /* nv_ao_init() */
|
|
|
+ NULL, /* nv_ao_fini() */
|
|
|
+ nv_alloc_pushpage_spl, /* nv_ao_alloc() */
|
|
|
+ nv_free_spl, /* nv_ao_free() */
|
|
|
+ NULL /* nv_ao_reset() */
|
|
|
+};
|
|
|
+
|
|
|
const nv_alloc_ops_t spl_nosleep_ops_def = {
|
|
|
NULL, /* nv_ao_init() */
|
|
|
NULL, /* nv_ao_fini() */
|
|
|
@@ -66,10 +80,16 @@ nv_alloc_t nv_alloc_sleep_def = {
|
|
|
NULL
|
|
|
};
|
|
|
|
|
|
+nv_alloc_t nv_alloc_pushpage_def = {
|
|
|
+ &spl_pushpage_ops_def,
|
|
|
+ NULL
|
|
|
+};
|
|
|
+
|
|
|
nv_alloc_t nv_alloc_nosleep_def = {
|
|
|
&spl_nosleep_ops_def,
|
|
|
NULL
|
|
|
};
|
|
|
|
|
|
nv_alloc_t *nv_alloc_sleep = &nv_alloc_sleep_def;
|
|
|
+nv_alloc_t *nv_alloc_pushpage = &nv_alloc_pushpage_def;
|
|
|
nv_alloc_t *nv_alloc_nosleep = &nv_alloc_nosleep_def;
|
|
|
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
|
|
|
index da070cd..df3aeb7 100644
|
|
|
--- a/module/zfs/arc.c
|
|
|
+++ b/module/zfs/arc.c
|
|
|
@@ -884,7 +884,6 @@ buf_cons(void *vbuf, void *unused, int kmflag)
|
|
|
|
|
|
bzero(buf, sizeof (arc_buf_t));
|
|
|
mutex_init(&buf->b_evict_lock, NULL, MUTEX_DEFAULT, NULL);
|
|
|
- rw_init(&buf->b_data_lock, NULL, RW_DEFAULT, NULL);
|
|
|
arc_space_consume(sizeof (arc_buf_t), ARC_SPACE_HDRS);
|
|
|
|
|
|
return (0);
|
|
|
@@ -914,7 +913,6 @@ buf_dest(void *vbuf, void *unused)
|
|
|
arc_buf_t *buf = vbuf;
|
|
|
|
|
|
mutex_destroy(&buf->b_evict_lock);
|
|
|
- rw_destroy(&buf->b_data_lock);
|
|
|
arc_space_return(sizeof (arc_buf_t), ARC_SPACE_HDRS);
|
|
|
}
|
|
|
|
|
|
@@ -1077,7 +1075,7 @@ add_reference(arc_buf_hdr_t *ab, kmutex_t *hash_lock, void *tag)
|
|
|
ASSERT(list_link_active(&ab->b_arc_node));
|
|
|
list_remove(list, ab);
|
|
|
if (GHOST_STATE(ab->b_state)) {
|
|
|
- ASSERT3U(ab->b_datacnt, ==, 0);
|
|
|
+ ASSERT0(ab->b_datacnt);
|
|
|
ASSERT3P(ab->b_buf, ==, NULL);
|
|
|
delta = ab->b_size;
|
|
|
}
|
|
|
@@ -1644,7 +1642,7 @@ int
|
|
|
arc_buf_remove_ref(arc_buf_t *buf, void* tag)
|
|
|
{
|
|
|
arc_buf_hdr_t *hdr = buf->b_hdr;
|
|
|
- kmutex_t *hash_lock = HDR_LOCK(hdr);
|
|
|
+ kmutex_t *hash_lock = NULL;
|
|
|
int no_callback = (buf->b_efunc == NULL);
|
|
|
|
|
|
if (hdr->b_state == arc_anon) {
|
|
|
@@ -1653,6 +1651,7 @@ arc_buf_remove_ref(arc_buf_t *buf, void* tag)
|
|
|
return (no_callback);
|
|
|
}
|
|
|
|
|
|
+ hash_lock = HDR_LOCK(hdr);
|
|
|
mutex_enter(hash_lock);
|
|
|
hdr = buf->b_hdr;
|
|
|
ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
|
|
|
@@ -1772,7 +1771,7 @@ arc_evict(arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle,
|
|
|
hash_lock = HDR_LOCK(ab);
|
|
|
have_lock = MUTEX_HELD(hash_lock);
|
|
|
if (have_lock || mutex_tryenter(hash_lock)) {
|
|
|
- ASSERT3U(refcount_count(&ab->b_refcnt), ==, 0);
|
|
|
+ ASSERT0(refcount_count(&ab->b_refcnt));
|
|
|
ASSERT(ab->b_datacnt > 0);
|
|
|
while (ab->b_buf) {
|
|
|
arc_buf_t *buf = ab->b_buf;
|
|
|
@@ -2718,7 +2717,7 @@ arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock)
|
|
|
* This is a prefetch access...
|
|
|
* move this block back to the MRU state.
|
|
|
*/
|
|
|
- ASSERT3U(refcount_count(&buf->b_refcnt), ==, 0);
|
|
|
+ ASSERT0(refcount_count(&buf->b_refcnt));
|
|
|
new_state = arc_mru;
|
|
|
}
|
|
|
|
|
|
@@ -2907,42 +2906,11 @@ arc_read_done(zio_t *zio)
|
|
|
*
|
|
|
* arc_read_done() will invoke all the requested "done" functions
|
|
|
* for readers of this block.
|
|
|
- *
|
|
|
- * Normal callers should use arc_read and pass the arc buffer and offset
|
|
|
- * for the bp. But if you know you don't need locking, you can use
|
|
|
- * arc_read_bp.
|
|
|
*/
|
|
|
int
|
|
|
-arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_buf_t *pbuf,
|
|
|
- arc_done_func_t *done, void *private, int priority, int zio_flags,
|
|
|
- uint32_t *arc_flags, const zbookmark_t *zb)
|
|
|
-{
|
|
|
- int err;
|
|
|
-
|
|
|
- if (pbuf == NULL) {
|
|
|
- /*
|
|
|
- * XXX This happens from traverse callback funcs, for
|
|
|
- * the objset_phys_t block.
|
|
|
- */
|
|
|
- return (arc_read_nolock(pio, spa, bp, done, private, priority,
|
|
|
- zio_flags, arc_flags, zb));
|
|
|
- }
|
|
|
-
|
|
|
- ASSERT(!refcount_is_zero(&pbuf->b_hdr->b_refcnt));
|
|
|
- ASSERT3U((char *)bp - (char *)pbuf->b_data, <, pbuf->b_hdr->b_size);
|
|
|
- rw_enter(&pbuf->b_data_lock, RW_READER);
|
|
|
-
|
|
|
- err = arc_read_nolock(pio, spa, bp, done, private, priority,
|
|
|
- zio_flags, arc_flags, zb);
|
|
|
- rw_exit(&pbuf->b_data_lock);
|
|
|
-
|
|
|
- return (err);
|
|
|
-}
|
|
|
-
|
|
|
-int
|
|
|
-arc_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bp,
|
|
|
- arc_done_func_t *done, void *private, int priority, int zio_flags,
|
|
|
- uint32_t *arc_flags, const zbookmark_t *zb)
|
|
|
+arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
|
|
|
+ void *private, int priority, int zio_flags, uint32_t *arc_flags,
|
|
|
+ const zbookmark_t *zb)
|
|
|
{
|
|
|
arc_buf_hdr_t *hdr;
|
|
|
arc_buf_t *buf = NULL;
|
|
|
@@ -3061,7 +3029,7 @@ top:
|
|
|
/* this block is in the ghost cache */
|
|
|
ASSERT(GHOST_STATE(hdr->b_state));
|
|
|
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
|
|
|
- ASSERT3U(refcount_count(&hdr->b_refcnt), ==, 0);
|
|
|
+ ASSERT0(refcount_count(&hdr->b_refcnt));
|
|
|
ASSERT(hdr->b_buf == NULL);
|
|
|
|
|
|
/* if this is a prefetch, we don't have a reference */
|
|
|
@@ -3241,6 +3209,34 @@ arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private)
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
+ * Notify the arc that a block was freed, and thus will never be used again.
|
|
|
+ */
|
|
|
+void
|
|
|
+arc_freed(spa_t *spa, const blkptr_t *bp)
|
|
|
+{
|
|
|
+ arc_buf_hdr_t *hdr;
|
|
|
+ kmutex_t *hash_lock;
|
|
|
+ uint64_t guid = spa_load_guid(spa);
|
|
|
+
|
|
|
+ hdr = buf_hash_find(guid, BP_IDENTITY(bp), BP_PHYSICAL_BIRTH(bp),
|
|
|
+ &hash_lock);
|
|
|
+ if (hdr == NULL)
|
|
|
+ return;
|
|
|
+ if (HDR_BUF_AVAILABLE(hdr)) {
|
|
|
+ arc_buf_t *buf = hdr->b_buf;
|
|
|
+ add_reference(hdr, hash_lock, FTAG);
|
|
|
+ hdr->b_flags &= ~ARC_BUF_AVAILABLE;
|
|
|
+ mutex_exit(hash_lock);
|
|
|
+
|
|
|
+ arc_release(buf, FTAG);
|
|
|
+ (void) arc_buf_remove_ref(buf, FTAG);
|
|
|
+ } else {
|
|
|
+ mutex_exit(hash_lock);
|
|
|
+ }
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
* This is used by the DMU to let the ARC know that a buffer is
|
|
|
* being evicted, so the ARC should clean up. If this arc buf
|
|
|
* is not yet in the evicted state, it will be put there.
|
|
|
@@ -3452,19 +3448,6 @@ arc_release(arc_buf_t *buf, void *tag)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-/*
|
|
|
- * Release this buffer. If it does not match the provided BP, fill it
|
|
|
- * with that block's contents.
|
|
|
- */
|
|
|
-/* ARGSUSED */
|
|
|
-int
|
|
|
-arc_release_bp(arc_buf_t *buf, void *tag, blkptr_t *bp, spa_t *spa,
|
|
|
- zbookmark_t *zb)
|
|
|
-{
|
|
|
- arc_release(buf, tag);
|
|
|
- return (0);
|
|
|
-}
|
|
|
-
|
|
|
int
|
|
|
arc_released(arc_buf_t *buf)
|
|
|
{
|
|
|
@@ -4723,7 +4706,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
|
|
|
mutex_exit(&l2arc_buflist_mtx);
|
|
|
|
|
|
if (pio == NULL) {
|
|
|
- ASSERT3U(write_sz, ==, 0);
|
|
|
+ ASSERT0(write_sz);
|
|
|
kmem_cache_free(hdr_cache, head);
|
|
|
return (0);
|
|
|
}
|
|
|
diff --git a/module/zfs/bpobj.c b/module/zfs/bpobj.c
|
|
|
index d5f8d40..1920da4 100644
|
|
|
--- a/module/zfs/bpobj.c
|
|
|
+++ b/module/zfs/bpobj.c
|
|
|
@@ -43,7 +43,7 @@ bpobj_alloc_empty(objset_t *os, int blocksize, dmu_tx_t *tx)
|
|
|
|
|
|
if (spa_feature_is_enabled(spa, empty_bpobj_feat)) {
|
|
|
if (!spa_feature_is_active(spa, empty_bpobj_feat)) {
|
|
|
- ASSERT3U(dp->dp_empty_bpobj, ==, 0);
|
|
|
+ ASSERT0(dp->dp_empty_bpobj);
|
|
|
dp->dp_empty_bpobj =
|
|
|
bpobj_alloc(os, SPA_MAXBLOCKSIZE, tx);
|
|
|
VERIFY(zap_add(os,
|
|
|
diff --git a/module/zfs/bptree.c b/module/zfs/bptree.c
|
|
|
index 8c5a7d4..73922db 100644
|
|
|
--- a/module/zfs/bptree.c
|
|
|
+++ b/module/zfs/bptree.c
|
|
|
@@ -94,9 +94,9 @@ bptree_free(objset_t *os, uint64_t obj, dmu_tx_t *tx)
|
|
|
VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
|
|
|
bt = db->db_data;
|
|
|
ASSERT3U(bt->bt_begin, ==, bt->bt_end);
|
|
|
- ASSERT3U(bt->bt_bytes, ==, 0);
|
|
|
- ASSERT3U(bt->bt_comp, ==, 0);
|
|
|
- ASSERT3U(bt->bt_uncomp, ==, 0);
|
|
|
+ ASSERT0(bt->bt_bytes);
|
|
|
+ ASSERT0(bt->bt_comp);
|
|
|
+ ASSERT0(bt->bt_uncomp);
|
|
|
dmu_buf_rele(db, FTAG);
|
|
|
|
|
|
return (dmu_object_free(os, obj, tx));
|
|
|
@@ -135,7 +135,7 @@ bptree_add(objset_t *os, uint64_t obj, blkptr_t *bp, uint64_t birth_txg,
|
|
|
|
|
|
/* ARGSUSED */
|
|
|
static int
|
|
|
-bptree_visit_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
|
|
|
+bptree_visit_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|
|
const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
|
|
|
{
|
|
|
int err;
|
|
|
@@ -189,7 +189,8 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
|
|
|
break;
|
|
|
|
|
|
err = traverse_dataset_destroyed(os->os_spa, &bte.be_bp,
|
|
|
- bte.be_birth_txg, &bte.be_zb, TRAVERSE_POST,
|
|
|
+ bte.be_birth_txg, &bte.be_zb,
|
|
|
+ TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST,
|
|
|
bptree_visit_cb, &ba);
|
|
|
if (free) {
|
|
|
ASSERT(err == 0 || err == ERESTART);
|
|
|
@@ -197,7 +198,7 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
|
|
|
/* save bookmark for future resume */
|
|
|
ASSERT3U(bte.be_zb.zb_objset, ==,
|
|
|
ZB_DESTROYED_OBJSET);
|
|
|
- ASSERT3U(bte.be_zb.zb_level, ==, 0);
|
|
|
+ ASSERT0(bte.be_zb.zb_level);
|
|
|
dmu_write(os, obj, i * sizeof (bte),
|
|
|
sizeof (bte), &bte, tx);
|
|
|
break;
|
|
|
@@ -213,9 +214,9 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
|
|
|
|
|
|
/* if all blocks are free there should be no used space */
|
|
|
if (ba.ba_phys->bt_begin == ba.ba_phys->bt_end) {
|
|
|
- ASSERT3U(ba.ba_phys->bt_bytes, ==, 0);
|
|
|
- ASSERT3U(ba.ba_phys->bt_comp, ==, 0);
|
|
|
- ASSERT3U(ba.ba_phys->bt_uncomp, ==, 0);
|
|
|
+ ASSERT0(ba.ba_phys->bt_bytes);
|
|
|
+ ASSERT0(ba.ba_phys->bt_comp);
|
|
|
+ ASSERT0(ba.ba_phys->bt_uncomp);
|
|
|
}
|
|
|
|
|
|
dmu_buf_rele(db, FTAG);
|
|
|
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
|
|
|
index 205abaa..46f42dd 100644
|
|
|
--- a/module/zfs/dbuf.c
|
|
|
+++ b/module/zfs/dbuf.c
|
|
|
@@ -373,7 +373,7 @@ dbuf_verify(dmu_buf_impl_t *db)
|
|
|
} else if (db->db_blkid == DMU_SPILL_BLKID) {
|
|
|
ASSERT(dn != NULL);
|
|
|
ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
|
|
|
- ASSERT3U(db->db.db_offset, ==, 0);
|
|
|
+ ASSERT0(db->db.db_offset);
|
|
|
} else {
|
|
|
ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size);
|
|
|
}
|
|
|
@@ -559,7 +559,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
|
|
|
spa_t *spa;
|
|
|
zbookmark_t zb;
|
|
|
uint32_t aflags = ARC_NOWAIT;
|
|
|
- arc_buf_t *pbuf;
|
|
|
|
|
|
DB_DNODE_ENTER(db);
|
|
|
dn = DB_DNODE(db);
|
|
|
@@ -621,14 +620,8 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
|
|
|
db->db.db_object, db->db_level, db->db_blkid);
|
|
|
|
|
|
dbuf_add_ref(db, NULL);
|
|
|
- /* ZIO_FLAG_CANFAIL callers have to check the parent zio's error */
|
|
|
|
|
|
- if (db->db_parent)
|
|
|
- pbuf = db->db_parent->db_buf;
|
|
|
- else
|
|
|
- pbuf = db->db_objset->os_phys_buf;
|
|
|
-
|
|
|
- (void) dsl_read(zio, spa, db->db_blkptr, pbuf,
|
|
|
+ (void) arc_read(zio, spa, db->db_blkptr,
|
|
|
dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ,
|
|
|
(*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
|
|
|
&aflags, &zb);
|
|
|
@@ -1026,7 +1019,6 @@ void
|
|
|
dbuf_release_bp(dmu_buf_impl_t *db)
|
|
|
{
|
|
|
objset_t *os;
|
|
|
- zbookmark_t zb;
|
|
|
|
|
|
DB_GET_OBJSET(&os, db);
|
|
|
ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
|
|
|
@@ -1034,13 +1026,7 @@ dbuf_release_bp(dmu_buf_impl_t *db)
|
|
|
list_link_active(&os->os_dsl_dataset->ds_synced_link));
|
|
|
ASSERT(db->db_parent == NULL || arc_released(db->db_parent->db_buf));
|
|
|
|
|
|
- zb.zb_objset = os->os_dsl_dataset ?
|
|
|
- os->os_dsl_dataset->ds_object : 0;
|
|
|
- zb.zb_object = db->db.db_object;
|
|
|
- zb.zb_level = db->db_level;
|
|
|
- zb.zb_blkid = db->db_blkid;
|
|
|
- (void) arc_release_bp(db->db_buf, db,
|
|
|
- db->db_blkptr, os->os_spa, &zb);
|
|
|
+ (void) arc_release(db->db_buf, db);
|
|
|
}
|
|
|
|
|
|
dbuf_dirty_record_t *
|
|
|
@@ -1886,7 +1872,6 @@ dbuf_prefetch(dnode_t *dn, uint64_t blkid)
|
|
|
if (bp && !BP_IS_HOLE(bp)) {
|
|
|
int priority = dn->dn_type == DMU_OT_DDT_ZAP ?
|
|
|
ZIO_PRIORITY_DDT_PREFETCH : ZIO_PRIORITY_ASYNC_READ;
|
|
|
- arc_buf_t *pbuf;
|
|
|
dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
|
|
|
uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
|
|
|
zbookmark_t zb;
|
|
|
@@ -1894,13 +1879,8 @@ dbuf_prefetch(dnode_t *dn, uint64_t blkid)
|
|
|
SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
|
|
|
dn->dn_object, 0, blkid);
|
|
|
|
|
|
- if (db)
|
|
|
- pbuf = db->db_buf;
|
|
|
- else
|
|
|
- pbuf = dn->dn_objset->os_phys_buf;
|
|
|
-
|
|
|
- (void) dsl_read(NULL, dn->dn_objset->os_spa,
|
|
|
- bp, pbuf, NULL, NULL, priority,
|
|
|
+ (void) arc_read(NULL, dn->dn_objset->os_spa,
|
|
|
+ bp, NULL, NULL, priority,
|
|
|
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
|
|
|
&aflags, &zb);
|
|
|
}
|
|
|
@@ -2433,7 +2413,7 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
|
|
|
dbuf_dirty_record_t **drp;
|
|
|
|
|
|
ASSERT(*datap != NULL);
|
|
|
- ASSERT3U(db->db_level, ==, 0);
|
|
|
+ ASSERT0(db->db_level);
|
|
|
ASSERT3U(dn->dn_phys->dn_bonuslen, <=, DN_MAX_BONUSLEN);
|
|
|
bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen);
|
|
|
DB_DNODE_EXIT(db);
|
|
|
@@ -2636,7 +2616,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
|
|
|
uint64_t txg = zio->io_txg;
|
|
|
dbuf_dirty_record_t **drp, *dr;
|
|
|
|
|
|
- ASSERT3U(zio->io_error, ==, 0);
|
|
|
+ ASSERT0(zio->io_error);
|
|
|
ASSERT(db->db_blkptr == bp);
|
|
|
|
|
|
if (zio->io_flags & ZIO_FLAG_IO_REWRITE) {
|
|
|
diff --git a/module/zfs/ddt.c b/module/zfs/ddt.c
|
|
|
index 2d8763a..286f3bb 100644
|
|
|
--- a/module/zfs/ddt.c
|
|
|
+++ b/module/zfs/ddt.c
|
|
|
@@ -324,7 +324,7 @@ void
|
|
|
ddt_phys_decref(ddt_phys_t *ddp)
|
|
|
{
|
|
|
if (ddp) {
|
|
|
- ASSERT((int64_t)ddp->ddp_refcnt > 0);
|
|
|
+ ASSERT(ddp->ddp_refcnt > 0);
|
|
|
ddp->ddp_refcnt--;
|
|
|
}
|
|
|
}
|
|
|
@@ -1058,7 +1058,6 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)
|
|
|
|
|
|
for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
|
|
|
ASSERT(dde->dde_lead_zio[p] == NULL);
|
|
|
- ASSERT((int64_t)ddp->ddp_refcnt >= 0);
|
|
|
if (ddp->ddp_phys_birth == 0) {
|
|
|
ASSERT(ddp->ddp_refcnt == 0);
|
|
|
continue;
|
|
|
diff --git a/module/zfs/dmu_diff.c b/module/zfs/dmu_diff.c
|
|
|
index 22340eb..dc23778 100644
|
|
|
--- a/module/zfs/dmu_diff.c
|
|
|
+++ b/module/zfs/dmu_diff.c
|
|
|
@@ -105,7 +105,7 @@ report_dnode(struct diffarg *da, uint64_t object, dnode_phys_t *dnp)
|
|
|
|
|
|
/* ARGSUSED */
|
|
|
static int
|
|
|
-diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
|
|
|
+diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|
|
const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
|
|
|
{
|
|
|
struct diffarg *da = arg;
|
|
|
@@ -132,9 +132,9 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
|
|
|
int blksz = BP_GET_LSIZE(bp);
|
|
|
int i;
|
|
|
|
|
|
- if (dsl_read(NULL, spa, bp, pbuf,
|
|
|
- arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
|
|
|
- ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
|
|
|
+ if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
|
|
|
+ ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
|
|
|
+ &aflags, zb) != 0)
|
|
|
return (EIO);
|
|
|
|
|
|
blk = abuf->b_data;
|
|
|
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
|
|
|
index 50673dc..ca75e52 100644
|
|
|
--- a/module/zfs/dmu_objset.c
|
|
|
+++ b/module/zfs/dmu_objset.c
|
|
|
@@ -276,12 +276,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
|
|
|
aflags |= ARC_L2CACHE;
|
|
|
|
|
|
dprintf_bp(os->os_rootbp, "reading %s", "");
|
|
|
- /*
|
|
|
- * XXX when bprewrite scrub can change the bp,
|
|
|
- * and this is called from dmu_objset_open_ds_os, the bp
|
|
|
- * could change, and we'll need a lock.
|
|
|
- */
|
|
|
- err = dsl_read_nolock(NULL, spa, os->os_rootbp,
|
|
|
+ err = arc_read(NULL, spa, os->os_rootbp,
|
|
|
arc_getbuf_func, &os->os_phys_buf,
|
|
|
ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
|
|
|
if (err) {
|
|
|
@@ -1119,8 +1114,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
|
|
|
SET_BOOKMARK(&zb, os->os_dsl_dataset ?
|
|
|
os->os_dsl_dataset->ds_object : DMU_META_OBJSET,
|
|
|
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
|
|
|
- VERIFY3U(0, ==, arc_release_bp(os->os_phys_buf, &os->os_phys_buf,
|
|
|
- os->os_rootbp, os->os_spa, &zb));
|
|
|
+ arc_release(os->os_phys_buf, &os->os_phys_buf);
|
|
|
|
|
|
dmu_write_policy(os, NULL, 0, 0, &zp);
|
|
|
|
|
|
@@ -1765,7 +1759,7 @@ dmu_objset_prefetch(const char *name, void *arg)
|
|
|
SET_BOOKMARK(&zb, ds->ds_object, ZB_ROOT_OBJECT,
|
|
|
ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
|
|
|
|
|
|
- (void) dsl_read_nolock(NULL, dsl_dataset_get_spa(ds),
|
|
|
+ (void) arc_read(NULL, dsl_dataset_get_spa(ds),
|
|
|
&ds->ds_phys->ds_bp, NULL, NULL,
|
|
|
ZIO_PRIORITY_ASYNC_READ,
|
|
|
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
|
|
|
diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
|
|
|
index 921c3d7..54e7597 100644
|
|
|
--- a/module/zfs/dmu_send.c
|
|
|
+++ b/module/zfs/dmu_send.c
|
|
|
@@ -39,6 +39,7 @@
|
|
|
#include <sys/dsl_prop.h>
|
|
|
#include <sys/dsl_pool.h>
|
|
|
#include <sys/dsl_synctask.h>
|
|
|
+#include <sys/spa_impl.h>
|
|
|
#include <sys/zfs_ioctl.h>
|
|
|
#include <sys/zap.h>
|
|
|
#include <sys/zio_checksum.h>
|
|
|
@@ -53,21 +54,48 @@ int zfs_send_corrupt_data = B_FALSE;
|
|
|
|
|
|
static char *dmu_recv_tag = "dmu_recv_tag";
|
|
|
|
|
|
-static int
|
|
|
-dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
|
|
|
+typedef struct dump_bytes_io {
|
|
|
+ dmu_sendarg_t *dbi_dsp;
|
|
|
+ void *dbi_buf;
|
|
|
+ int dbi_len;
|
|
|
+} dump_bytes_io_t;
|
|
|
+
|
|
|
+static void
|
|
|
+dump_bytes_strategy(void *arg)
|
|
|
{
|
|
|
+ dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg;
|
|
|
+ dmu_sendarg_t *dsp = dbi->dbi_dsp;
|
|
|
dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset;
|
|
|
ssize_t resid; /* have to get resid to get detailed errno */
|
|
|
- ASSERT3U(len % 8, ==, 0);
|
|
|
+ ASSERT0(dbi->dbi_len % 8);
|
|
|
|
|
|
- fletcher_4_incremental_native(buf, len, &dsp->dsa_zc);
|
|
|
+ fletcher_4_incremental_native(dbi->dbi_buf, dbi->dbi_len, &dsp->dsa_zc);
|
|
|
dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp,
|
|
|
- (caddr_t)buf, len,
|
|
|
+ (caddr_t)dbi->dbi_buf, dbi->dbi_len,
|
|
|
0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid);
|
|
|
|
|
|
mutex_enter(&ds->ds_sendstream_lock);
|
|
|
- *dsp->dsa_off += len;
|
|
|
+ *dsp->dsa_off += dbi->dbi_len;
|
|
|
mutex_exit(&ds->ds_sendstream_lock);
|
|
|
+}
|
|
|
+
|
|
|
+static int
|
|
|
+dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
|
|
|
+{
|
|
|
+ dump_bytes_io_t dbi;
|
|
|
+
|
|
|
+ dbi.dbi_dsp = dsp;
|
|
|
+ dbi.dbi_buf = buf;
|
|
|
+ dbi.dbi_len = len;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * The vn_rdwr() call is performed in a taskq to ensure that there is
|
|
|
+ * always enough stack space to write safely to the target filesystem.
|
|
|
+ * The ZIO_TYPE_FREE threads are used because there can be a lot of
|
|
|
+ * them and they are used in vdev_file.c for a similar purpose.
|
|
|
+ */
|
|
|
+ spa_taskq_dispatch_sync(dmu_objset_spa(dsp->dsa_os), ZIO_TYPE_FREE,
|
|
|
+ ZIO_TASKQ_ISSUE, dump_bytes_strategy, &dbi, TQ_SLEEP);
|
|
|
|
|
|
return (dsp->dsa_err);
|
|
|
}
|
|
|
@@ -302,7 +330,7 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
|
|
|
|
|
|
/* ARGSUSED */
|
|
|
static int
|
|
|
-backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
|
|
|
+backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|
|
const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
|
|
|
{
|
|
|
dmu_sendarg_t *dsp = arg;
|
|
|
@@ -331,9 +359,9 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
|
|
|
uint32_t aflags = ARC_WAIT;
|
|
|
arc_buf_t *abuf;
|
|
|
|
|
|
- if (dsl_read(NULL, spa, bp, pbuf,
|
|
|
- arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
|
|
|
- ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
|
|
|
+ if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
|
|
|
+ ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
|
|
|
+ &aflags, zb) != 0)
|
|
|
return (EIO);
|
|
|
|
|
|
blk = abuf->b_data;
|
|
|
@@ -350,9 +378,9 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
|
|
|
arc_buf_t *abuf;
|
|
|
int blksz = BP_GET_LSIZE(bp);
|
|
|
|
|
|
- if (arc_read_nolock(NULL, spa, bp,
|
|
|
- arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
|
|
|
- ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
|
|
|
+ if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
|
|
|
+ ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
|
|
|
+ &aflags, zb) != 0)
|
|
|
return (EIO);
|
|
|
|
|
|
err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data);
|
|
|
@@ -362,9 +390,9 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
|
|
|
arc_buf_t *abuf;
|
|
|
int blksz = BP_GET_LSIZE(bp);
|
|
|
|
|
|
- if (dsl_read(NULL, spa, bp, pbuf,
|
|
|
- arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
|
|
|
- ZIO_FLAG_CANFAIL, &aflags, zb) != 0) {
|
|
|
+ if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
|
|
|
+ ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
|
|
|
+ &aflags, zb) != 0) {
|
|
|
if (zfs_send_corrupt_data) {
|
|
|
uint64_t *ptr;
|
|
|
/* Send a block filled with 0x"zfs badd bloc" */
|
|
|
@@ -960,7 +988,7 @@ restore_read(struct restorearg *ra, int len)
|
|
|
int done = 0;
|
|
|
|
|
|
/* some things will require 8-byte alignment, so everything must */
|
|
|
- ASSERT3U(len % 8, ==, 0);
|
|
|
+ ASSERT0(len % 8);
|
|
|
|
|
|
while (done < len) {
|
|
|
ssize_t resid;
|
|
|
@@ -1639,7 +1667,7 @@ out:
|
|
|
(void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds);
|
|
|
dsl_dataset_disown(ds, dmu_recv_tag);
|
|
|
myerr = dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE);
|
|
|
- ASSERT3U(myerr, ==, 0);
|
|
|
+ ASSERT0(myerr);
|
|
|
return (err);
|
|
|
}
|
|
|
|
|
|
diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c
|
|
|
index 980c1aa..1c39723 100644
|
|
|
--- a/module/zfs/dmu_traverse.c
|
|
|
+++ b/module/zfs/dmu_traverse.c
|
|
|
@@ -62,7 +62,9 @@ typedef struct traverse_data {
|
|
|
} traverse_data_t;
|
|
|
|
|
|
static int traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
|
|
|
- arc_buf_t *buf, uint64_t objset, uint64_t object);
|
|
|
+ uint64_t objset, uint64_t object);
|
|
|
+static void prefetch_dnode_metadata(traverse_data_t *td, const dnode_phys_t *,
|
|
|
+ uint64_t objset, uint64_t object);
|
|
|
|
|
|
static int
|
|
|
traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
|
|
|
@@ -79,7 +81,7 @@ traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
|
|
|
SET_BOOKMARK(&zb, td->td_objset, ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
|
|
|
bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
|
|
|
|
|
|
- (void) td->td_func(td->td_spa, zilog, bp, NULL, &zb, NULL, td->td_arg);
|
|
|
+ (void) td->td_func(td->td_spa, zilog, bp, &zb, NULL, td->td_arg);
|
|
|
|
|
|
return (0);
|
|
|
}
|
|
|
@@ -103,7 +105,7 @@ traverse_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
|
|
|
SET_BOOKMARK(&zb, td->td_objset, lr->lr_foid,
|
|
|
ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp));
|
|
|
|
|
|
- (void) td->td_func(td->td_spa, zilog, bp, NULL, &zb, NULL,
|
|
|
+ (void) td->td_func(td->td_spa, zilog, bp, &zb, NULL,
|
|
|
td->td_arg);
|
|
|
}
|
|
|
return (0);
|
|
|
@@ -174,13 +176,37 @@ static void
|
|
|
traverse_pause(traverse_data_t *td, const zbookmark_t *zb)
|
|
|
{
|
|
|
ASSERT(td->td_resume != NULL);
|
|
|
- ASSERT3U(zb->zb_level, ==, 0);
|
|
|
+ ASSERT0(zb->zb_level);
|
|
|
bcopy(zb, td->td_resume, sizeof (*td->td_resume));
|
|
|
}
|
|
|
|
|
|
+static void
|
|
|
+traverse_prefetch_metadata(traverse_data_t *td,
|
|
|
+ const blkptr_t *bp, const zbookmark_t *zb)
|
|
|
+{
|
|
|
+ uint32_t flags = ARC_NOWAIT | ARC_PREFETCH;
|
|
|
+
|
|
|
+ if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA))
|
|
|
+ return;
|
|
|
+ /*
|
|
|
+ * If we are in the process of resuming, don't prefetch, because
|
|
|
+ * some children will not be needed (and in fact may have already
|
|
|
+ * been freed).
|
|
|
+ */
|
|
|
+ if (td->td_resume != NULL && !ZB_IS_ZERO(td->td_resume))
|
|
|
+ return;
|
|
|
+ if (BP_IS_HOLE(bp) || bp->blk_birth <= td->td_min_txg)
|
|
|
+ return;
|
|
|
+ if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE)
|
|
|
+ return;
|
|
|
+
|
|
|
+ (void) arc_read(NULL, td->td_spa, bp, NULL, NULL,
|
|
|
+ ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
|
|
|
+}
|
|
|
+
|
|
|
static int
|
|
|
traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
|
|
- arc_buf_t *pbuf, blkptr_t *bp, const zbookmark_t *zb)
|
|
|
+ const blkptr_t *bp, const zbookmark_t *zb)
|
|
|
{
|
|
|
zbookmark_t czb;
|
|
|
int err = 0, lasterr = 0;
|
|
|
@@ -201,8 +227,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
|
|
}
|
|
|
|
|
|
if (BP_IS_HOLE(bp)) {
|
|
|
- err = td->td_func(td->td_spa, NULL, NULL, pbuf, zb, dnp,
|
|
|
- td->td_arg);
|
|
|
+ err = td->td_func(td->td_spa, NULL, NULL, zb, dnp, td->td_arg);
|
|
|
return (err);
|
|
|
}
|
|
|
|
|
|
@@ -222,7 +247,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
|
|
}
|
|
|
|
|
|
if (td->td_flags & TRAVERSE_PRE) {
|
|
|
- err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp,
|
|
|
+ err = td->td_func(td->td_spa, NULL, bp, zb, dnp,
|
|
|
td->td_arg);
|
|
|
if (err == TRAVERSE_VISIT_NO_CHILDREN)
|
|
|
return (0);
|
|
|
@@ -238,19 +263,25 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
|
|
blkptr_t *cbp;
|
|
|
int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
|
|
|
|
|
|
- err = dsl_read(NULL, td->td_spa, bp, pbuf,
|
|
|
- arc_getbuf_func, &buf,
|
|
|
+ err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
|
|
|
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
|
|
|
if (err)
|
|
|
return (err);
|
|
|
+ cbp = buf->b_data;
|
|
|
+
|
|
|
+ for (i = 0; i < epb; i++) {
|
|
|
+ SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
|
|
|
+ zb->zb_level - 1,
|
|
|
+ zb->zb_blkid * epb + i);
|
|
|
+ traverse_prefetch_metadata(td, &cbp[i], &czb);
|
|
|
+ }
|
|
|
|
|
|
/* recursively visitbp() blocks below this */
|
|
|
- cbp = buf->b_data;
|
|
|
- for (i = 0; i < epb; i++, cbp++) {
|
|
|
+ for (i = 0; i < epb; i++) {
|
|
|
SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
|
|
|
zb->zb_level - 1,
|
|
|
zb->zb_blkid * epb + i);
|
|
|
- err = traverse_visitbp(td, dnp, buf, cbp, &czb);
|
|
|
+ err = traverse_visitbp(td, dnp, &cbp[i], &czb);
|
|
|
if (err) {
|
|
|
if (!hard)
|
|
|
break;
|
|
|
@@ -262,16 +293,20 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
|
|
int i;
|
|
|
int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
|
|
|
|
|
|
- err = dsl_read(NULL, td->td_spa, bp, pbuf,
|
|
|
- arc_getbuf_func, &buf,
|
|
|
+ err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
|
|
|
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
|
|
|
if (err)
|
|
|
return (err);
|
|
|
+ dnp = buf->b_data;
|
|
|
+
|
|
|
+ for (i = 0; i < epb; i++) {
|
|
|
+ prefetch_dnode_metadata(td, &dnp[i], zb->zb_objset,
|
|
|
+ zb->zb_blkid * epb + i);
|
|
|
+ }
|
|
|
|
|
|
/* recursively visitbp() blocks below this */
|
|
|
- dnp = buf->b_data;
|
|
|
- for (i = 0; i < epb; i++, dnp++) {
|
|
|
- err = traverse_dnode(td, dnp, buf, zb->zb_objset,
|
|
|
+ for (i = 0; i < epb; i++) {
|
|
|
+ err = traverse_dnode(td, &dnp[i], zb->zb_objset,
|
|
|
zb->zb_blkid * epb + i);
|
|
|
if (err) {
|
|
|
if (!hard)
|
|
|
@@ -284,15 +319,23 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
|
|
objset_phys_t *osp;
|
|
|
dnode_phys_t *dnp;
|
|
|
|
|
|
- err = dsl_read_nolock(NULL, td->td_spa, bp,
|
|
|
- arc_getbuf_func, &buf,
|
|
|
+ err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
|
|
|
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
|
|
|
if (err)
|
|
|
return (err);
|
|
|
|
|
|
osp = buf->b_data;
|
|
|
dnp = &osp->os_meta_dnode;
|
|
|
- err = traverse_dnode(td, dnp, buf, zb->zb_objset,
|
|
|
+ prefetch_dnode_metadata(td, dnp, zb->zb_objset,
|
|
|
+ DMU_META_DNODE_OBJECT);
|
|
|
+ if (arc_buf_size(buf) >= sizeof (objset_phys_t)) {
|
|
|
+ prefetch_dnode_metadata(td, &osp->os_userused_dnode,
|
|
|
+ zb->zb_objset, DMU_USERUSED_OBJECT);
|
|
|
+ prefetch_dnode_metadata(td, &osp->os_groupused_dnode,
|
|
|
+ zb->zb_objset, DMU_GROUPUSED_OBJECT);
|
|
|
+ }
|
|
|
+
|
|
|
+ err = traverse_dnode(td, dnp, zb->zb_objset,
|
|
|
DMU_META_DNODE_OBJECT);
|
|
|
if (err && hard) {
|
|
|
lasterr = err;
|
|
|
@@ -300,7 +343,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
|
|
}
|
|
|
if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
|
|
|
dnp = &osp->os_userused_dnode;
|
|
|
- err = traverse_dnode(td, dnp, buf, zb->zb_objset,
|
|
|
+ err = traverse_dnode(td, dnp, zb->zb_objset,
|
|
|
DMU_USERUSED_OBJECT);
|
|
|
}
|
|
|
if (err && hard) {
|
|
|
@@ -309,7 +352,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
|
|
}
|
|
|
if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
|
|
|
dnp = &osp->os_groupused_dnode;
|
|
|
- err = traverse_dnode(td, dnp, buf, zb->zb_objset,
|
|
|
+ err = traverse_dnode(td, dnp, zb->zb_objset,
|
|
|
DMU_GROUPUSED_OBJECT);
|
|
|
}
|
|
|
}
|
|
|
@@ -319,8 +362,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
|
|
|
|
|
post:
|
|
|
if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST)) {
|
|
|
- err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp,
|
|
|
- td->td_arg);
|
|
|
+ err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg);
|
|
|
if (err == ERESTART)
|
|
|
pause = B_TRUE;
|
|
|
}
|
|
|
@@ -334,9 +376,27 @@ post:
|
|
|
return (err != 0 ? err : lasterr);
|
|
|
}
|
|
|
|
|
|
+static void
|
|
|
+prefetch_dnode_metadata(traverse_data_t *td, const dnode_phys_t *dnp,
|
|
|
+ uint64_t objset, uint64_t object)
|
|
|
+{
|
|
|
+ int j;
|
|
|
+ zbookmark_t czb;
|
|
|
+
|
|
|
+ for (j = 0; j < dnp->dn_nblkptr; j++) {
|
|
|
+ SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
|
|
|
+ traverse_prefetch_metadata(td, &dnp->dn_blkptr[j], &czb);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
|
|
|
+ SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID);
|
|
|
+ traverse_prefetch_metadata(td, &dnp->dn_spill, &czb);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
static int
|
|
|
traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
|
|
|
- arc_buf_t *buf, uint64_t objset, uint64_t object)
|
|
|
+ uint64_t objset, uint64_t object)
|
|
|
{
|
|
|
int j, err = 0, lasterr = 0;
|
|
|
zbookmark_t czb;
|
|
|
@@ -344,8 +404,7 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
|
|
|
|
|
|
for (j = 0; j < dnp->dn_nblkptr; j++) {
|
|
|
SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
|
|
|
- err = traverse_visitbp(td, dnp, buf,
|
|
|
- (blkptr_t *)&dnp->dn_blkptr[j], &czb);
|
|
|
+ err = traverse_visitbp(td, dnp, &dnp->dn_blkptr[j], &czb);
|
|
|
if (err) {
|
|
|
if (!hard)
|
|
|
break;
|
|
|
@@ -354,10 +413,8 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
|
|
|
}
|
|
|
|
|
|
if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
|
|
|
- SET_BOOKMARK(&czb, objset,
|
|
|
- object, 0, DMU_SPILL_BLKID);
|
|
|
- err = traverse_visitbp(td, dnp, buf,
|
|
|
- (blkptr_t *)&dnp->dn_spill, &czb);
|
|
|
+ SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID);
|
|
|
+ err = traverse_visitbp(td, dnp, &dnp->dn_spill, &czb);
|
|
|
if (err) {
|
|
|
if (!hard)
|
|
|
return (err);
|
|
|
@@ -370,8 +427,7 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
|
|
|
/* ARGSUSED */
|
|
|
static int
|
|
|
traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|
|
- arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp,
|
|
|
- void *arg)
|
|
|
+ const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
|
|
|
{
|
|
|
prefetch_data_t *pfd = arg;
|
|
|
uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
|
|
|
@@ -392,10 +448,8 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|
|
cv_broadcast(&pfd->pd_cv);
|
|
|
mutex_exit(&pfd->pd_mtx);
|
|
|
|
|
|
- (void) dsl_read(NULL, spa, bp, pbuf, NULL, NULL,
|
|
|
- ZIO_PRIORITY_ASYNC_READ,
|
|
|
- ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
|
|
|
- &aflags, zb);
|
|
|
+ (void) arc_read(NULL, spa, bp, NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
|
|
|
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, &aflags, zb);
|
|
|
|
|
|
return (0);
|
|
|
}
|
|
|
@@ -413,7 +467,7 @@ traverse_prefetch_thread(void *arg)
|
|
|
|
|
|
SET_BOOKMARK(&czb, td.td_objset,
|
|
|
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
|
|
|
- (void) traverse_visitbp(&td, NULL, NULL, td.td_rootbp, &czb);
|
|
|
+ (void) traverse_visitbp(&td, NULL, td.td_rootbp, &czb);
|
|
|
|
|
|
mutex_enter(&td_main->td_pfd->pd_mtx);
|
|
|
td_main->td_pfd->pd_exited = B_TRUE;
|
|
|
@@ -438,6 +492,12 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
|
|
|
ASSERT(ds == NULL || objset == ds->ds_object);
|
|
|
ASSERT(!(flags & TRAVERSE_PRE) || !(flags & TRAVERSE_POST));
|
|
|
|
|
|
+ /*
|
|
|
+ * The data prefetching mechanism (the prefetch thread) is incompatible
|
|
|
+ * with resuming from a bookmark.
|
|
|
+ */
|
|
|
+ ASSERT(resume == NULL || !(flags & TRAVERSE_PREFETCH_DATA));
|
|
|
+
|
|
|
td = kmem_alloc(sizeof(traverse_data_t), KM_PUSHPAGE);
|
|
|
pd = kmem_zalloc(sizeof(prefetch_data_t), KM_PUSHPAGE);
|
|
|
czb = kmem_alloc(sizeof(zbookmark_t), KM_PUSHPAGE);
|
|
|
@@ -468,14 +528,14 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
|
|
|
traverse_zil(td, &os->os_zil_header);
|
|
|
}
|
|
|
|
|
|
- if (!(flags & TRAVERSE_PREFETCH) ||
|
|
|
+ if (!(flags & TRAVERSE_PREFETCH_DATA) ||
|
|
|
0 == taskq_dispatch(system_taskq, traverse_prefetch_thread,
|
|
|
td, TQ_NOQUEUE))
|
|
|
pd->pd_exited = B_TRUE;
|
|
|
|
|
|
SET_BOOKMARK(czb, td->td_objset,
|
|
|
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
|
|
|
- err = traverse_visitbp(td, NULL, NULL, rootbp, czb);
|
|
|
+ err = traverse_visitbp(td, NULL, rootbp, czb);
|
|
|
|
|
|
mutex_enter(&pd->pd_mtx);
|
|
|
pd->pd_cancel = B_TRUE;
|
|
|
diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c
|
|
|
index e47d858..b0dc64f 100644
|
|
|
--- a/module/zfs/dmu_tx.c
|
|
|
+++ b/module/zfs/dmu_tx.c
|
|
|
@@ -21,7 +21,7 @@
|
|
|
/*
|
|
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
|
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
|
|
- * Copyright (c) 2012 by Delphix. All rights reserved.
|
|
|
+ * Copyright (c) 2013 by Delphix. All rights reserved.
|
|
|
*/
|
|
|
|
|
|
#include <sys/dmu.h>
|
|
|
@@ -301,6 +301,7 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
|
|
|
delta = P2NPHASE(off, dn->dn_datablksz);
|
|
|
}
|
|
|
|
|
|
+ min_ibs = max_ibs = dn->dn_indblkshift;
|
|
|
if (dn->dn_maxblkid > 0) {
|
|
|
/*
|
|
|
* The blocksize can't change,
|
|
|
@@ -308,13 +309,6 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
|
|
|
*/
|
|
|
ASSERT(dn->dn_datablkshift != 0);
|
|
|
min_bs = max_bs = dn->dn_datablkshift;
|
|
|
- min_ibs = max_ibs = dn->dn_indblkshift;
|
|
|
- } else if (dn->dn_indblkshift > max_ibs) {
|
|
|
- /*
|
|
|
- * This ensures that if we reduce DN_MAX_INDBLKSHIFT,
|
|
|
- * the code will still work correctly on older pools.
|
|
|
- */
|
|
|
- min_ibs = max_ibs = dn->dn_indblkshift;
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
@@ -929,7 +923,7 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
|
|
|
uint64_t memory, asize, fsize, usize;
|
|
|
uint64_t towrite, tofree, tooverwrite, tounref, tohold, fudge;
|
|
|
|
|
|
- ASSERT3U(tx->tx_txg, ==, 0);
|
|
|
+ ASSERT0(tx->tx_txg);
|
|
|
|
|
|
if (tx->tx_err) {
|
|
|
DMU_TX_STAT_BUMP(dmu_tx_error);
|
|
|
@@ -1095,12 +1089,15 @@ dmu_tx_unassign(dmu_tx_t *tx)
|
|
|
int
|
|
|
dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
|
|
|
{
|
|
|
+ hrtime_t before, after;
|
|
|
int err;
|
|
|
|
|
|
ASSERT(tx->tx_txg == 0);
|
|
|
ASSERT(txg_how != 0);
|
|
|
ASSERT(!dsl_pool_sync_context(tx->tx_pool));
|
|
|
|
|
|
+ before = gethrtime();
|
|
|
+
|
|
|
while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) {
|
|
|
dmu_tx_unassign(tx);
|
|
|
|
|
|
@@ -1112,6 +1109,11 @@ dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
|
|
|
|
|
|
txg_rele_to_quiesce(&tx->tx_txgh);
|
|
|
|
|
|
+ after = gethrtime();
|
|
|
+
|
|
|
+ dsl_pool_tx_assign_add_usecs(tx->tx_pool,
|
|
|
+ (after - before) / NSEC_PER_USEC);
|
|
|
+
|
|
|
return (0);
|
|
|
}
|
|
|
|
|
|
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
|
|
|
index 3a8a5e3..d8d6651 100644
|
|
|
--- a/module/zfs/dnode.c
|
|
|
+++ b/module/zfs/dnode.c
|
|
|
@@ -139,32 +139,32 @@ dnode_dest(void *arg, void *unused)
|
|
|
ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
|
|
|
avl_destroy(&dn->dn_ranges[i]);
|
|
|
list_destroy(&dn->dn_dirty_records[i]);
|
|
|
- ASSERT3U(dn->dn_next_nblkptr[i], ==, 0);
|
|
|
- ASSERT3U(dn->dn_next_nlevels[i], ==, 0);
|
|
|
- ASSERT3U(dn->dn_next_indblkshift[i], ==, 0);
|
|
|
- ASSERT3U(dn->dn_next_bonustype[i], ==, 0);
|
|
|
- ASSERT3U(dn->dn_rm_spillblk[i], ==, 0);
|
|
|
- ASSERT3U(dn->dn_next_bonuslen[i], ==, 0);
|
|
|
- ASSERT3U(dn->dn_next_blksz[i], ==, 0);
|
|
|
+ ASSERT0(dn->dn_next_nblkptr[i]);
|
|
|
+ ASSERT0(dn->dn_next_nlevels[i]);
|
|
|
+ ASSERT0(dn->dn_next_indblkshift[i]);
|
|
|
+ ASSERT0(dn->dn_next_bonustype[i]);
|
|
|
+ ASSERT0(dn->dn_rm_spillblk[i]);
|
|
|
+ ASSERT0(dn->dn_next_bonuslen[i]);
|
|
|
+ ASSERT0(dn->dn_next_blksz[i]);
|
|
|
}
|
|
|
|
|
|
- ASSERT3U(dn->dn_allocated_txg, ==, 0);
|
|
|
- ASSERT3U(dn->dn_free_txg, ==, 0);
|
|
|
- ASSERT3U(dn->dn_assigned_txg, ==, 0);
|
|
|
- ASSERT3U(dn->dn_dirtyctx, ==, 0);
|
|
|
+ ASSERT0(dn->dn_allocated_txg);
|
|
|
+ ASSERT0(dn->dn_free_txg);
|
|
|
+ ASSERT0(dn->dn_assigned_txg);
|
|
|
+ ASSERT0(dn->dn_dirtyctx);
|
|
|
ASSERT3P(dn->dn_dirtyctx_firstset, ==, NULL);
|
|
|
ASSERT3P(dn->dn_bonus, ==, NULL);
|
|
|
ASSERT(!dn->dn_have_spill);
|
|
|
ASSERT3P(dn->dn_zio, ==, NULL);
|
|
|
- ASSERT3U(dn->dn_oldused, ==, 0);
|
|
|
- ASSERT3U(dn->dn_oldflags, ==, 0);
|
|
|
- ASSERT3U(dn->dn_olduid, ==, 0);
|
|
|
- ASSERT3U(dn->dn_oldgid, ==, 0);
|
|
|
- ASSERT3U(dn->dn_newuid, ==, 0);
|
|
|
- ASSERT3U(dn->dn_newgid, ==, 0);
|
|
|
- ASSERT3U(dn->dn_id_flags, ==, 0);
|
|
|
-
|
|
|
- ASSERT3U(dn->dn_dbufs_count, ==, 0);
|
|
|
+ ASSERT0(dn->dn_oldused);
|
|
|
+ ASSERT0(dn->dn_oldflags);
|
|
|
+ ASSERT0(dn->dn_olduid);
|
|
|
+ ASSERT0(dn->dn_oldgid);
|
|
|
+ ASSERT0(dn->dn_newuid);
|
|
|
+ ASSERT0(dn->dn_newgid);
|
|
|
+ ASSERT0(dn->dn_id_flags);
|
|
|
+
|
|
|
+ ASSERT0(dn->dn_dbufs_count);
|
|
|
list_destroy(&dn->dn_dbufs);
|
|
|
}
|
|
|
|
|
|
@@ -361,7 +361,7 @@ dnode_rm_spill(dnode_t *dn, dmu_tx_t *tx)
|
|
|
static void
|
|
|
dnode_setdblksz(dnode_t *dn, int size)
|
|
|
{
|
|
|
- ASSERT3U(P2PHASE(size, SPA_MINBLOCKSIZE), ==, 0);
|
|
|
+ ASSERT0(P2PHASE(size, SPA_MINBLOCKSIZE));
|
|
|
ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
|
|
|
ASSERT3U(size, >=, SPA_MINBLOCKSIZE);
|
|
|
ASSERT3U(size >> SPA_MINBLOCKSHIFT, <,
|
|
|
@@ -506,24 +506,24 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
|
|
|
ASSERT(DMU_OT_IS_VALID(bonustype));
|
|
|
ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
|
|
|
ASSERT(dn->dn_type == DMU_OT_NONE);
|
|
|
- ASSERT3U(dn->dn_maxblkid, ==, 0);
|
|
|
- ASSERT3U(dn->dn_allocated_txg, ==, 0);
|
|
|
- ASSERT3U(dn->dn_assigned_txg, ==, 0);
|
|
|
+ ASSERT0(dn->dn_maxblkid);
|
|
|
+ ASSERT0(dn->dn_allocated_txg);
|
|
|
+ ASSERT0(dn->dn_assigned_txg);
|
|
|
ASSERT(refcount_is_zero(&dn->dn_tx_holds));
|
|
|
ASSERT3U(refcount_count(&dn->dn_holds), <=, 1);
|
|
|
ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL);
|
|
|
|
|
|
for (i = 0; i < TXG_SIZE; i++) {
|
|
|
- ASSERT3U(dn->dn_next_nblkptr[i], ==, 0);
|
|
|
- ASSERT3U(dn->dn_next_nlevels[i], ==, 0);
|
|
|
- ASSERT3U(dn->dn_next_indblkshift[i], ==, 0);
|
|
|
- ASSERT3U(dn->dn_next_bonuslen[i], ==, 0);
|
|
|
- ASSERT3U(dn->dn_next_bonustype[i], ==, 0);
|
|
|
- ASSERT3U(dn->dn_rm_spillblk[i], ==, 0);
|
|
|
- ASSERT3U(dn->dn_next_blksz[i], ==, 0);
|
|
|
+ ASSERT0(dn->dn_next_nblkptr[i]);
|
|
|
+ ASSERT0(dn->dn_next_nlevels[i]);
|
|
|
+ ASSERT0(dn->dn_next_indblkshift[i]);
|
|
|
+ ASSERT0(dn->dn_next_bonuslen[i]);
|
|
|
+ ASSERT0(dn->dn_next_bonustype[i]);
|
|
|
+ ASSERT0(dn->dn_rm_spillblk[i]);
|
|
|
+ ASSERT0(dn->dn_next_blksz[i]);
|
|
|
ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
|
|
|
ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL);
|
|
|
- ASSERT3U(avl_numnodes(&dn->dn_ranges[i]), ==, 0);
|
|
|
+ ASSERT0(avl_numnodes(&dn->dn_ranges[i]));
|
|
|
}
|
|
|
|
|
|
dn->dn_type = ot;
|
|
|
@@ -565,7 +565,7 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
|
|
|
|
|
|
ASSERT3U(blocksize, >=, SPA_MINBLOCKSIZE);
|
|
|
ASSERT3U(blocksize, <=, SPA_MAXBLOCKSIZE);
|
|
|
- ASSERT3U(blocksize % SPA_MINBLOCKSIZE, ==, 0);
|
|
|
+ ASSERT0(blocksize % SPA_MINBLOCKSIZE);
|
|
|
ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx));
|
|
|
ASSERT(tx->tx_txg != 0);
|
|
|
ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
|
|
|
@@ -1236,9 +1236,9 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
|
|
|
|
|
|
ASSERT(!refcount_is_zero(&dn->dn_holds) || list_head(&dn->dn_dbufs));
|
|
|
ASSERT(dn->dn_datablksz != 0);
|
|
|
- ASSERT3U(dn->dn_next_bonuslen[txg&TXG_MASK], ==, 0);
|
|
|
- ASSERT3U(dn->dn_next_blksz[txg&TXG_MASK], ==, 0);
|
|
|
- ASSERT3U(dn->dn_next_bonustype[txg&TXG_MASK], ==, 0);
|
|
|
+ ASSERT0(dn->dn_next_bonuslen[txg&TXG_MASK]);
|
|
|
+ ASSERT0(dn->dn_next_blksz[txg&TXG_MASK]);
|
|
|
+ ASSERT0(dn->dn_next_bonustype[txg&TXG_MASK]);
|
|
|
|
|
|
dprintf_ds(os->os_dsl_dataset, "obj=%llu txg=%llu\n",
|
|
|
dn->dn_object, txg);
|
|
|
@@ -1588,7 +1588,7 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
|
|
|
else
|
|
|
tail = P2PHASE(len, blksz);
|
|
|
|
|
|
- ASSERT3U(P2PHASE(off, blksz), ==, 0);
|
|
|
+ ASSERT0(P2PHASE(off, blksz));
|
|
|
/* zero out any partial block data at the end of the range */
|
|
|
if (tail) {
|
|
|
if (len < tail)
|
|
|
@@ -1770,7 +1770,7 @@ dnode_diduse_space(dnode_t *dn, int64_t delta)
|
|
|
space += delta;
|
|
|
if (spa_version(dn->dn_objset->os_spa) < SPA_VERSION_DNODE_BYTES) {
|
|
|
ASSERT((dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) == 0);
|
|
|
- ASSERT3U(P2PHASE(space, 1<<DEV_BSHIFT), ==, 0);
|
|
|
+ ASSERT0(P2PHASE(space, 1<<DEV_BSHIFT));
|
|
|
dn->dn_phys->dn_used = space >> DEV_BSHIFT;
|
|
|
} else {
|
|
|
dn->dn_phys->dn_used = space;
|
|
|
diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c
|
|
|
index 58fa473..76e6037 100644
|
|
|
--- a/module/zfs/dnode_sync.c
|
|
|
+++ b/module/zfs/dnode_sync.c
|
|
|
@@ -274,7 +274,7 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc,
|
|
|
continue;
|
|
|
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
|
|
err = dbuf_hold_impl(dn, db->db_level-1, i, TRUE, FTAG, &subdb);
|
|
|
- ASSERT3U(err, ==, 0);
|
|
|
+ ASSERT0(err);
|
|
|
rw_exit(&dn->dn_struct_rwlock);
|
|
|
|
|
|
if (free_children(subdb, blkid, nblks, trunc, tx) == ALL) {
|
|
|
@@ -294,7 +294,7 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc,
|
|
|
continue;
|
|
|
else if (i == end && !trunc)
|
|
|
continue;
|
|
|
- ASSERT3U(bp->blk_birth, ==, 0);
|
|
|
+ ASSERT0(bp->blk_birth);
|
|
|
}
|
|
|
#endif
|
|
|
ASSERT(all || blocks_freed == 0 || db->db_last_dirty);
|
|
|
@@ -350,7 +350,7 @@ dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx)
|
|
|
continue;
|
|
|
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
|
|
err = dbuf_hold_impl(dn, dnlevel-1, i, TRUE, FTAG, &db);
|
|
|
- ASSERT3U(err, ==, 0);
|
|
|
+ ASSERT0(err);
|
|
|
rw_exit(&dn->dn_struct_rwlock);
|
|
|
|
|
|
if (free_children(db, blkid, nblks, trunc, tx) == ALL) {
|
|
|
@@ -475,7 +475,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
|
|
|
* Our contents should have been freed in dnode_sync() by the
|
|
|
* free range record inserted by the caller of dnode_free().
|
|
|
*/
|
|
|
- ASSERT3U(DN_USED_BYTES(dn->dn_phys), ==, 0);
|
|
|
+ ASSERT0(DN_USED_BYTES(dn->dn_phys));
|
|
|
ASSERT(BP_IS_HOLE(dn->dn_phys->dn_blkptr));
|
|
|
|
|
|
dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
|
|
|
diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c
|
|
|
index 55a8b0f..d9e8bd3 100644
|
|
|
--- a/module/zfs/dsl_dataset.c
|
|
|
+++ b/module/zfs/dsl_dataset.c
|
|
|
@@ -1271,7 +1271,7 @@ struct killarg {
|
|
|
|
|
|
/* ARGSUSED */
|
|
|
static int
|
|
|
-kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
|
|
|
+kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|
|
const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
|
|
|
{
|
|
|
struct killarg *ka = arg;
|
|
|
@@ -1511,7 +1511,7 @@ remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, dmu_tx_t *tx)
|
|
|
* remove this one.
|
|
|
*/
|
|
|
if (err != ENOENT) {
|
|
|
- VERIFY3U(err, ==, 0);
|
|
|
+ VERIFY0(err);
|
|
|
}
|
|
|
ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj,
|
|
|
&count));
|
|
|
@@ -1598,7 +1598,7 @@ process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
|
|
|
poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
|
|
|
VERIFY3U(0, ==, bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
|
|
|
process_old_cb, &poa, tx));
|
|
|
- VERIFY3U(zio_wait(poa.pio), ==, 0);
|
|
|
+ VERIFY0(zio_wait(poa.pio));
|
|
|
ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes);
|
|
|
|
|
|
/* change snapused */
|
|
|
@@ -1633,7 +1633,7 @@ old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
|
|
|
err = traverse_dataset(ds,
|
|
|
ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST,
|
|
|
kill_blkptr, &ka);
|
|
|
- ASSERT3U(err, ==, 0);
|
|
|
+ ASSERT0(err);
|
|
|
ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);
|
|
|
|
|
|
return (err);
|
|
|
@@ -1685,7 +1685,7 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
|
|
|
psa.psa_effective_value = 0; /* predict default value */
|
|
|
|
|
|
dsl_dataset_set_reservation_sync(ds, &psa, tx);
|
|
|
- ASSERT3U(ds->ds_reserved, ==, 0);
|
|
|
+ ASSERT0(ds->ds_reserved);
|
|
|
}
|
|
|
|
|
|
ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
|
|
|
@@ -1952,7 +1952,7 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
|
|
|
|
|
|
err = dsl_dataset_snap_lookup(ds_head,
|
|
|
ds->ds_snapname, &val);
|
|
|
- ASSERT3U(err, ==, 0);
|
|
|
+ ASSERT0(err);
|
|
|
ASSERT3U(val, ==, obj);
|
|
|
}
|
|
|
#endif
|
|
|
@@ -2458,13 +2458,13 @@ dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
|
|
|
|
|
|
VERIFY(0 == dsl_dataset_get_snapname(ds));
|
|
|
err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx);
|
|
|
- ASSERT3U(err, ==, 0);
|
|
|
+ ASSERT0(err);
|
|
|
mutex_enter(&ds->ds_lock);
|
|
|
(void) strcpy(ds->ds_snapname, newsnapname);
|
|
|
mutex_exit(&ds->ds_lock);
|
|
|
err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
|
|
|
ds->ds_snapname, 8, 1, &ds->ds_object, tx);
|
|
|
- ASSERT3U(err, ==, 0);
|
|
|
+ ASSERT0(err);
|
|
|
|
|
|
spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx,
|
|
|
"dataset = %llu", ds->ds_object);
|
|
|
@@ -2917,7 +2917,7 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx)
|
|
|
zap_cursor_fini(&zc);
|
|
|
}
|
|
|
|
|
|
- ASSERT3U(dsl_prop_numcb(ds), ==, 0);
|
|
|
+ ASSERT0(dsl_prop_numcb(ds));
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c
|
|
|
index a25ebbf..69f68c2 100644
|
|
|
--- a/module/zfs/dsl_dir.c
|
|
|
+++ b/module/zfs/dsl_dir.c
|
|
|
@@ -500,10 +500,10 @@ dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
|
|
|
|
|
|
dsl_dir_set_reservation_sync(ds, &psa, tx);
|
|
|
|
|
|
- ASSERT3U(dd->dd_phys->dd_used_bytes, ==, 0);
|
|
|
- ASSERT3U(dd->dd_phys->dd_reserved, ==, 0);
|
|
|
+ ASSERT0(dd->dd_phys->dd_used_bytes);
|
|
|
+ ASSERT0(dd->dd_phys->dd_reserved);
|
|
|
for (t = 0; t < DD_USED_NUM; t++)
|
|
|
- ASSERT3U(dd->dd_phys->dd_used_breakdown[t], ==, 0);
|
|
|
+ ASSERT0(dd->dd_phys->dd_used_breakdown[t]);
|
|
|
|
|
|
VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx));
|
|
|
VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx));
|
|
|
@@ -592,7 +592,7 @@ dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx)
|
|
|
ASSERT(dmu_tx_is_syncing(tx));
|
|
|
|
|
|
mutex_enter(&dd->dd_lock);
|
|
|
- ASSERT3U(dd->dd_tempreserved[tx->tx_txg&TXG_MASK], ==, 0);
|
|
|
+ ASSERT0(dd->dd_tempreserved[tx->tx_txg&TXG_MASK]);
|
|
|
dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg,
|
|
|
dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024);
|
|
|
dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0;
|
|
|
@@ -1323,7 +1323,7 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
|
|
|
/* remove from old parent zapobj */
|
|
|
err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj,
|
|
|
dd->dd_myname, tx);
|
|
|
- ASSERT3U(err, ==, 0);
|
|
|
+ ASSERT0(err);
|
|
|
|
|
|
(void) strcpy(dd->dd_myname, ra->mynewname);
|
|
|
dsl_dir_close(dd->dd_parent, dd);
|
|
|
@@ -1334,7 +1334,7 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
|
|
|
/* add to new parent zapobj */
|
|
|
err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
|
|
|
dd->dd_myname, 8, 1, &dd->dd_object, tx);
|
|
|
- ASSERT3U(err, ==, 0);
|
|
|
+ ASSERT0(err);
|
|
|
|
|
|
spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa,
|
|
|
tx, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj);
|
|
|
diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c
|
|
|
index 704f034..7795d80 100644
|
|
|
--- a/module/zfs/dsl_pool.c
|
|
|
+++ b/module/zfs/dsl_pool.c
|
|
|
@@ -58,6 +58,63 @@ kmutex_t zfs_write_limit_lock;
|
|
|
|
|
|
static pgcnt_t old_physmem = 0;
|
|
|
|
|
|
+/*
+ * Create and install the per-pool "dmu_tx_assign-<poolname>" named kstat.
+ *
+ *	dp	pool whose dp_tx_assign_* fields are initialized here
+ *	ndata	number of histogram buckets to allocate
+ *
+ * Bucket i is a 64-bit counter, zeroed here and labelled "<2^i> us".
+ * The labels are built with a 32-bit shift, so ndata must not exceed 32.
+ *
+ * The bucket array is allocated by this function and attached to a
+ * KSTAT_FLAG_VIRTUAL kstat. If kstat_create() fails the buckets are
+ * still allocated and usable, they just are not exported.
+ */
+static void
+dsl_pool_tx_assign_init(dsl_pool_t *dp, unsigned int ndata)
+{
+	kstat_named_t *ks;
+	char name[KSTAT_STRLEN];
+	unsigned int i;
+	int data_size = ndata * sizeof (kstat_named_t);
+
+	(void) snprintf(name, KSTAT_STRLEN, "dmu_tx_assign-%s",
+	    spa_name(dp->dp_spa));
+
+	dp->dp_tx_assign_size = ndata;
+
+	/* A histogram with zero buckets has no backing storage at all. */
+	if (data_size)
+		dp->dp_tx_assign_buckets = kmem_alloc(data_size, KM_SLEEP);
+	else
+		dp->dp_tx_assign_buckets = NULL;
+
+	for (i = 0; i < ndata; i++) {
+		ks = &dp->dp_tx_assign_buckets[i];
+		ks->data_type = KSTAT_DATA_UINT64;
+		ks->value.ui64 = 0;
+		/*
+		 * Shift an unsigned 1: "1 << 31" overflows a signed int,
+		 * and the value is printed with %u.
+		 */
+		(void) snprintf(ks->name, KSTAT_STRLEN, "%u us", 1U << i);
+	}
+
+	dp->dp_tx_assign_kstat = kstat_create("zfs", 0, name, "misc",
+	    KSTAT_TYPE_NAMED, 0, KSTAT_FLAG_VIRTUAL);
+
+	if (dp->dp_tx_assign_kstat) {
+		/* Point the virtual kstat at our bucket array and publish. */
+		dp->dp_tx_assign_kstat->ks_data = dp->dp_tx_assign_buckets;
+		dp->dp_tx_assign_kstat->ks_ndata = dp->dp_tx_assign_size;
+		dp->dp_tx_assign_kstat->ks_data_size = data_size;
+		kstat_install(dp->dp_tx_assign_kstat);
+	}
+}
|
|
|
+
|
|
|
+/*
+ * Tear down the dmu_tx_assign histogram set up by
+ * dsl_pool_tx_assign_init().
+ *
+ * The kstat is removed first. While it is installed its ks_data points
+ * at dp_tx_assign_buckets, so freeing the buckets before kstat_delete()
+ * would leave a window in which the kstat refers to freed memory.
+ * Either pointer may be NULL (zero buckets requested, or kstat_create()
+ * failed), in which case that step is skipped.
+ */
+static void
+dsl_pool_tx_assign_destroy(dsl_pool_t *dp)
+{
+	if (dp->dp_tx_assign_kstat)
+		kstat_delete(dp->dp_tx_assign_kstat);
+
+	if (dp->dp_tx_assign_buckets)
+		kmem_free(dp->dp_tx_assign_buckets,
+		    dp->dp_tx_assign_size * sizeof (kstat_named_t));
+}
|
|
|
+
|
|
|
+/*
+ * Record one tx-assign latency sample in the pool's histogram.
+ *
+ *	dp	pool whose histogram is updated
+ *	usecs	latency of the sample, in microseconds
+ *
+ * The sample lands in the first bucket idx for which 2^idx >= usecs;
+ * anything larger than the second-to-last boundary is counted in the
+ * last bucket. A pool created with zero buckets has a NULL bucket
+ * array, in which case the sample is dropped.
+ */
+void
+dsl_pool_tx_assign_add_usecs(dsl_pool_t *dp, uint64_t usecs)
+{
+	uint64_t idx = 0;
+
+	if (dp->dp_tx_assign_buckets == NULL)
+		return;
+
+	/*
+	 * Test the bound before shifting, and shift a 64-bit 1, so the
+	 * shift is always well defined ("1 << 31" overflows a signed int).
+	 */
+	while ((idx < dp->dp_tx_assign_size - 1) &&
+	    ((1ULL << idx) < usecs))
+		idx++;
+
+	atomic_inc_64(&dp->dp_tx_assign_buckets[idx].value.ui64);
+}
|
|
|
+
|
|
|
static int
|
|
|
dsl_pool_txg_history_update(kstat_t *ksp, int rw)
|
|
|
{
|
|
|
@@ -143,7 +200,7 @@ dsl_pool_txg_history_add(dsl_pool_t *dp, uint64_t txg)
|
|
|
{
|
|
|
txg_history_t *th, *rm;
|
|
|
|
|
|
- th = kmem_zalloc(sizeof(txg_history_t), KM_SLEEP);
|
|
|
+ th = kmem_zalloc(sizeof(txg_history_t), KM_PUSHPAGE);
|
|
|
mutex_init(&th->th_lock, NULL, MUTEX_DEFAULT, NULL);
|
|
|
th->th_kstat.txg = txg;
|
|
|
th->th_kstat.state = TXG_STATE_OPEN;
|
|
|
@@ -238,6 +295,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
|
|
|
1, 4, 0);
|
|
|
|
|
|
dsl_pool_txg_history_init(dp, txg);
|
|
|
+ dsl_pool_tx_assign_init(dp, 32);
|
|
|
|
|
|
return (dp);
|
|
|
}
|
|
|
@@ -379,6 +437,7 @@ dsl_pool_close(dsl_pool_t *dp)
|
|
|
arc_flush(dp->dp_spa);
|
|
|
txg_fini(dp);
|
|
|
dsl_scan_fini(dp);
|
|
|
+ dsl_pool_tx_assign_destroy(dp);
|
|
|
dsl_pool_txg_history_destroy(dp);
|
|
|
rw_destroy(&dp->dp_config_rwlock);
|
|
|
mutex_destroy(&dp->dp_lock);
|
|
|
@@ -405,7 +464,7 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
|
|
|
/* create the pool directory */
|
|
|
err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
|
|
|
DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx);
|
|
|
- ASSERT3U(err, ==, 0);
|
|
|
+ ASSERT0(err);
|
|
|
|
|
|
/* Initialize scan structures */
|
|
|
VERIFY3U(0, ==, dsl_scan_init(dp, txg));
|
|
|
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c
|
|
|
index 297caa0..34a4f03 100644
|
|
|
--- a/module/zfs/dsl_scan.c
|
|
|
+++ b/module/zfs/dsl_scan.c
|
|
|
@@ -363,24 +363,6 @@ dsl_free_sync(zio_t *pio, dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp)
|
|
|
zio_nowait(zio_free_sync(pio, dp->dp_spa, txg, bpp, pio->io_flags));
|
|
|
}
|
|
|
|
|
|
-int
|
|
|
-dsl_read(zio_t *pio, spa_t *spa, const blkptr_t *bpp, arc_buf_t *pbuf,
|
|
|
- arc_done_func_t *done, void *private, int priority, int zio_flags,
|
|
|
- uint32_t *arc_flags, const zbookmark_t *zb)
|
|
|
-{
|
|
|
- return (arc_read(pio, spa, bpp, pbuf, done, private,
|
|
|
- priority, zio_flags, arc_flags, zb));
|
|
|
-}
|
|
|
-
|
|
|
-int
|
|
|
-dsl_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bpp,
|
|
|
- arc_done_func_t *done, void *private, int priority, int zio_flags,
|
|
|
- uint32_t *arc_flags, const zbookmark_t *zb)
|
|
|
-{
|
|
|
- return (arc_read_nolock(pio, spa, bpp, done, private,
|
|
|
- priority, zio_flags, arc_flags, zb));
|
|
|
-}
|
|
|
-
|
|
|
static uint64_t
|
|
|
dsl_scan_ds_maxtxg(dsl_dataset_t *ds)
|
|
|
{
|
|
|
@@ -551,12 +533,8 @@ dsl_scan_prefetch(dsl_scan_t *scn, arc_buf_t *buf, blkptr_t *bp,
|
|
|
|
|
|
SET_BOOKMARK(&czb, objset, object, BP_GET_LEVEL(bp), blkid);
|
|
|
|
|
|
- /*
|
|
|
- * XXX need to make sure all of these arc_read() prefetches are
|
|
|
- * done before setting xlateall (similar to dsl_read())
|
|
|
- */
|
|
|
(void) arc_read(scn->scn_zio_root, scn->scn_dp->dp_spa, bp,
|
|
|
- buf, NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
|
|
|
+ NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
|
|
|
ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD, &flags, &czb);
|
|
|
}
|
|
|
|
|
|
@@ -614,8 +592,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
|
|
|
blkptr_t *cbp;
|
|
|
int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
|
|
|
|
|
|
- err = arc_read_nolock(NULL, dp->dp_spa, bp,
|
|
|
- arc_getbuf_func, bufp,
|
|
|
+ err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, bufp,
|
|
|
ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb);
|
|
|
if (err) {
|
|
|
scn->scn_phys.scn_errors++;
|
|
|
@@ -637,8 +614,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
|
|
|
} else if (BP_GET_TYPE(bp) == DMU_OT_USERGROUP_USED) {
|
|
|
uint32_t flags = ARC_WAIT;
|
|
|
|
|
|
- err = arc_read_nolock(NULL, dp->dp_spa, bp,
|
|
|
- arc_getbuf_func, bufp,
|
|
|
+ err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, bufp,
|
|
|
ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb);
|
|
|
if (err) {
|
|
|
scn->scn_phys.scn_errors++;
|
|
|
@@ -650,8 +626,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
|
|
|
int i, j;
|
|
|
int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
|
|
|
|
|
|
- err = arc_read_nolock(NULL, dp->dp_spa, bp,
|
|
|
- arc_getbuf_func, bufp,
|
|
|
+ err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, bufp,
|
|
|
ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb);
|
|
|
if (err) {
|
|
|
scn->scn_phys.scn_errors++;
|
|
|
@@ -673,8 +648,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
|
|
|
uint32_t flags = ARC_WAIT;
|
|
|
objset_phys_t *osp;
|
|
|
|
|
|
- err = arc_read_nolock(NULL, dp->dp_spa, bp,
|
|
|
- arc_getbuf_func, bufp,
|
|
|
+ err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, bufp,
|
|
|
ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb);
|
|
|
if (err) {
|
|
|
scn->scn_phys.scn_errors++;
|
|
|
diff --git a/module/zfs/dsl_synctask.c b/module/zfs/dsl_synctask.c
|
|
|
index f807ccc..75eb507 100644
|
|
|
--- a/module/zfs/dsl_synctask.c
|
|
|
+++ b/module/zfs/dsl_synctask.c
|
|
|
@@ -161,7 +161,7 @@ dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
|
|
|
dsl_pool_t *dp = dstg->dstg_pool;
|
|
|
uint64_t quota, used;
|
|
|
|
|
|
- ASSERT3U(dstg->dstg_err, ==, 0);
|
|
|
+ ASSERT0(dstg->dstg_err);
|
|
|
|
|
|
/*
|
|
|
* Check for sufficient space. We just check against what's
|
|
|
diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c
|
|
|
index 03d5a1d..cd1b6ce 100644
|
|
|
--- a/module/zfs/metaslab.c
|
|
|
+++ b/module/zfs/metaslab.c
|
|
|
@@ -49,6 +49,14 @@ uint64_t metaslab_aliquot = 512ULL << 10;
|
|
|
uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */
|
|
|
|
|
|
/*
|
|
|
+ * The in-core space map representation is more compact than its on-disk form.
|
|
|
+ * The zfs_condense_pct determines how much more compact the in-core
|
|
|
+ * space_map representation must be before we compact it on-disk.
|
|
|
+ * Values should be greater than or equal to 100.
|
|
|
+ */
|
|
|
+int zfs_condense_pct = 200;
|
|
|
+
|
|
|
+/*
|
|
|
* This value defines the number of allowed allocation failures per vdev.
|
|
|
* If a device reaches this threshold in a given txg then we consider skipping
|
|
|
* allocations on that device.
|
|
|
@@ -204,9 +212,9 @@ metaslab_compare(const void *x1, const void *x2)
|
|
|
/*
|
|
|
* If the weights are identical, use the offset to force uniqueness.
|
|
|
*/
|
|
|
- if (m1->ms_map.sm_start < m2->ms_map.sm_start)
|
|
|
+ if (m1->ms_map->sm_start < m2->ms_map->sm_start)
|
|
|
return (-1);
|
|
|
- if (m1->ms_map.sm_start > m2->ms_map.sm_start)
|
|
|
+ if (m1->ms_map->sm_start > m2->ms_map->sm_start)
|
|
|
return (1);
|
|
|
|
|
|
ASSERT3P(m1, ==, m2);
|
|
|
@@ -739,14 +747,15 @@ metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo,
|
|
|
* addition of new space; and for debugging, it ensures that we'd
|
|
|
* data fault on any attempt to use this metaslab before it's ready.
|
|
|
*/
|
|
|
- space_map_create(&msp->ms_map, start, size,
|
|
|
+ msp->ms_map = kmem_zalloc(sizeof (space_map_t), KM_PUSHPAGE);
|
|
|
+ space_map_create(msp->ms_map, start, size,
|
|
|
vd->vdev_ashift, &msp->ms_lock);
|
|
|
|
|
|
metaslab_group_add(mg, msp);
|
|
|
|
|
|
if (metaslab_debug && smo->smo_object != 0) {
|
|
|
mutex_enter(&msp->ms_lock);
|
|
|
- VERIFY(space_map_load(&msp->ms_map, mg->mg_class->mc_ops,
|
|
|
+ VERIFY(space_map_load(msp->ms_map, mg->mg_class->mc_ops,
|
|
|
SM_FREE, smo, spa_meta_objset(vd->vdev_spa)) == 0);
|
|
|
mutex_exit(&msp->ms_lock);
|
|
|
}
|
|
|
@@ -775,24 +784,29 @@ metaslab_fini(metaslab_t *msp)
|
|
|
int t;
|
|
|
|
|
|
vdev_space_update(mg->mg_vd,
|
|
|
- -msp->ms_smo.smo_alloc, 0, -msp->ms_map.sm_size);
|
|
|
+ -msp->ms_smo.smo_alloc, 0, -msp->ms_map->sm_size);
|
|
|
|
|
|
metaslab_group_remove(mg, msp);
|
|
|
|
|
|
mutex_enter(&msp->ms_lock);
|
|
|
|
|
|
- space_map_unload(&msp->ms_map);
|
|
|
- space_map_destroy(&msp->ms_map);
|
|
|
+ space_map_unload(msp->ms_map);
|
|
|
+ space_map_destroy(msp->ms_map);
|
|
|
+ kmem_free(msp->ms_map, sizeof (*msp->ms_map));
|
|
|
|
|
|
for (t = 0; t < TXG_SIZE; t++) {
|
|
|
- space_map_destroy(&msp->ms_allocmap[t]);
|
|
|
- space_map_destroy(&msp->ms_freemap[t]);
|
|
|
+ space_map_destroy(msp->ms_allocmap[t]);
|
|
|
+ space_map_destroy(msp->ms_freemap[t]);
|
|
|
+ kmem_free(msp->ms_allocmap[t], sizeof (*msp->ms_allocmap[t]));
|
|
|
+ kmem_free(msp->ms_freemap[t], sizeof (*msp->ms_freemap[t]));
|
|
|
}
|
|
|
|
|
|
- for (t = 0; t < TXG_DEFER_SIZE; t++)
|
|
|
- space_map_destroy(&msp->ms_defermap[t]);
|
|
|
+ for (t = 0; t < TXG_DEFER_SIZE; t++) {
|
|
|
+ space_map_destroy(msp->ms_defermap[t]);
|
|
|
+ kmem_free(msp->ms_defermap[t], sizeof (*msp->ms_defermap[t]));
|
|
|
+ }
|
|
|
|
|
|
- ASSERT3S(msp->ms_deferspace, ==, 0);
|
|
|
+ ASSERT0(msp->ms_deferspace);
|
|
|
|
|
|
mutex_exit(&msp->ms_lock);
|
|
|
mutex_destroy(&msp->ms_lock);
|
|
|
@@ -809,7 +823,7 @@ static uint64_t
|
|
|
metaslab_weight(metaslab_t *msp)
|
|
|
{
|
|
|
metaslab_group_t *mg = msp->ms_group;
|
|
|
- space_map_t *sm = &msp->ms_map;
|
|
|
+ space_map_t *sm = msp->ms_map;
|
|
|
space_map_obj_t *smo = &msp->ms_smo;
|
|
|
vdev_t *vd = mg->mg_vd;
|
|
|
uint64_t weight, space;
|
|
|
@@ -869,7 +883,7 @@ metaslab_prefetch(metaslab_group_t *mg)
|
|
|
* Prefetch the next potential metaslabs
|
|
|
*/
|
|
|
for (msp = avl_first(t), m = 0; msp; msp = AVL_NEXT(t, msp), m++) {
|
|
|
- space_map_t *sm = &msp->ms_map;
|
|
|
+ space_map_t *sm = msp->ms_map;
|
|
|
space_map_obj_t *smo = &msp->ms_smo;
|
|
|
|
|
|
/* If we have reached our prefetch limit then we're done */
|
|
|
@@ -890,7 +904,7 @@ static int
|
|
|
metaslab_activate(metaslab_t *msp, uint64_t activation_weight)
|
|
|
{
|
|
|
metaslab_group_t *mg = msp->ms_group;
|
|
|
- space_map_t *sm = &msp->ms_map;
|
|
|
+ space_map_t *sm = msp->ms_map;
|
|
|
space_map_ops_t *sm_ops = msp->ms_group->mg_class->mc_ops;
|
|
|
int t;
|
|
|
|
|
|
@@ -899,15 +913,16 @@ metaslab_activate(metaslab_t *msp, uint64_t activation_weight)
|
|
|
if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) {
|
|
|
space_map_load_wait(sm);
|
|
|
if (!sm->sm_loaded) {
|
|
|
- int error = space_map_load(sm, sm_ops, SM_FREE,
|
|
|
- &msp->ms_smo,
|
|
|
+ space_map_obj_t *smo = &msp->ms_smo;
|
|
|
+
|
|
|
+ int error = space_map_load(sm, sm_ops, SM_FREE, smo,
|
|
|
spa_meta_objset(msp->ms_group->mg_vd->vdev_spa));
|
|
|
if (error) {
|
|
|
metaslab_group_sort(msp->ms_group, msp, 0);
|
|
|
return (error);
|
|
|
}
|
|
|
for (t = 0; t < TXG_DEFER_SIZE; t++)
|
|
|
- space_map_walk(&msp->ms_defermap[t],
|
|
|
+ space_map_walk(msp->ms_defermap[t],
|
|
|
space_map_claim, sm);
|
|
|
|
|
|
}
|
|
|
@@ -938,12 +953,159 @@ metaslab_passivate(metaslab_t *msp, uint64_t size)
|
|
|
* this metaslab again. In that case, it had better be empty,
|
|
|
* or we would be leaving space on the table.
|
|
|
*/
|
|
|
- ASSERT(size >= SPA_MINBLOCKSIZE || msp->ms_map.sm_space == 0);
|
|
|
+ ASSERT(size >= SPA_MINBLOCKSIZE || msp->ms_map->sm_space == 0);
|
|
|
metaslab_group_sort(msp->ms_group, msp, MIN(msp->ms_weight, size));
|
|
|
ASSERT((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0);
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
+ * Determine if the in-core space map representation can be condensed on-disk.
|
|
|
+ * We would like to use the following criteria to make our decision:
|
|
|
+ *
|
|
|
+ * 1. The size of the space map object should not dramatically increase as a
|
|
|
+ * result of writing out our in-core free map.
|
|
|
+ *
|
|
|
+ * 2. The minimal on-disk space map representation is zfs_condense_pct/100
|
|
|
+ * times the size of the in-core representation (e.g. zfs_condense_pct = 110
|
|
|
+ * and in-core = 1MB, minimal = 1.1MB).
|
|
|
+ *
|
|
|
+ * Checking the first condition is tricky since we don't want to walk
|
|
|
+ * the entire AVL tree calculating the estimated on-disk size. Instead we
|
|
|
+ * use the size-ordered AVL tree in the space map and calculate the
|
|
|
+ * size required for the largest segment in our in-core free map. If the
|
|
|
+ * size required to represent that segment on disk is larger than the space
|
|
|
+ * map object then we avoid condensing this map.
|
|
|
+ *
|
|
|
+ * To determine the second criterion we use a best-case estimate and assume
|
|
|
+ * each segment can be represented on-disk as a single 64-bit entry. We refer
|
|
|
+ * to this best-case estimate as the space map's minimal form.
|
|
|
+ */
|
|
|
+static boolean_t
+metaslab_should_condense(metaslab_t *msp)
+{
+	space_map_t *sm = msp->ms_map;
+	space_map_obj_t *smo = &msp->ms_smo_syncing;
+	space_seg_t *largest;
+	uint64_t run, nentries, largest_bytes, minimal_bytes;
+
+	ASSERT(MUTEX_HELD(&msp->ms_lock));
+	ASSERT(sm->sm_loaded);
+
+	/*
+	 * sm_pp_root is the size-ordered tree, so its last node is the
+	 * largest free segment. No segments at all means there is
+	 * nothing to weigh: condense.
+	 */
+	largest = avl_last(sm->sm_pp_root);
+	if (largest == NULL)
+		return (B_TRUE);
+
+	/*
+	 * Work out how many bytes of 64-bit entries the largest segment
+	 * alone would occupy on disk. If that already exceeds the whole
+	 * current object, condensing can only make the object bigger.
+	 */
+	run = (largest->ss_end - largest->ss_start) >> sm->sm_shift;
+	nentries = run / (MIN(run, SM_RUN_MAX));
+	largest_bytes = nentries * sizeof (uint64_t);
+	if (largest_bytes > smo->smo_objsize)
+		return (B_FALSE);
+
+	/*
+	 * Best case is one 64-bit entry per in-core segment. Condense
+	 * only once the on-disk object has grown to at least
+	 * zfs_condense_pct percent of that minimal form.
+	 */
+	minimal_bytes = (zfs_condense_pct *
+	    sizeof (uint64_t) * avl_numnodes(&sm->sm_root)) / 100;
+
+	return (smo->smo_objsize >= minimal_bytes);
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * Condense the on-disk space map representation to its minimized form.
|
|
|
+ * The minimized form consists of a small number of allocations followed by
|
|
|
+ * the in-core free map.
|
|
|
+ */
|
|
|
+static void
|
|
|
+metaslab_condense(metaslab_t *msp, uint64_t txg, dmu_tx_t *tx)
|
|
|
+{
|
|
|
+ spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
|
|
|
+ space_map_t *freemap = msp->ms_freemap[txg & TXG_MASK];
|
|
|
+ space_map_t condense_map;
|
|
|
+ space_map_t *sm = msp->ms_map;
|
|
|
+ objset_t *mos = spa_meta_objset(spa);
|
|
|
+ space_map_obj_t *smo = &msp->ms_smo_syncing;
|
|
|
+ int t;
|
|
|
+
|
|
|
+ /*
+ * Preconditions: the caller holds ms_lock, this is sync pass 1,
+ * and the in-core free map (ms_map) is loaded.
+ */
+ ASSERT(MUTEX_HELD(&msp->ms_lock));
|
|
|
+ ASSERT3U(spa_sync_pass(spa), ==, 1);
|
|
|
+ ASSERT(sm->sm_loaded);
|
|
|
+
|
|
|
+ spa_dbgmsg(spa, "condensing: txg %llu, msp[%llu] %p, "
|
|
|
+ "smo size %llu, segments %lu", txg,
|
|
|
+ (msp->ms_map->sm_start / msp->ms_map->sm_size), msp,
|
|
|
+ smo->smo_objsize, avl_numnodes(&sm->sm_root));
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Create a map that is 100% allocated. We then remove segments
|
|
|
+ * that have been freed in this txg, any deferred frees that exist,
|
|
|
+ * and any allocation in the future. Removing segments should be
|
|
|
+ * a relatively inexpensive operation since we expect these maps to
|
|
|
+ * have a small number of nodes.
|
|
|
+ */
|
|
|
+ space_map_create(&condense_map, sm->sm_start, sm->sm_size,
|
|
|
+ sm->sm_shift, sm->sm_lock);
|
|
|
+ space_map_add(&condense_map, condense_map.sm_start,
|
|
|
+ condense_map.sm_size);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Remove what's been freed in this txg from the condense_map.
|
|
|
+ * Since we're in sync_pass 1, we know that all the frees from
|
|
|
+ * this txg are in the freemap.
|
|
|
+ */
|
|
|
+ space_map_walk(freemap, space_map_remove, &condense_map);
|
|
|
+
|
|
|
+ /* Remove the deferred frees held in every defer map. */
+ for (t = 0; t < TXG_DEFER_SIZE; t++)
|
|
|
+ space_map_walk(msp->ms_defermap[t],
|
|
|
+ space_map_remove, &condense_map);
|
|
|
+
|
|
|
+ /* Remove allocations belonging to the txgs after this one. */
+ for (t = 1; t < TXG_CONCURRENT_STATES; t++)
|
|
|
+ space_map_walk(msp->ms_allocmap[(txg + t) & TXG_MASK],
|
|
|
+ space_map_remove, &condense_map);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * We're about to drop the metaslab's lock thus allowing
|
|
|
+ * other consumers to change its content. Set the
|
|
|
+ * space_map's sm_condensing flag to ensure that
|
|
|
+ * allocations on this metaslab do not occur while we're
|
|
|
+ * in the middle of committing it to disk. This is only critical
|
|
|
+ * for the ms_map as all other space_maps use per txg
|
|
|
+ * views of their content.
|
|
|
+ */
|
|
|
+ sm->sm_condensing = B_TRUE;
|
|
|
+
|
|
|
+ /* The on-disk object is truncated with ms_lock dropped. */
+ mutex_exit(&msp->ms_lock);
|
|
|
+ space_map_truncate(smo, mos, tx);
|
|
|
+ mutex_enter(&msp->ms_lock);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * While we would ideally like to create a space_map representation
|
|
|
+ * that consists only of allocation records, doing so can be
|
|
|
+ * prohibitively expensive because the in-core free map can be
|
|
|
+ * large, and therefore computationally expensive to subtract
|
|
|
+ * from the condense_map. Instead we sync out two maps, a cheap
|
|
|
+ * allocation only map followed by the in-core free map. While not
|
|
|
+ * optimal, this is typically close to optimal, and much cheaper to
|
|
|
+ * compute.
|
|
|
+ */
|
|
|
+ space_map_sync(&condense_map, SM_ALLOC, smo, mos, tx);
|
|
|
+ space_map_vacate(&condense_map, NULL, NULL);
|
|
|
+ space_map_destroy(&condense_map);
|
|
|
+
|
|
|
+ /* Second map: the in-core free map, written as SM_FREE records. */
+ space_map_sync(sm, SM_FREE, smo, mos, tx);
|
|
|
+ sm->sm_condensing = B_FALSE;
|
|
|
+
|
|
|
+ spa_dbgmsg(spa, "condensed: txg %llu, msp[%llu] %p, "
|
|
|
+ "smo size %llu", txg,
|
|
|
+ (msp->ms_map->sm_start / msp->ms_map->sm_size), msp,
|
|
|
+ smo->smo_objsize);
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
* Write a metaslab to disk in the context of the specified transaction group.
|
|
|
*/
|
|
|
void
|
|
|
@@ -952,18 +1114,29 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
|
|
|
vdev_t *vd = msp->ms_group->mg_vd;
|
|
|
spa_t *spa = vd->vdev_spa;
|
|
|
objset_t *mos = spa_meta_objset(spa);
|
|
|
- space_map_t *allocmap = &msp->ms_allocmap[txg & TXG_MASK];
|
|
|
- space_map_t *freemap = &msp->ms_freemap[txg & TXG_MASK];
|
|
|
- space_map_t *freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
|
|
|
- space_map_t *sm = &msp->ms_map;
|
|
|
+ space_map_t *allocmap = msp->ms_allocmap[txg & TXG_MASK];
|
|
|
+ space_map_t **freemap = &msp->ms_freemap[txg & TXG_MASK];
|
|
|
+ space_map_t **freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
|
|
|
+ space_map_t *sm = msp->ms_map;
|
|
|
space_map_obj_t *smo = &msp->ms_smo_syncing;
|
|
|
dmu_buf_t *db;
|
|
|
dmu_tx_t *tx;
|
|
|
- int t;
|
|
|
|
|
|
ASSERT(!vd->vdev_ishole);
|
|
|
|
|
|
- if (allocmap->sm_space == 0 && freemap->sm_space == 0)
|
|
|
+ /*
|
|
|
+ * This metaslab has just been added so there's no work to do now.
|
|
|
+ */
|
|
|
+ if (*freemap == NULL) {
|
|
|
+ ASSERT3P(allocmap, ==, NULL);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ ASSERT3P(allocmap, !=, NULL);
|
|
|
+ ASSERT3P(*freemap, !=, NULL);
|
|
|
+ ASSERT3P(*freed_map, !=, NULL);
|
|
|
+
|
|
|
+ if (allocmap->sm_space == 0 && (*freemap)->sm_space == 0)
|
|
|
return;
|
|
|
|
|
|
/*
|
|
|
@@ -991,49 +1164,36 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
|
|
|
|
|
|
mutex_enter(&msp->ms_lock);
|
|
|
|
|
|
- space_map_walk(freemap, space_map_add, freed_map);
|
|
|
-
|
|
|
- if (sm->sm_loaded && spa_sync_pass(spa) == 1 && smo->smo_objsize >=
|
|
|
- 2 * sizeof (uint64_t) * avl_numnodes(&sm->sm_root)) {
|
|
|
- /*
|
|
|
- * The in-core space map representation is twice as compact
|
|
|
- * as the on-disk one, so it's time to condense the latter
|
|
|
- * by generating a pure allocmap from first principles.
|
|
|
- *
|
|
|
- * This metaslab is 100% allocated,
|
|
|
- * minus the content of the in-core map (sm),
|
|
|
- * minus what's been freed this txg (freed_map),
|
|
|
- * minus deferred frees (ms_defermap[]),
|
|
|
- * minus allocations from txgs in the future
|
|
|
- * (because they haven't been committed yet).
|
|
|
- */
|
|
|
- space_map_vacate(allocmap, NULL, NULL);
|
|
|
- space_map_vacate(freemap, NULL, NULL);
|
|
|
-
|
|
|
- space_map_add(allocmap, allocmap->sm_start, allocmap->sm_size);
|
|
|
-
|
|
|
- space_map_walk(sm, space_map_remove, allocmap);
|
|
|
- space_map_walk(freed_map, space_map_remove, allocmap);
|
|
|
-
|
|
|
- for (t = 0; t < TXG_DEFER_SIZE; t++)
|
|
|
- space_map_walk(&msp->ms_defermap[t],
|
|
|
- space_map_remove, allocmap);
|
|
|
+ if (sm->sm_loaded && spa_sync_pass(spa) == 1 &&
|
|
|
+ metaslab_should_condense(msp)) {
|
|
|
+ metaslab_condense(msp, txg, tx);
|
|
|
+ } else {
|
|
|
+ space_map_sync(allocmap, SM_ALLOC, smo, mos, tx);
|
|
|
+ space_map_sync(*freemap, SM_FREE, smo, mos, tx);
|
|
|
+ }
|
|
|
|
|
|
- for (t = 1; t < TXG_CONCURRENT_STATES; t++)
|
|
|
- space_map_walk(&msp->ms_allocmap[(txg + t) & TXG_MASK],
|
|
|
- space_map_remove, allocmap);
|
|
|
+ space_map_vacate(allocmap, NULL, NULL);
|
|
|
|
|
|
- mutex_exit(&msp->ms_lock);
|
|
|
- space_map_truncate(smo, mos, tx);
|
|
|
- mutex_enter(&msp->ms_lock);
|
|
|
+ /*
|
|
|
+ * For sync pass 1, we avoid walking the entire space map and
|
|
|
+ * instead will just swap the pointers for freemap and
|
|
|
+ * freed_map. We can safely do this since the freed_map is
|
|
|
+ * guaranteed to be empty on the initial pass.
|
|
|
+ */
|
|
|
+ if (spa_sync_pass(spa) == 1) {
|
|
|
+ ASSERT0((*freed_map)->sm_space);
|
|
|
+ ASSERT0(avl_numnodes(&(*freed_map)->sm_root));
|
|
|
+ space_map_swap(freemap, freed_map);
|
|
|
+ } else {
|
|
|
+ space_map_vacate(*freemap, space_map_add, *freed_map);
|
|
|
}
|
|
|
|
|
|
- space_map_sync(allocmap, SM_ALLOC, smo, mos, tx);
|
|
|
- space_map_sync(freemap, SM_FREE, smo, mos, tx);
|
|
|
+ ASSERT0(msp->ms_allocmap[txg & TXG_MASK]->sm_space);
|
|
|
+ ASSERT0(msp->ms_freemap[txg & TXG_MASK]->sm_space);
|
|
|
|
|
|
mutex_exit(&msp->ms_lock);
|
|
|
|
|
|
- VERIFY(0 == dmu_bonus_hold(mos, smo->smo_object, FTAG, &db));
|
|
|
+ VERIFY0(dmu_bonus_hold(mos, smo->smo_object, FTAG, &db));
|
|
|
dmu_buf_will_dirty(db, tx);
|
|
|
ASSERT3U(db->db_size, >=, sizeof (*smo));
|
|
|
bcopy(smo, db->db_data, sizeof (*smo));
|
|
|
@@ -1051,9 +1211,9 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
|
|
|
{
|
|
|
space_map_obj_t *smo = &msp->ms_smo;
|
|
|
space_map_obj_t *smosync = &msp->ms_smo_syncing;
|
|
|
- space_map_t *sm = &msp->ms_map;
|
|
|
- space_map_t *freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
|
|
|
- space_map_t *defer_map = &msp->ms_defermap[txg % TXG_DEFER_SIZE];
|
|
|
+ space_map_t *sm = msp->ms_map;
|
|
|
+ space_map_t *freed_map = msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
|
|
|
+ space_map_t *defer_map = msp->ms_defermap[txg % TXG_DEFER_SIZE];
|
|
|
metaslab_group_t *mg = msp->ms_group;
|
|
|
vdev_t *vd = mg->mg_vd;
|
|
|
int64_t alloc_delta, defer_delta;
|
|
|
@@ -1065,19 +1225,30 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
|
|
|
|
|
|
/*
|
|
|
* If this metaslab is just becoming available, initialize its
|
|
|
- * allocmaps and freemaps and add its capacity to the vdev.
|
|
|
+ * allocmaps, freemaps, and defermap and add its capacity to the vdev.
|
|
|
*/
|
|
|
- if (freed_map->sm_size == 0) {
|
|
|
+ if (freed_map == NULL) {
|
|
|
+ ASSERT(defer_map == NULL);
|
|
|
for (t = 0; t < TXG_SIZE; t++) {
|
|
|
- space_map_create(&msp->ms_allocmap[t], sm->sm_start,
|
|
|
+ msp->ms_allocmap[t] = kmem_zalloc(sizeof (space_map_t),
|
|
|
+ KM_PUSHPAGE);
|
|
|
+ space_map_create(msp->ms_allocmap[t], sm->sm_start,
|
|
|
sm->sm_size, sm->sm_shift, sm->sm_lock);
|
|
|
- space_map_create(&msp->ms_freemap[t], sm->sm_start,
|
|
|
+ msp->ms_freemap[t] = kmem_zalloc(sizeof (space_map_t),
|
|
|
+ KM_PUSHPAGE);
|
|
|
+ space_map_create(msp->ms_freemap[t], sm->sm_start,
|
|
|
sm->sm_size, sm->sm_shift, sm->sm_lock);
|
|
|
}
|
|
|
|
|
|
- for (t = 0; t < TXG_DEFER_SIZE; t++)
|
|
|
- space_map_create(&msp->ms_defermap[t], sm->sm_start,
|
|
|
+ for (t = 0; t < TXG_DEFER_SIZE; t++) {
|
|
|
+ msp->ms_defermap[t] = kmem_zalloc(sizeof (space_map_t),
|
|
|
+ KM_PUSHPAGE);
|
|
|
+ space_map_create(msp->ms_defermap[t], sm->sm_start,
|
|
|
sm->sm_size, sm->sm_shift, sm->sm_lock);
|
|
|
+ }
|
|
|
+
|
|
|
+ freed_map = msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
|
|
|
+ defer_map = msp->ms_defermap[txg % TXG_DEFER_SIZE];
|
|
|
|
|
|
vdev_space_update(vd, 0, 0, sm->sm_size);
|
|
|
}
|
|
|
@@ -1087,8 +1258,8 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
|
|
|
|
|
|
vdev_space_update(vd, alloc_delta + defer_delta, defer_delta, 0);
|
|
|
|
|
|
- ASSERT(msp->ms_allocmap[txg & TXG_MASK].sm_space == 0);
|
|
|
- ASSERT(msp->ms_freemap[txg & TXG_MASK].sm_space == 0);
|
|
|
+ ASSERT(msp->ms_allocmap[txg & TXG_MASK]->sm_space == 0);
|
|
|
+ ASSERT(msp->ms_freemap[txg & TXG_MASK]->sm_space == 0);
|
|
|
|
|
|
/*
|
|
|
* If there's a space_map_load() in progress, wait for it to complete
|
|
|
@@ -1122,7 +1293,7 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
|
|
|
int evictable = 1;
|
|
|
|
|
|
for (t = 1; t < TXG_CONCURRENT_STATES; t++)
|
|
|
- if (msp->ms_allocmap[(txg + t) & TXG_MASK].sm_space)
|
|
|
+ if (msp->ms_allocmap[(txg + t) & TXG_MASK]->sm_space)
|
|
|
evictable = 0;
|
|
|
|
|
|
if (evictable && !metaslab_debug)
|
|
|
@@ -1148,7 +1319,7 @@ metaslab_sync_reassess(metaslab_group_t *mg)
|
|
|
for (m = 0; m < vd->vdev_ms_count; m++) {
|
|
|
metaslab_t *msp = vd->vdev_ms[m];
|
|
|
|
|
|
- if (msp->ms_map.sm_start > mg->mg_bonus_area)
|
|
|
+ if (msp->ms_map->sm_start > mg->mg_bonus_area)
|
|
|
break;
|
|
|
|
|
|
mutex_enter(&msp->ms_lock);
|
|
|
@@ -1169,7 +1340,7 @@ metaslab_distance(metaslab_t *msp, dva_t *dva)
|
|
|
{
|
|
|
uint64_t ms_shift = msp->ms_group->mg_vd->vdev_ms_shift;
|
|
|
uint64_t offset = DVA_GET_OFFSET(dva) >> ms_shift;
|
|
|
- uint64_t start = msp->ms_map.sm_start >> ms_shift;
|
|
|
+ uint64_t start = msp->ms_map->sm_start >> ms_shift;
|
|
|
|
|
|
if (msp->ms_group->mg_vd->vdev_id != DVA_GET_VDEV(dva))
|
|
|
return (1ULL << 63);
|
|
|
@@ -1257,6 +1428,16 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
|
|
|
mutex_enter(&msp->ms_lock);
|
|
|
|
|
|
/*
|
|
|
+ * If this metaslab is currently condensing then pick again as
|
|
|
+ * we can't manipulate this metaslab until it's committed
|
|
|
+ * to disk.
|
|
|
+ */
|
|
|
+ if (msp->ms_map->sm_condensing) {
|
|
|
+ mutex_exit(&msp->ms_lock);
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ /*
|
|
|
* Ensure that the metaslab we have selected is still
|
|
|
* capable of handling our request. It's possible that
|
|
|
* another thread may have changed the weight while we
|
|
|
@@ -1282,20 +1463,20 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
|
|
|
continue;
|
|
|
}
|
|
|
|
|
|
- if ((offset = space_map_alloc(&msp->ms_map, asize)) != -1ULL)
|
|
|
+ if ((offset = space_map_alloc(msp->ms_map, asize)) != -1ULL)
|
|
|
break;
|
|
|
|
|
|
atomic_inc_64(&mg->mg_alloc_failures);
|
|
|
|
|
|
- metaslab_passivate(msp, space_map_maxsize(&msp->ms_map));
|
|
|
+ metaslab_passivate(msp, space_map_maxsize(msp->ms_map));
|
|
|
|
|
|
mutex_exit(&msp->ms_lock);
|
|
|
}
|
|
|
|
|
|
- if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0)
|
|
|
+ if (msp->ms_allocmap[txg & TXG_MASK]->sm_space == 0)
|
|
|
vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg);
|
|
|
|
|
|
- space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, asize);
|
|
|
+ space_map_add(msp->ms_allocmap[txg & TXG_MASK], offset, asize);
|
|
|
|
|
|
mutex_exit(&msp->ms_lock);
|
|
|
|
|
|
@@ -1545,13 +1726,13 @@ metaslab_free_dva(spa_t *spa, const dva_t *dva, uint64_t txg, boolean_t now)
|
|
|
mutex_enter(&msp->ms_lock);
|
|
|
|
|
|
if (now) {
|
|
|
- space_map_remove(&msp->ms_allocmap[txg & TXG_MASK],
|
|
|
+ space_map_remove(msp->ms_allocmap[txg & TXG_MASK],
|
|
|
offset, size);
|
|
|
- space_map_free(&msp->ms_map, offset, size);
|
|
|
+ space_map_free(msp->ms_map, offset, size);
|
|
|
} else {
|
|
|
- if (msp->ms_freemap[txg & TXG_MASK].sm_space == 0)
|
|
|
+ if (msp->ms_freemap[txg & TXG_MASK]->sm_space == 0)
|
|
|
vdev_dirty(vd, VDD_METASLAB, msp, txg);
|
|
|
- space_map_add(&msp->ms_freemap[txg & TXG_MASK], offset, size);
|
|
|
+ space_map_add(msp->ms_freemap[txg & TXG_MASK], offset, size);
|
|
|
}
|
|
|
|
|
|
mutex_exit(&msp->ms_lock);
|
|
|
@@ -1586,10 +1767,10 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg)
|
|
|
|
|
|
mutex_enter(&msp->ms_lock);
|
|
|
|
|
|
- if ((txg != 0 && spa_writeable(spa)) || !msp->ms_map.sm_loaded)
|
|
|
+ if ((txg != 0 && spa_writeable(spa)) || !msp->ms_map->sm_loaded)
|
|
|
error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY);
|
|
|
|
|
|
- if (error == 0 && !space_map_contains(&msp->ms_map, offset, size))
|
|
|
+ if (error == 0 && !space_map_contains(msp->ms_map, offset, size))
|
|
|
error = ENOENT;
|
|
|
|
|
|
if (error || txg == 0) { /* txg == 0 indicates dry run */
|
|
|
@@ -1597,12 +1778,12 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg)
|
|
|
return (error);
|
|
|
}
|
|
|
|
|
|
- space_map_claim(&msp->ms_map, offset, size);
|
|
|
+ space_map_claim(msp->ms_map, offset, size);
|
|
|
|
|
|
if (spa_writeable(spa)) { /* don't dirty if we're zdb(1M) */
|
|
|
- if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0)
|
|
|
+ if (msp->ms_allocmap[txg & TXG_MASK]->sm_space == 0)
|
|
|
vdev_dirty(vd, VDD_METASLAB, msp, txg);
|
|
|
- space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size);
|
|
|
+ space_map_add(msp->ms_allocmap[txg & TXG_MASK], offset, size);
|
|
|
}
|
|
|
|
|
|
mutex_exit(&msp->ms_lock);
|
|
|
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
|
|
|
index 0babf47..65f78b7 100644
|
|
|
--- a/module/zfs/spa.c
|
|
|
+++ b/module/zfs/spa.c
|
|
|
@@ -64,6 +64,7 @@
|
|
|
#include <sys/zfs_ioctl.h>
|
|
|
#include <sys/dsl_scan.h>
|
|
|
#include <sys/zfeature.h>
|
|
|
+#include <sys/zvol.h>
|
|
|
|
|
|
#ifdef _KERNEL
|
|
|
#include <sys/bootprops.h>
|
|
|
@@ -78,23 +79,25 @@
|
|
|
#include "zfs_comutil.h"
|
|
|
|
|
|
typedef enum zti_modes {
|
|
|
- zti_mode_fixed, /* value is # of threads (min 1) */
|
|
|
- zti_mode_online_percent, /* value is % of online CPUs */
|
|
|
- zti_mode_batch, /* cpu-intensive; value is ignored */
|
|
|
- zti_mode_null, /* don't create a taskq */
|
|
|
- zti_nmodes
|
|
|
+ ZTI_MODE_FIXED, /* value is # of threads (min 1) */
|
|
|
+ ZTI_MODE_ONLINE_PERCENT, /* value is % of online CPUs */
|
|
|
+ ZTI_MODE_BATCH, /* cpu-intensive; value is ignored */
|
|
|
+ ZTI_MODE_NULL, /* don't create a taskq */
|
|
|
+ ZTI_NMODES
|
|
|
} zti_modes_t;
|
|
|
|
|
|
-#define ZTI_FIX(n) { zti_mode_fixed, (n) }
|
|
|
-#define ZTI_PCT(n) { zti_mode_online_percent, (n) }
|
|
|
-#define ZTI_BATCH { zti_mode_batch, 0 }
|
|
|
-#define ZTI_NULL { zti_mode_null, 0 }
|
|
|
+#define ZTI_P(n, q) { ZTI_MODE_FIXED, (n), (q) }
|
|
|
+#define ZTI_PCT(n) { ZTI_MODE_ONLINE_PERCENT, (n), 1 }
|
|
|
+#define ZTI_BATCH { ZTI_MODE_BATCH, 0, 1 }
|
|
|
+#define ZTI_NULL { ZTI_MODE_NULL, 0, 0 }
|
|
|
|
|
|
-#define ZTI_ONE ZTI_FIX(1)
|
|
|
+#define ZTI_N(n) ZTI_P(n, 1)
|
|
|
+#define ZTI_ONE ZTI_N(1)
|
|
|
|
|
|
typedef struct zio_taskq_info {
|
|
|
- enum zti_modes zti_mode;
|
|
|
+ zti_modes_t zti_mode;
|
|
|
uint_t zti_value;
|
|
|
+ uint_t zti_count;
|
|
|
} zio_taskq_info_t;
|
|
|
|
|
|
static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
|
|
|
@@ -102,17 +105,30 @@ static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
|
|
|
};
|
|
|
|
|
|
/*
|
|
|
- * Define the taskq threads for the following I/O types:
|
|
|
- * NULL, READ, WRITE, FREE, CLAIM, and IOCTL
|
|
|
+ * This table defines the taskq settings for each ZFS I/O type. When
|
|
|
+ * initializing a pool, we use this table to create an appropriately sized
|
|
|
+ * taskq. Some operations are low volume and therefore have a small, static
|
|
|
+ * number of threads assigned to their taskqs using the ZTI_N(#) or ZTI_ONE
|
|
|
+ * macros. Other operations process a large amount of data; the ZTI_BATCH
|
|
|
+ * macro causes us to create a taskq oriented for throughput. Some operations
|
|
|
+ * are so high frequency and short-lived that the taskq itself can become a
|
|
|
+ * point of lock contention. The ZTI_P(#, #) macro indicates that we need an
|
|
|
+ * additional degree of parallelism specified by the number of threads per-
|
|
|
+ * taskq and the number of taskqs; when dispatching an event in this case, the
|
|
|
+ * particular taskq is chosen at random.
|
|
|
+ *
|
|
|
+ * The different taskq priorities are to handle the different contexts (issue
|
|
|
+ * and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that
|
|
|
+ * need to be handled with minimum delay.
|
|
|
*/
|
|
|
const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
|
|
|
/* ISSUE ISSUE_HIGH INTR INTR_HIGH */
|
|
|
- { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
|
|
|
- { ZTI_FIX(8), ZTI_NULL, ZTI_BATCH, ZTI_NULL },
|
|
|
- { ZTI_BATCH, ZTI_FIX(5), ZTI_FIX(16), ZTI_FIX(5) },
|
|
|
- { ZTI_PCT(100), ZTI_NULL, ZTI_ONE, ZTI_NULL },
|
|
|
- { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
|
|
|
- { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
|
|
|
+ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */
|
|
|
+ { ZTI_N(8), ZTI_NULL, ZTI_BATCH, ZTI_NULL }, /* READ */
|
|
|
+ { ZTI_BATCH, ZTI_N(5), ZTI_N(16), ZTI_N(5) }, /* WRITE */
|
|
|
+ { ZTI_P(4, 8), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */
|
|
|
+ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */
|
|
|
+ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* IOCTL */
|
|
|
};
|
|
|
|
|
|
static dsl_syncfunc_t spa_sync_version;
|
|
|
@@ -794,48 +810,146 @@ spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub)
|
|
|
offsetof(spa_error_entry_t, se_avl));
|
|
|
}
|
|
|
|
|
|
-static taskq_t *
|
|
|
-spa_taskq_create(spa_t *spa, const char *name, enum zti_modes mode,
|
|
|
- uint_t value)
|
|
|
+static void
|
|
|
+spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
|
|
|
{
|
|
|
- uint_t flags = TASKQ_PREPOPULATE;
|
|
|
+ const zio_taskq_info_t *ztip = &zio_taskqs[t][q];
|
|
|
+ enum zti_modes mode = ztip->zti_mode;
|
|
|
+ uint_t value = ztip->zti_value;
|
|
|
+ uint_t count = ztip->zti_count;
|
|
|
+ spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
|
|
|
+ char name[32];
|
|
|
+ uint_t i, flags = 0;
|
|
|
boolean_t batch = B_FALSE;
|
|
|
|
|
|
- switch (mode) {
|
|
|
- case zti_mode_null:
|
|
|
- return (NULL); /* no taskq needed */
|
|
|
+ if (mode == ZTI_MODE_NULL) {
|
|
|
+ tqs->stqs_count = 0;
|
|
|
+ tqs->stqs_taskq = NULL;
|
|
|
+ return;
|
|
|
+ }
|
|
|
|
|
|
- case zti_mode_fixed:
|
|
|
- ASSERT3U(value, >=, 1);
|
|
|
- value = MAX(value, 1);
|
|
|
- break;
|
|
|
+ ASSERT3U(count, >, 0);
|
|
|
|
|
|
- case zti_mode_batch:
|
|
|
- batch = B_TRUE;
|
|
|
- flags |= TASKQ_THREADS_CPU_PCT;
|
|
|
- value = zio_taskq_batch_pct;
|
|
|
- break;
|
|
|
+ tqs->stqs_count = count;
|
|
|
+ tqs->stqs_taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP);
|
|
|
|
|
|
- case zti_mode_online_percent:
|
|
|
- flags |= TASKQ_THREADS_CPU_PCT;
|
|
|
- break;
|
|
|
+ for (i = 0; i < count; i++) {
|
|
|
+ taskq_t *tq;
|
|
|
|
|
|
- default:
|
|
|
- panic("unrecognized mode for %s taskq (%u:%u) in "
|
|
|
- "spa_activate()",
|
|
|
- name, mode, value);
|
|
|
- break;
|
|
|
+ switch (mode) {
|
|
|
+ case ZTI_MODE_FIXED:
|
|
|
+ ASSERT3U(value, >=, 1);
|
|
|
+ value = MAX(value, 1);
|
|
|
+ break;
|
|
|
+
|
|
|
+ case ZTI_MODE_BATCH:
|
|
|
+ batch = B_TRUE;
|
|
|
+ flags |= TASKQ_THREADS_CPU_PCT;
|
|
|
+ value = zio_taskq_batch_pct;
|
|
|
+ break;
|
|
|
+
|
|
|
+ case ZTI_MODE_ONLINE_PERCENT:
|
|
|
+ flags |= TASKQ_THREADS_CPU_PCT;
|
|
|
+ break;
|
|
|
+
|
|
|
+ default:
|
|
|
+ panic("unrecognized mode for %s_%s taskq (%u:%u) in "
|
|
|
+ "spa_activate()",
|
|
|
+ zio_type_name[t], zio_taskq_types[q], mode, value);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (count > 1) {
|
|
|
+ (void) snprintf(name, sizeof (name), "%s_%s_%u",
|
|
|
+ zio_type_name[t], zio_taskq_types[q], i);
|
|
|
+ } else {
|
|
|
+ (void) snprintf(name, sizeof (name), "%s_%s",
|
|
|
+ zio_type_name[t], zio_taskq_types[q]);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (zio_taskq_sysdc && spa->spa_proc != &p0) {
|
|
|
+ if (batch)
|
|
|
+ flags |= TASKQ_DC_BATCH;
|
|
|
+
|
|
|
+ tq = taskq_create_sysdc(name, value, 50, INT_MAX,
|
|
|
+ spa->spa_proc, zio_taskq_basedc, flags);
|
|
|
+ } else {
|
|
|
+ tq = taskq_create_proc(name, value, maxclsyspri, 50,
|
|
|
+ INT_MAX, spa->spa_proc, flags);
|
|
|
+ }
|
|
|
+
|
|
|
+ tqs->stqs_taskq[i] = tq;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+static void
|
|
|
+spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
|
|
|
+{
|
|
|
+ spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
|
|
|
+ uint_t i;
|
|
|
+
|
|
|
+ if (tqs->stqs_taskq == NULL) {
|
|
|
+ ASSERT3U(tqs->stqs_count, ==, 0);
|
|
|
+ return;
|
|
|
}
|
|
|
|
|
|
- if (zio_taskq_sysdc && spa->spa_proc != &p0) {
|
|
|
- if (batch)
|
|
|
- flags |= TASKQ_DC_BATCH;
|
|
|
+ for (i = 0; i < tqs->stqs_count; i++) {
|
|
|
+ ASSERT3P(tqs->stqs_taskq[i], !=, NULL);
|
|
|
+ taskq_destroy(tqs->stqs_taskq[i]);
|
|
|
+ }
|
|
|
+
|
|
|
+ kmem_free(tqs->stqs_taskq, tqs->stqs_count * sizeof (taskq_t *));
|
|
|
+ tqs->stqs_taskq = NULL;
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * Dispatch a task to the appropriate taskq for the ZFS I/O type and priority.
|
|
|
+ * Note that a type may have multiple discrete taskqs to avoid lock contention
|
|
|
+ * on the taskq itself. In that case we choose which taskq at random by using
|
|
|
+ * the low bits of gethrtime().
|
|
|
+ */
|
|
|
+void
|
|
|
+spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
|
|
|
+ task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent)
|
|
|
+{
|
|
|
+ spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
|
|
|
+ taskq_t *tq;
|
|
|
+
|
|
|
+ ASSERT3P(tqs->stqs_taskq, !=, NULL);
|
|
|
+ ASSERT3U(tqs->stqs_count, !=, 0);
|
|
|
+
|
|
|
+ if (tqs->stqs_count == 1) {
|
|
|
+ tq = tqs->stqs_taskq[0];
|
|
|
+ } else {
|
|
|
+ tq = tqs->stqs_taskq[((uint64_t)gethrtime()) % tqs->stqs_count];
|
|
|
+ }
|
|
|
+
|
|
|
+ taskq_dispatch_ent(tq, func, arg, flags, ent);
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * Same as spa_taskq_dispatch_ent() but block on the task until completion.
|
|
|
+ */
|
|
|
+void
|
|
|
+spa_taskq_dispatch_sync(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
|
|
|
+ task_func_t *func, void *arg, uint_t flags)
|
|
|
+{
|
|
|
+ spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
|
|
|
+ taskq_t *tq;
|
|
|
+ taskqid_t id;
|
|
|
+
|
|
|
+ ASSERT3P(tqs->stqs_taskq, !=, NULL);
|
|
|
+ ASSERT3U(tqs->stqs_count, !=, 0);
|
|
|
|
|
|
- return (taskq_create_sysdc(name, value, 50, INT_MAX,
|
|
|
- spa->spa_proc, zio_taskq_basedc, flags));
|
|
|
+ if (tqs->stqs_count == 1) {
|
|
|
+ tq = tqs->stqs_taskq[0];
|
|
|
+ } else {
|
|
|
+ tq = tqs->stqs_taskq[((uint64_t)gethrtime()) % tqs->stqs_count];
|
|
|
}
|
|
|
- return (taskq_create_proc(name, value, maxclsyspri, 50, INT_MAX,
|
|
|
- spa->spa_proc, flags));
|
|
|
+
|
|
|
+ id = taskq_dispatch(tq, func, arg, flags);
|
|
|
+ if (id)
|
|
|
+ taskq_wait_id(tq, id);
|
|
|
}
|
|
|
|
|
|
static void
|
|
|
@@ -845,16 +959,7 @@ spa_create_zio_taskqs(spa_t *spa)
|
|
|
|
|
|
for (t = 0; t < ZIO_TYPES; t++) {
|
|
|
for (q = 0; q < ZIO_TASKQ_TYPES; q++) {
|
|
|
- const zio_taskq_info_t *ztip = &zio_taskqs[t][q];
|
|
|
- enum zti_modes mode = ztip->zti_mode;
|
|
|
- uint_t value = ztip->zti_value;
|
|
|
- char name[32];
|
|
|
-
|
|
|
- (void) snprintf(name, sizeof (name),
|
|
|
- "%s_%s", zio_type_name[t], zio_taskq_types[q]);
|
|
|
-
|
|
|
- spa->spa_zio_taskq[t][q] =
|
|
|
- spa_taskq_create(spa, name, mode, value);
|
|
|
+ spa_taskqs_init(spa, t, q);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
@@ -1013,11 +1118,11 @@ spa_deactivate(spa_t *spa)
|
|
|
list_destroy(&spa->spa_config_dirty_list);
|
|
|
list_destroy(&spa->spa_state_dirty_list);
|
|
|
|
|
|
+ taskq_cancel_id(system_taskq, spa->spa_deadman_tqid);
|
|
|
+
|
|
|
for (t = 0; t < ZIO_TYPES; t++) {
|
|
|
for (q = 0; q < ZIO_TASKQ_TYPES; q++) {
|
|
|
- if (spa->spa_zio_taskq[t][q] != NULL)
|
|
|
- taskq_destroy(spa->spa_zio_taskq[t][q]);
|
|
|
- spa->spa_zio_taskq[t][q] = NULL;
|
|
|
+ spa_taskqs_fini(spa, t, q);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
@@ -1752,7 +1857,7 @@ spa_load_verify_done(zio_t *zio)
|
|
|
/*ARGSUSED*/
|
|
|
static int
|
|
|
spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|
|
- arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
|
|
|
+ const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
|
|
|
{
|
|
|
if (bp != NULL) {
|
|
|
zio_t *rio = arg;
|
|
|
@@ -2752,6 +2857,7 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
|
|
|
spa_load_state_t state = SPA_LOAD_OPEN;
|
|
|
int error;
|
|
|
int locked = B_FALSE;
|
|
|
+ int firstopen = B_FALSE;
|
|
|
|
|
|
*spapp = NULL;
|
|
|
|
|
|
@@ -2775,6 +2881,8 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
|
|
|
if (spa->spa_state == POOL_STATE_UNINITIALIZED) {
|
|
|
zpool_rewind_policy_t policy;
|
|
|
|
|
|
+ firstopen = B_TRUE;
|
|
|
+
|
|
|
zpool_get_rewind_policy(nvpolicy ? nvpolicy : spa->spa_config,
|
|
|
&policy);
|
|
|
if (policy.zrp_request & ZPOOL_DO_REWIND)
|
|
|
@@ -2849,6 +2957,11 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
|
|
|
mutex_exit(&spa_namespace_lock);
|
|
|
}
|
|
|
|
|
|
+#ifdef _KERNEL
|
|
|
+ if (firstopen)
|
|
|
+ zvol_create_minors(spa->spa_name);
|
|
|
+#endif
|
|
|
+
|
|
|
*spapp = spa;
|
|
|
|
|
|
return (0);
|
|
|
@@ -3906,6 +4019,10 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
|
|
|
mutex_exit(&spa_namespace_lock);
|
|
|
spa_history_log_version(spa, LOG_POOL_IMPORT);
|
|
|
|
|
|
+#ifdef _KERNEL
|
|
|
+ zvol_create_minors(pool);
|
|
|
+#endif
|
|
|
+
|
|
|
return (0);
|
|
|
}
|
|
|
|
|
|
@@ -5058,7 +5175,7 @@ spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd)
|
|
|
* The evacuation succeeded. Remove any remaining MOS metadata
|
|
|
* associated with this vdev, and wait for these changes to sync.
|
|
|
*/
|
|
|
- ASSERT3U(vd->vdev_stat.vs_alloc, ==, 0);
|
|
|
+ ASSERT0(vd->vdev_stat.vs_alloc);
|
|
|
txg = spa_vdev_config_enter(spa);
|
|
|
vd->vdev_removing = B_TRUE;
|
|
|
vdev_dirty(vd, 0, NULL, txg);
|
|
|
@@ -6017,6 +6134,12 @@ spa_sync(spa_t *spa, uint64_t txg)
|
|
|
|
|
|
tx = dmu_tx_create_assigned(dp, txg);
|
|
|
|
|
|
+ spa->spa_sync_starttime = gethrtime();
|
|
|
+ taskq_cancel_id(system_taskq, spa->spa_deadman_tqid);
|
|
|
+ spa->spa_deadman_tqid = taskq_dispatch_delay(system_taskq,
|
|
|
+ spa_deadman, spa, TQ_SLEEP, ddi_get_lbolt() +
|
|
|
+ NSEC_TO_TICK(spa->spa_deadman_synctime));
|
|
|
+
|
|
|
/*
|
|
|
* If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg,
|
|
|
* set spa_deflate if we have no raid-z vdevs.
|
|
|
@@ -6053,7 +6176,7 @@ spa_sync(spa_t *spa, uint64_t txg)
|
|
|
zio_t *zio = zio_root(spa, NULL, NULL, 0);
|
|
|
VERIFY3U(bpobj_iterate(defer_bpo,
|
|
|
spa_free_sync_cb, zio, tx), ==, 0);
|
|
|
- VERIFY3U(zio_wait(zio), ==, 0);
|
|
|
+ VERIFY0(zio_wait(zio));
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
@@ -6070,7 +6193,7 @@ spa_sync(spa_t *spa, uint64_t txg)
|
|
|
spa_errlog_sync(spa, txg);
|
|
|
dsl_pool_sync(dp, txg);
|
|
|
|
|
|
- if (pass <= SYNC_PASS_DEFERRED_FREE) {
|
|
|
+ if (pass < zfs_sync_pass_deferred_free) {
|
|
|
zio_t *zio = zio_root(spa, NULL, NULL, 0);
|
|
|
bplist_iterate(free_bpl, spa_free_sync_cb,
|
|
|
zio, tx);
|
|
|
@@ -6145,6 +6268,9 @@ spa_sync(spa_t *spa, uint64_t txg)
|
|
|
}
|
|
|
dmu_tx_commit(tx);
|
|
|
|
|
|
+ taskq_cancel_id(system_taskq, spa->spa_deadman_tqid);
|
|
|
+ spa->spa_deadman_tqid = 0;
|
|
|
+
|
|
|
/*
|
|
|
* Clear the dirty config list.
|
|
|
*/
|
|
|
diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c
|
|
|
index 09149e6..849ae46 100644
|
|
|
--- a/module/zfs/spa_config.c
|
|
|
+++ b/module/zfs/spa_config.c
|
|
|
@@ -65,6 +65,7 @@ static uint64_t spa_config_generation = 1;
|
|
|
* userland pools when doing testing.
|
|
|
*/
|
|
|
char *spa_config_path = ZPOOL_CACHE;
|
|
|
+int zfs_autoimport_disable = 0;
|
|
|
|
|
|
/*
|
|
|
* Called when the module is first loaded, this routine loads the configuration
|
|
|
@@ -81,6 +82,9 @@ spa_config_load(void)
|
|
|
struct _buf *file;
|
|
|
uint64_t fsize;
|
|
|
|
|
|
+ if (zfs_autoimport_disable)
|
|
|
+ return;
|
|
|
+
|
|
|
/*
|
|
|
* Open the configuration file.
|
|
|
*/
|
|
|
@@ -508,4 +512,8 @@ EXPORT_SYMBOL(spa_config_update);
|
|
|
|
|
|
module_param(spa_config_path, charp, 0444);
|
|
|
MODULE_PARM_DESC(spa_config_path, "SPA config file (/etc/zfs/zpool.cache)");
|
|
|
+
|
|
|
+module_param(zfs_autoimport_disable, int, 0644);
|
|
|
+MODULE_PARM_DESC(zfs_autoimport_disable, "Disable pool import at module load");
|
|
|
+
|
|
|
#endif
|
|
|
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
|
|
|
index 5ec8e68..0ca9f3a 100644
|
|
|
--- a/module/zfs/spa_misc.c
|
|
|
+++ b/module/zfs/spa_misc.c
|
|
|
@@ -237,6 +237,24 @@ kmem_cache_t *spa_buffer_pool;
|
|
|
int spa_mode_global;
|
|
|
|
|
|
/*
|
|
|
+ * Expiration time in units of zfs_txg_synctime_ms. This value has two
|
|
|
+ * meanings. First it is used to determine when the spa_deadman logic
|
|
|
+ * should fire. By default the spa_deadman will fire if spa_sync has
|
|
|
+ * not completed in 1000 * zfs_txg_synctime_ms (i.e. 1000 seconds).
|
|
|
+ * Secondly, the value determines if an I/O is considered "hung".
|
|
|
+ * Any I/O that has not completed in zfs_deadman_synctime is considered
|
|
|
+ * "hung" resulting in a zevent being posted.
|
|
|
+ * The default is 1000 * zfs_txg_synctime_ms (i.e. 1000 seconds).
|
|
|
+ */
|
|
|
+unsigned long zfs_deadman_synctime = 1000ULL;
|
|
|
+
|
|
|
+/*
|
|
|
+ * By default the deadman is enabled.
|
|
|
+ */
|
|
|
+int zfs_deadman_enabled = 1;
|
|
|
+
|
|
|
+
|
|
|
+/*
|
|
|
* ==========================================================================
|
|
|
* SPA config locking
|
|
|
* ==========================================================================
|
|
|
@@ -413,6 +431,27 @@ spa_lookup(const char *name)
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
+ * Fires when spa_sync has not completed within zfs_deadman_synctime.
|
|
|
+ * If the zfs_deadman_enabled flag is set then it inspects all vdev queues
|
|
|
+ * looking for potentially hung I/Os.
|
|
|
+ */
|
|
|
+void
|
|
|
+spa_deadman(void *arg)
|
|
|
+{
|
|
|
+ spa_t *spa = arg;
|
|
|
+
|
|
|
+ zfs_dbgmsg("slow spa_sync: started %llu seconds ago, calls %llu",
|
|
|
+ (gethrtime() - spa->spa_sync_starttime) / NANOSEC,
|
|
|
+ ++spa->spa_deadman_calls);
|
|
|
+ if (zfs_deadman_enabled)
|
|
|
+ vdev_deadman(spa->spa_root_vdev);
|
|
|
+
|
|
|
+ spa->spa_deadman_tqid = taskq_dispatch_delay(system_taskq,
|
|
|
+ spa_deadman, spa, TQ_SLEEP, ddi_get_lbolt() +
|
|
|
+ NSEC_TO_TICK(spa->spa_deadman_synctime));
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
* Create an uninitialized spa_t with the given name. Requires
|
|
|
* spa_namespace_lock. The caller must ensure that the spa_t doesn't already
|
|
|
* exist by calling spa_lookup() first.
|
|
|
@@ -454,6 +493,9 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
|
|
|
spa->spa_proc = &p0;
|
|
|
spa->spa_proc_state = SPA_PROC_NONE;
|
|
|
|
|
|
+ spa->spa_deadman_synctime = zfs_deadman_synctime *
|
|
|
+ zfs_txg_synctime_ms * MICROSEC;
|
|
|
+
|
|
|
refcount_create(&spa->spa_refcount);
|
|
|
spa_config_lock_init(spa);
|
|
|
|
|
|
@@ -1493,6 +1535,12 @@ spa_prev_software_version(spa_t *spa)
|
|
|
}
|
|
|
|
|
|
uint64_t
|
|
|
+spa_deadman_synctime(spa_t *spa)
|
|
|
+{
|
|
|
+ return (spa->spa_deadman_synctime);
|
|
|
+}
|
|
|
+
|
|
|
+uint64_t
|
|
|
dva_get_dsize_sync(spa_t *spa, const dva_t *dva)
|
|
|
{
|
|
|
uint64_t asize = DVA_GET_ASIZE(dva);
|
|
|
@@ -1585,6 +1633,7 @@ spa_init(int mode)
|
|
|
fm_init();
|
|
|
refcount_init();
|
|
|
unique_init();
|
|
|
+ space_map_init();
|
|
|
zio_init();
|
|
|
dmu_init();
|
|
|
zil_init();
|
|
|
@@ -1607,6 +1656,7 @@ spa_fini(void)
|
|
|
zil_fini();
|
|
|
dmu_fini();
|
|
|
zio_fini();
|
|
|
+ space_map_fini();
|
|
|
unique_fini();
|
|
|
refcount_fini();
|
|
|
fm_fini();
|
|
|
@@ -1812,4 +1862,10 @@ EXPORT_SYMBOL(spa_writeable);
|
|
|
EXPORT_SYMBOL(spa_mode);
|
|
|
|
|
|
EXPORT_SYMBOL(spa_namespace_lock);
|
|
|
+
|
|
|
+module_param(zfs_deadman_synctime, ulong, 0644);
|
|
|
+MODULE_PARM_DESC(zfs_deadman_synctime,"Expire in units of zfs_txg_synctime_ms");
|
|
|
+
|
|
|
+module_param(zfs_deadman_enabled, int, 0644);
|
|
|
+MODULE_PARM_DESC(zfs_deadman_enabled, "Enable deadman timer");
|
|
|
#endif
|
|
|
diff --git a/module/zfs/space_map.c b/module/zfs/space_map.c
|
|
|
index 9c0cdb6..a031f3a 100644
|
|
|
--- a/module/zfs/space_map.c
|
|
|
+++ b/module/zfs/space_map.c
|
|
|
@@ -22,6 +22,9 @@
|
|
|
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
|
|
* Use is subject to license terms.
|
|
|
*/
|
|
|
+/*
|
|
|
+ * Copyright (c) 2012 by Delphix. All rights reserved.
|
|
|
+ */
|
|
|
|
|
|
#include <sys/zfs_context.h>
|
|
|
#include <sys/spa.h>
|
|
|
@@ -29,6 +32,23 @@
|
|
|
#include <sys/zio.h>
|
|
|
#include <sys/space_map.h>
|
|
|
|
|
|
+static kmem_cache_t *space_seg_cache;
|
|
|
+
|
|
|
+void
|
|
|
+space_map_init(void)
|
|
|
+{
|
|
|
+ ASSERT(space_seg_cache == NULL);
|
|
|
+ space_seg_cache = kmem_cache_create("space_seg_cache",
|
|
|
+ sizeof (space_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
|
|
|
+}
|
|
|
+
|
|
|
+void
|
|
|
+space_map_fini(void)
|
|
|
+{
|
|
|
+ kmem_cache_destroy(space_seg_cache);
|
|
|
+ space_seg_cache = NULL;
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* Space map routines.
|
|
|
* NOTE: caller is responsible for all locking.
|
|
|
@@ -73,7 +93,7 @@ void
|
|
|
space_map_destroy(space_map_t *sm)
|
|
|
{
|
|
|
ASSERT(!sm->sm_loaded && !sm->sm_loading);
|
|
|
- VERIFY3U(sm->sm_space, ==, 0);
|
|
|
+ VERIFY0(sm->sm_space);
|
|
|
avl_destroy(&sm->sm_root);
|
|
|
cv_destroy(&sm->sm_load_cv);
|
|
|
}
|
|
|
@@ -87,6 +107,7 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
|
|
|
int merge_before, merge_after;
|
|
|
|
|
|
ASSERT(MUTEX_HELD(sm->sm_lock));
|
|
|
+ VERIFY(!sm->sm_condensing);
|
|
|
VERIFY(size != 0);
|
|
|
VERIFY3U(start, >=, sm->sm_start);
|
|
|
VERIFY3U(end, <=, sm->sm_start + sm->sm_size);
|
|
|
@@ -121,7 +142,7 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
|
|
|
avl_remove(sm->sm_pp_root, ss_after);
|
|
|
}
|
|
|
ss_after->ss_start = ss_before->ss_start;
|
|
|
- kmem_free(ss_before, sizeof (*ss_before));
|
|
|
+ kmem_cache_free(space_seg_cache, ss_before);
|
|
|
ss = ss_after;
|
|
|
} else if (merge_before) {
|
|
|
ss_before->ss_end = end;
|
|
|
@@ -134,7 +155,7 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
|
|
|
avl_remove(sm->sm_pp_root, ss_after);
|
|
|
ss = ss_after;
|
|
|
} else {
|
|
|
- ss = kmem_alloc(sizeof (*ss), KM_PUSHPAGE);
|
|
|
+ ss = kmem_cache_alloc(space_seg_cache, KM_PUSHPAGE);
|
|
|
ss->ss_start = start;
|
|
|
ss->ss_end = end;
|
|
|
avl_insert(&sm->sm_root, ss, where);
|
|
|
@@ -155,6 +176,7 @@ space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
|
|
|
int left_over, right_over;
|
|
|
|
|
|
ASSERT(MUTEX_HELD(sm->sm_lock));
|
|
|
+ VERIFY(!sm->sm_condensing);
|
|
|
VERIFY(size != 0);
|
|
|
VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0);
|
|
|
VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0);
|
|
|
@@ -181,7 +203,7 @@ space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
|
|
|
avl_remove(sm->sm_pp_root, ss);
|
|
|
|
|
|
if (left_over && right_over) {
|
|
|
- newseg = kmem_alloc(sizeof (*newseg), KM_PUSHPAGE);
|
|
|
+ newseg = kmem_cache_alloc(space_seg_cache, KM_PUSHPAGE);
|
|
|
newseg->ss_start = end;
|
|
|
newseg->ss_end = ss->ss_end;
|
|
|
ss->ss_end = start;
|
|
|
@@ -194,7 +216,7 @@ space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
|
|
|
ss->ss_start = end;
|
|
|
} else {
|
|
|
avl_remove(&sm->sm_root, ss);
|
|
|
- kmem_free(ss, sizeof (*ss));
|
|
|
+ kmem_cache_free(space_seg_cache, ss);
|
|
|
ss = NULL;
|
|
|
}
|
|
|
|
|
|
@@ -224,6 +246,20 @@ space_map_contains(space_map_t *sm, uint64_t start, uint64_t size)
|
|
|
}
|
|
|
|
|
|
void
|
|
|
+space_map_swap(space_map_t **msrc, space_map_t **mdst)
|
|
|
+{
|
|
|
+ space_map_t *sm;
|
|
|
+
|
|
|
+ ASSERT(MUTEX_HELD((*msrc)->sm_lock));
|
|
|
+ ASSERT0((*mdst)->sm_space);
|
|
|
+ ASSERT0(avl_numnodes(&(*mdst)->sm_root));
|
|
|
+
|
|
|
+ sm = *msrc;
|
|
|
+ *msrc = *mdst;
|
|
|
+ *mdst = sm;
|
|
|
+}
|
|
|
+
|
|
|
+void
|
|
|
space_map_vacate(space_map_t *sm, space_map_func_t *func, space_map_t *mdest)
|
|
|
{
|
|
|
space_seg_t *ss;
|
|
|
@@ -234,7 +270,7 @@ space_map_vacate(space_map_t *sm, space_map_func_t *func, space_map_t *mdest)
|
|
|
while ((ss = avl_destroy_nodes(&sm->sm_root, &cookie)) != NULL) {
|
|
|
if (func != NULL)
|
|
|
func(mdest, ss->ss_start, ss->ss_end - ss->ss_start);
|
|
|
- kmem_free(ss, sizeof (*ss));
|
|
|
+ kmem_cache_free(space_seg_cache, ss);
|
|
|
}
|
|
|
sm->sm_space = 0;
|
|
|
}
|
|
|
@@ -286,7 +322,7 @@ space_map_load(space_map_t *sm, space_map_ops_t *ops, uint8_t maptype,
|
|
|
space = smo->smo_alloc;
|
|
|
|
|
|
ASSERT(sm->sm_ops == NULL);
|
|
|
- VERIFY3U(sm->sm_space, ==, 0);
|
|
|
+ VERIFY0(sm->sm_space);
|
|
|
|
|
|
if (maptype == SM_FREE) {
|
|
|
space_map_add(sm, sm->sm_start, sm->sm_size);
|
|
|
@@ -404,9 +440,9 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
|
|
|
space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx)
|
|
|
{
|
|
|
spa_t *spa = dmu_objset_spa(os);
|
|
|
- void *cookie = NULL;
|
|
|
+ avl_tree_t *t = &sm->sm_root;
|
|
|
space_seg_t *ss;
|
|
|
- uint64_t bufsize, start, size, run_len;
|
|
|
+ uint64_t bufsize, start, size, run_len, total, sm_space, nodes;
|
|
|
uint64_t *entry, *entry_map, *entry_map_end;
|
|
|
|
|
|
ASSERT(MUTEX_HELD(sm->sm_lock));
|
|
|
@@ -435,11 +471,14 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
|
|
|
SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(spa)) |
|
|
|
SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx));
|
|
|
|
|
|
- while ((ss = avl_destroy_nodes(&sm->sm_root, &cookie)) != NULL) {
|
|
|
+ total = 0;
|
|
|
+ nodes = avl_numnodes(&sm->sm_root);
|
|
|
+ sm_space = sm->sm_space;
|
|
|
+ for (ss = avl_first(t); ss != NULL; ss = AVL_NEXT(t, ss)) {
|
|
|
size = ss->ss_end - ss->ss_start;
|
|
|
start = (ss->ss_start - sm->sm_start) >> sm->sm_shift;
|
|
|
|
|
|
- sm->sm_space -= size;
|
|
|
+ total += size;
|
|
|
size >>= sm->sm_shift;
|
|
|
|
|
|
while (size) {
|
|
|
@@ -461,7 +500,6 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
|
|
|
start += run_len;
|
|
|
size -= run_len;
|
|
|
}
|
|
|
- kmem_free(ss, sizeof (*ss));
|
|
|
}
|
|
|
|
|
|
if (entry != entry_map) {
|
|
|
@@ -473,9 +511,15 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
|
|
|
smo->smo_objsize += size;
|
|
|
}
|
|
|
|
|
|
- zio_buf_free(entry_map, bufsize);
|
|
|
+ /*
|
|
|
+ * Ensure that the space_map's accounting wasn't changed
|
|
|
+ * while we were in the middle of writing it out.
|
|
|
+ */
|
|
|
+ VERIFY3U(nodes, ==, avl_numnodes(&sm->sm_root));
|
|
|
+ VERIFY3U(sm->sm_space, ==, sm_space);
|
|
|
+ VERIFY3U(sm->sm_space, ==, total);
|
|
|
|
|
|
- VERIFY3U(sm->sm_space, ==, 0);
|
|
|
+ zio_buf_free(entry_map, bufsize);
|
|
|
}
|
|
|
|
|
|
void
|
|
|
diff --git a/module/zfs/txg.c b/module/zfs/txg.c
|
|
|
index c7c3df3..7c820af 100644
|
|
|
--- a/module/zfs/txg.c
|
|
|
+++ b/module/zfs/txg.c
|
|
|
@@ -367,6 +367,13 @@ txg_quiesce(dsl_pool_t *dp, uint64_t txg)
|
|
|
tx->tx_open_txg++;
|
|
|
|
|
|
/*
|
|
|
+ * Now that we've incremented tx_open_txg, we can let threads
|
|
|
+ * enter the next transaction group.
|
|
|
+ */
|
|
|
+ for (c = 0; c < max_ncpus; c++)
|
|
|
+ mutex_exit(&tx->tx_cpu[c].tc_lock);
|
|
|
+
|
|
|
+ /*
|
|
|
* Measure how long the txg was open and replace the kstat.
|
|
|
*/
|
|
|
th = dsl_pool_txg_history_get(dp, txg);
|
|
|
@@ -376,13 +383,6 @@ txg_quiesce(dsl_pool_t *dp, uint64_t txg)
|
|
|
dsl_pool_txg_history_add(dp, tx->tx_open_txg);
|
|
|
|
|
|
/*
|
|
|
- * Now that we've incremented tx_open_txg, we can let threads
|
|
|
- * enter the next transaction group.
|
|
|
- */
|
|
|
- for (c = 0; c < max_ncpus; c++)
|
|
|
- mutex_exit(&tx->tx_cpu[c].tc_lock);
|
|
|
-
|
|
|
- /*
|
|
|
* Quiesce the transaction group by waiting for everyone to txg_exit().
|
|
|
*/
|
|
|
start = gethrtime();
|
|
|
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
|
|
|
index b969751..d6b55ee 100644
|
|
|
--- a/module/zfs/vdev.c
|
|
|
+++ b/module/zfs/vdev.c
|
|
|
@@ -61,9 +61,6 @@ static vdev_ops_t *vdev_ops_table[] = {
|
|
|
NULL
|
|
|
};
|
|
|
|
|
|
-/* maximum scrub/resilver I/O queue per leaf vdev */
|
|
|
-int zfs_scrub_limit = 10;
|
|
|
-
|
|
|
/*
|
|
|
* Given a vdev type, return the appropriate ops vector.
|
|
|
*/
|
|
|
@@ -601,9 +598,9 @@ vdev_free(vdev_t *vd)
|
|
|
metaslab_group_destroy(vd->vdev_mg);
|
|
|
}
|
|
|
|
|
|
- ASSERT3U(vd->vdev_stat.vs_space, ==, 0);
|
|
|
- ASSERT3U(vd->vdev_stat.vs_dspace, ==, 0);
|
|
|
- ASSERT3U(vd->vdev_stat.vs_alloc, ==, 0);
|
|
|
+ ASSERT0(vd->vdev_stat.vs_space);
|
|
|
+ ASSERT0(vd->vdev_stat.vs_dspace);
|
|
|
+ ASSERT0(vd->vdev_stat.vs_alloc);
|
|
|
|
|
|
/*
|
|
|
* Remove this vdev from its parent's child list.
|
|
|
@@ -1258,11 +1255,12 @@ vdev_open(vdev_t *vd)
|
|
|
if (vd->vdev_asize == 0) {
|
|
|
/*
|
|
|
* This is the first-ever open, so use the computed values.
|
|
|
- * For testing purposes, a higher ashift can be requested.
|
|
|
+ * For compatibility, a different ashift can be requested.
|
|
|
*/
|
|
|
vd->vdev_asize = asize;
|
|
|
vd->vdev_max_asize = max_asize;
|
|
|
- vd->vdev_ashift = MAX(ashift, vd->vdev_ashift);
|
|
|
+ if (vd->vdev_ashift == 0)
|
|
|
+ vd->vdev_ashift = ashift;
|
|
|
} else {
|
|
|
/*
|
|
|
* Detect if the alignment requirement has increased.
|
|
|
@@ -1348,7 +1346,8 @@ vdev_validate(vdev_t *vd, boolean_t strict)
|
|
|
if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) {
|
|
|
uint64_t aux_guid = 0;
|
|
|
nvlist_t *nvl;
|
|
|
- uint64_t txg = strict ? spa->spa_config_txg : -1ULL;
|
|
|
+ uint64_t txg = spa_last_synced_txg(spa) != 0 ?
|
|
|
+ spa_last_synced_txg(spa) : -1ULL;
|
|
|
|
|
|
if ((label = vdev_label_read_config(vd, txg)) == NULL) {
|
|
|
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
|
|
|
@@ -1533,7 +1532,7 @@ vdev_reopen(vdev_t *vd)
|
|
|
!l2arc_vdev_present(vd))
|
|
|
l2arc_add_vdev(spa, vd);
|
|
|
} else {
|
|
|
- (void) vdev_validate(vd, spa_last_synced_txg(spa));
|
|
|
+ (void) vdev_validate(vd, B_TRUE);
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
@@ -1826,7 +1825,7 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg)
|
|
|
|
|
|
if (vd->vdev_detached) {
|
|
|
if (smo->smo_object != 0) {
|
|
|
- VERIFY(0 == dmu_object_free(mos, smo->smo_object, tx));
|
|
|
+ VERIFY0(dmu_object_free(mos, smo->smo_object, tx));
|
|
|
smo->smo_object = 0;
|
|
|
}
|
|
|
dmu_tx_commit(tx);
|
|
|
@@ -1856,6 +1855,7 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg)
|
|
|
|
|
|
space_map_truncate(smo, mos, tx);
|
|
|
space_map_sync(&smsync, SM_ALLOC, smo, mos, tx);
|
|
|
+ space_map_vacate(&smsync, NULL, NULL);
|
|
|
|
|
|
space_map_destroy(&smsync);
|
|
|
|
|
|
@@ -2030,7 +2030,7 @@ vdev_remove(vdev_t *vd, uint64_t txg)
|
|
|
tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg);
|
|
|
|
|
|
if (vd->vdev_dtl_smo.smo_object) {
|
|
|
- ASSERT3U(vd->vdev_dtl_smo.smo_alloc, ==, 0);
|
|
|
+ ASSERT0(vd->vdev_dtl_smo.smo_alloc);
|
|
|
(void) dmu_object_free(mos, vd->vdev_dtl_smo.smo_object, tx);
|
|
|
vd->vdev_dtl_smo.smo_object = 0;
|
|
|
}
|
|
|
@@ -2042,7 +2042,7 @@ vdev_remove(vdev_t *vd, uint64_t txg)
|
|
|
if (msp == NULL || msp->ms_smo.smo_object == 0)
|
|
|
continue;
|
|
|
|
|
|
- ASSERT3U(msp->ms_smo.smo_alloc, ==, 0);
|
|
|
+ ASSERT0(msp->ms_smo.smo_alloc);
|
|
|
(void) dmu_object_free(mos, msp->ms_smo.smo_object, tx);
|
|
|
msp->ms_smo.smo_object = 0;
|
|
|
}
|
|
|
@@ -2320,7 +2320,7 @@ top:
|
|
|
(void) spa_vdev_state_exit(spa, vd, 0);
|
|
|
goto top;
|
|
|
}
|
|
|
- ASSERT3U(tvd->vdev_stat.vs_alloc, ==, 0);
|
|
|
+ ASSERT0(tvd->vdev_stat.vs_alloc);
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
@@ -3193,13 +3193,50 @@ vdev_split(vdev_t *vd)
|
|
|
vdev_propagate_state(cvd);
|
|
|
}
|
|
|
|
|
|
+void
|
|
|
+vdev_deadman(vdev_t *vd)
|
|
|
+{
|
|
|
+ int c;
|
|
|
+
|
|
|
+ for (c = 0; c < vd->vdev_children; c++) {
|
|
|
+ vdev_t *cvd = vd->vdev_child[c];
|
|
|
+
|
|
|
+ vdev_deadman(cvd);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (vd->vdev_ops->vdev_op_leaf) {
|
|
|
+ vdev_queue_t *vq = &vd->vdev_queue;
|
|
|
+
|
|
|
+ mutex_enter(&vq->vq_lock);
|
|
|
+ if (avl_numnodes(&vq->vq_pending_tree) > 0) {
|
|
|
+ spa_t *spa = vd->vdev_spa;
|
|
|
+ zio_t *fio;
|
|
|
+ uint64_t delta;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Look at the head of all the pending queues,
|
|
|
+ * if any I/O has been outstanding for longer than
|
|
|
+ * the spa_deadman_synctime we log a zevent.
|
|
|
+ */
|
|
|
+ fio = avl_first(&vq->vq_pending_tree);
|
|
|
+ delta = ddi_get_lbolt64() - fio->io_timestamp;
|
|
|
+ if (delta > NSEC_TO_TICK(spa_deadman_synctime(spa))) {
|
|
|
+ zfs_dbgmsg("SLOW IO: zio timestamp %llu, "
|
|
|
+ "delta %llu, last io %llu",
|
|
|
+ fio->io_timestamp, delta,
|
|
|
+ vq->vq_io_complete_ts);
|
|
|
+ zfs_ereport_post(FM_EREPORT_ZFS_DELAY,
|
|
|
+ spa, vd, fio, 0, 0);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ mutex_exit(&vq->vq_lock);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
#if defined(_KERNEL) && defined(HAVE_SPL)
|
|
|
EXPORT_SYMBOL(vdev_fault);
|
|
|
EXPORT_SYMBOL(vdev_degrade);
|
|
|
EXPORT_SYMBOL(vdev_online);
|
|
|
EXPORT_SYMBOL(vdev_offline);
|
|
|
EXPORT_SYMBOL(vdev_clear);
|
|
|
-
|
|
|
-module_param(zfs_scrub_limit, int, 0644);
|
|
|
-MODULE_PARM_DESC(zfs_scrub_limit, "Max scrub/resilver I/O per leaf vdev");
|
|
|
#endif
|
|
|
diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c
|
|
|
index 1f6507f..2869716 100644
|
|
|
--- a/module/zfs/vdev_disk.c
|
|
|
+++ b/module/zfs/vdev_disk.c
|
|
|
@@ -268,13 +268,13 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
|
|
|
* Devices are always opened by the path provided at configuration
|
|
|
* time. This means that if the provided path is a udev by-id path
|
|
|
* then drives may be recabled without an issue. If the provided
|
|
|
- * path is a udev by-path path then the physical location information
|
|
|
+ * path is a udev by-path path, then the physical location information
|
|
|
* will be preserved. This can be critical for more complicated
|
|
|
* configurations where drives are located in specific physical
|
|
|
* locations to maximize the systems tolerence to component failure.
|
|
|
- * Alternately you can provide your own udev rule to flexibly map
|
|
|
+ * Alternatively, you can provide your own udev rule to flexibly map
|
|
|
* the drives as you see fit. It is not advised that you use the
|
|
|
- * /dev/[hd]d devices which may be reorder due to probing order.
|
|
|
+ * /dev/[hd]d devices which may be reordered due to probing order.
|
|
|
* Devices in the wrong locations will be detected by the higher
|
|
|
* level vdev validation.
|
|
|
*/
|
|
|
@@ -407,8 +407,7 @@ vdev_disk_dio_put(dio_request_t *dr)
|
|
|
vdev_disk_dio_free(dr);
|
|
|
|
|
|
if (zio) {
|
|
|
- zio->io_delay = jiffies_to_msecs(
|
|
|
- jiffies_64 - zio->io_delay);
|
|
|
+ zio->io_delay = jiffies_64 - zio->io_delay;
|
|
|
zio->io_error = error;
|
|
|
ASSERT3S(zio->io_error, >=, 0);
|
|
|
if (zio->io_error)
|
|
|
@@ -609,7 +608,7 @@ BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, size, rc)
|
|
|
{
|
|
|
zio_t *zio = bio->bi_private;
|
|
|
|
|
|
- zio->io_delay = jiffies_to_msecs(jiffies_64 - zio->io_delay);
|
|
|
+ zio->io_delay = jiffies_64 - zio->io_delay;
|
|
|
zio->io_error = -rc;
|
|
|
if (rc && (rc == -EOPNOTSUPP))
|
|
|
zio->io_vd->vdev_nowritecache = B_TRUE;
|
|
|
@@ -633,7 +632,7 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
|
|
|
if (!q)
|
|
|
return ENXIO;
|
|
|
|
|
|
- bio = bio_alloc(GFP_KERNEL, 0);
|
|
|
+ bio = bio_alloc(GFP_NOIO, 0);
|
|
|
if (!bio)
|
|
|
return ENOMEM;
|
|
|
|
|
|
diff --git a/module/zfs/vdev_file.c b/module/zfs/vdev_file.c
|
|
|
index 3c0ce53..06999a8 100644
|
|
|
--- a/module/zfs/vdev_file.c
|
|
|
+++ b/module/zfs/vdev_file.c
|
|
|
@@ -25,6 +25,7 @@
|
|
|
|
|
|
#include <sys/zfs_context.h>
|
|
|
#include <sys/spa.h>
|
|
|
+#include <sys/spa_impl.h>
|
|
|
#include <sys/vdev_file.h>
|
|
|
#include <sys/vdev_impl.h>
|
|
|
#include <sys/zio.h>
|
|
|
@@ -139,21 +140,39 @@ vdev_file_close(vdev_t *vd)
|
|
|
vd->vdev_tsd = NULL;
|
|
|
}
|
|
|
|
|
|
-static int
|
|
|
-vdev_file_io_start(zio_t *zio)
|
|
|
+static void
|
|
|
+vdev_file_io_strategy(void *arg)
|
|
|
{
|
|
|
+ zio_t *zio = (zio_t *)arg;
|
|
|
vdev_t *vd = zio->io_vd;
|
|
|
- vdev_file_t *vf;
|
|
|
- ssize_t resid = 0;
|
|
|
+ vdev_file_t *vf = vd->vdev_tsd;
|
|
|
+ ssize_t resid;
|
|
|
|
|
|
- if (!vdev_readable(vd)) {
|
|
|
- zio->io_error = ENXIO;
|
|
|
- return (ZIO_PIPELINE_CONTINUE);
|
|
|
- }
|
|
|
+ zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ?
|
|
|
+ UIO_READ : UIO_WRITE, vf->vf_vnode, zio->io_data,
|
|
|
+ zio->io_size, zio->io_offset, UIO_SYSSPACE,
|
|
|
+ 0, RLIM64_INFINITY, kcred, &resid);
|
|
|
+
|
|
|
+ if (resid != 0 && zio->io_error == 0)
|
|
|
+ zio->io_error = ENOSPC;
|
|
|
|
|
|
- vf = vd->vdev_tsd;
|
|
|
+ zio_interrupt(zio);
|
|
|
+}
|
|
|
+
|
|
|
+static int
|
|
|
+vdev_file_io_start(zio_t *zio)
|
|
|
+{
|
|
|
+ spa_t *spa = zio->io_spa;
|
|
|
+ vdev_t *vd = zio->io_vd;
|
|
|
+ vdev_file_t *vf = vd->vdev_tsd;
|
|
|
|
|
|
if (zio->io_type == ZIO_TYPE_IOCTL) {
|
|
|
+ /* XXPOLICY */
|
|
|
+ if (!vdev_readable(vd)) {
|
|
|
+ zio->io_error = ENXIO;
|
|
|
+ return (ZIO_PIPELINE_CONTINUE);
|
|
|
+ }
|
|
|
+
|
|
|
switch (zio->io_cmd) {
|
|
|
case DKIOCFLUSHWRITECACHE:
|
|
|
zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC,
|
|
|
@@ -166,15 +185,8 @@ vdev_file_io_start(zio_t *zio)
|
|
|
return (ZIO_PIPELINE_CONTINUE);
|
|
|
}
|
|
|
|
|
|
- zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ?
|
|
|
- UIO_READ : UIO_WRITE, vf->vf_vnode, zio->io_data,
|
|
|
- zio->io_size, zio->io_offset, UIO_SYSSPACE,
|
|
|
- 0, RLIM64_INFINITY, kcred, &resid);
|
|
|
-
|
|
|
- if (resid != 0 && zio->io_error == 0)
|
|
|
- zio->io_error = ENOSPC;
|
|
|
-
|
|
|
- zio_interrupt(zio);
|
|
|
+ spa_taskq_dispatch_ent(spa, ZIO_TYPE_FREE, ZIO_TASKQ_ISSUE,
|
|
|
+ vdev_file_io_strategy, zio, 0, &zio->io_tqent);
|
|
|
|
|
|
return (ZIO_PIPELINE_STOP);
|
|
|
}
|
|
|
diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c
|
|
|
index a2671ca..e0884dc 100644
|
|
|
--- a/module/zfs/vdev_mirror.c
|
|
|
+++ b/module/zfs/vdev_mirror.c
|
|
|
@@ -41,6 +41,7 @@ typedef struct mirror_child {
|
|
|
vdev_t *mc_vd;
|
|
|
uint64_t mc_offset;
|
|
|
int mc_error;
|
|
|
+ int mc_pending;
|
|
|
uint8_t mc_tried;
|
|
|
uint8_t mc_skipped;
|
|
|
uint8_t mc_speculative;
|
|
|
@@ -54,7 +55,23 @@ typedef struct mirror_map {
|
|
|
mirror_child_t mm_child[1];
|
|
|
} mirror_map_t;
|
|
|
|
|
|
-int vdev_mirror_shift = 21;
|
|
|
+/*
|
|
|
+ * When the children are equally busy queue incoming requests to a single
|
|
|
+ * child for N microseconds. This is done to maximize the likelihood that
|
|
|
+ * the Linux elevator will be able to merge requests while it is plugged.
|
|
|
+ * Otherwise, requests are queued to the least busy device.
|
|
|
+ *
|
|
|
+ * For rotational disks the Linux elevator will plug for 10ms which is
|
|
|
+ * why zfs_vdev_mirror_switch_us is set to 10ms by default. For non-
|
|
|
+ * rotational disks the elevator will not plug, but 10ms is still a small
|
|
|
+ * enough value that the requests will get spread over all the children.
|
|
|
+ *
|
|
|
+ * For fast SSDs it may make sense to decrease zfs_vdev_mirror_switch_us
|
|
|
+ * significantly to bound the worst case latencies. It would probably be
|
|
|
+ * ideal to calculate a decaying average of the last observed latencies and
|
|
|
+ * use that to dynamically adjust the zfs_vdev_mirror_switch_us time.
|
|
|
+ */
|
|
|
+int zfs_vdev_mirror_switch_us = 10000;
|
|
|
|
|
|
static void
|
|
|
vdev_mirror_map_free(zio_t *zio)
|
|
|
@@ -69,6 +86,19 @@ static const zio_vsd_ops_t vdev_mirror_vsd_ops = {
|
|
|
zio_vsd_default_cksum_report
|
|
|
};
|
|
|
|
|
|
+static int
|
|
|
+vdev_mirror_pending(vdev_t *vd)
|
|
|
+{
|
|
|
+ vdev_queue_t *vq = &vd->vdev_queue;
|
|
|
+ int pending;
|
|
|
+
|
|
|
+ mutex_enter(&vq->vq_lock);
|
|
|
+ pending = avl_numnodes(&vq->vq_pending_tree);
|
|
|
+ mutex_exit(&vq->vq_lock);
|
|
|
+
|
|
|
+ return (pending);
|
|
|
+}
|
|
|
+
|
|
|
static mirror_map_t *
|
|
|
vdev_mirror_map_alloc(zio_t *zio)
|
|
|
{
|
|
|
@@ -108,20 +138,55 @@ vdev_mirror_map_alloc(zio_t *zio)
|
|
|
mc->mc_offset = DVA_GET_OFFSET(&dva[c]);
|
|
|
}
|
|
|
} else {
|
|
|
+ int lowest_pending = INT_MAX;
|
|
|
+ int lowest_nr = 1;
|
|
|
+
|
|
|
c = vd->vdev_children;
|
|
|
|
|
|
mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]), KM_PUSHPAGE);
|
|
|
mm->mm_children = c;
|
|
|
mm->mm_replacing = (vd->vdev_ops == &vdev_replacing_ops ||
|
|
|
vd->vdev_ops == &vdev_spare_ops);
|
|
|
- mm->mm_preferred = mm->mm_replacing ? 0 :
|
|
|
- (zio->io_offset >> vdev_mirror_shift) % c;
|
|
|
+ mm->mm_preferred = 0;
|
|
|
mm->mm_root = B_FALSE;
|
|
|
|
|
|
for (c = 0; c < mm->mm_children; c++) {
|
|
|
mc = &mm->mm_child[c];
|
|
|
mc->mc_vd = vd->vdev_child[c];
|
|
|
mc->mc_offset = zio->io_offset;
|
|
|
+
|
|
|
+ if (mm->mm_replacing)
|
|
|
+ continue;
|
|
|
+
|
|
|
+ if (!vdev_readable(mc->mc_vd)) {
|
|
|
+ mc->mc_error = ENXIO;
|
|
|
+ mc->mc_tried = 1;
|
|
|
+ mc->mc_skipped = 1;
|
|
|
+ mc->mc_pending = INT_MAX;
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ mc->mc_pending = vdev_mirror_pending(mc->mc_vd);
|
|
|
+ if (mc->mc_pending < lowest_pending) {
|
|
|
+ lowest_pending = mc->mc_pending;
|
|
|
+ lowest_nr = 1;
|
|
|
+ } else if (mc->mc_pending == lowest_pending) {
|
|
|
+ lowest_nr++;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ d = gethrtime() / (NSEC_PER_USEC * zfs_vdev_mirror_switch_us);
|
|
|
+ d = (d % lowest_nr) + 1;
|
|
|
+
|
|
|
+ for (c = 0; c < mm->mm_children; c++) {
|
|
|
+ mc = &mm->mm_child[c];
|
|
|
+
|
|
|
+ if (mm->mm_child[c].mc_pending == lowest_pending) {
|
|
|
+ if (--d == 0) {
|
|
|
+ mm->mm_preferred = c;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
|
|
|
@@ -492,3 +557,8 @@ vdev_ops_t vdev_spare_ops = {
|
|
|
VDEV_TYPE_SPARE, /* name of this vdev type */
|
|
|
B_FALSE /* not a leaf vdev */
|
|
|
};
|
|
|
+
|
|
|
+#if defined(_KERNEL) && defined(HAVE_SPL)
|
|
|
+module_param(zfs_vdev_mirror_switch_us, int, 0644);
|
|
|
+MODULE_PARM_DESC(zfs_vdev_mirror_switch_us, "Switch mirrors every N usecs");
|
|
|
+#endif
|
|
|
diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c
|
|
|
index e2096fa..3f2793b 100644
|
|
|
--- a/module/zfs/vdev_queue.c
|
|
|
+++ b/module/zfs/vdev_queue.c
|
|
|
@@ -23,6 +23,10 @@
|
|
|
* Use is subject to license terms.
|
|
|
*/
|
|
|
|
|
|
+/*
|
|
|
+ * Copyright (c) 2012 by Delphix. All rights reserved.
|
|
|
+ */
|
|
|
+
|
|
|
#include <sys/zfs_context.h>
|
|
|
#include <sys/vdev_impl.h>
|
|
|
#include <sys/zio.h>
|
|
|
@@ -319,6 +323,7 @@ again:
|
|
|
vi, size, fio->io_type, ZIO_PRIORITY_AGG,
|
|
|
flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE,
|
|
|
vdev_queue_agg_io_done, NULL);
|
|
|
+ aio->io_timestamp = fio->io_timestamp;
|
|
|
|
|
|
nio = fio;
|
|
|
do {
|
|
|
@@ -391,7 +396,8 @@ vdev_queue_io(zio_t *zio)
|
|
|
|
|
|
mutex_enter(&vq->vq_lock);
|
|
|
|
|
|
- zio->io_deadline = (ddi_get_lbolt64() >> zfs_vdev_time_shift) +
|
|
|
+ zio->io_timestamp = ddi_get_lbolt64();
|
|
|
+ zio->io_deadline = (zio->io_timestamp >> zfs_vdev_time_shift) +
|
|
|
zio->io_priority;
|
|
|
|
|
|
vdev_queue_io_add(vq, zio);
|
|
|
@@ -417,10 +423,17 @@ vdev_queue_io_done(zio_t *zio)
|
|
|
vdev_queue_t *vq = &zio->io_vd->vdev_queue;
|
|
|
int i;
|
|
|
|
|
|
+ if (zio_injection_enabled)
|
|
|
+ delay(SEC_TO_TICK(zio_handle_io_delay(zio)));
|
|
|
+
|
|
|
mutex_enter(&vq->vq_lock);
|
|
|
|
|
|
avl_remove(&vq->vq_pending_tree, zio);
|
|
|
|
|
|
+ zio->io_delta = ddi_get_lbolt64() - zio->io_timestamp;
|
|
|
+ vq->vq_io_complete_ts = ddi_get_lbolt64();
|
|
|
+ vq->vq_io_delta_ts = vq->vq_io_complete_ts - zio->io_timestamp;
|
|
|
+
|
|
|
for (i = 0; i < zfs_vdev_ramp_rate; i++) {
|
|
|
zio_t *nio = vdev_queue_io_to_issue(vq, zfs_vdev_max_pending);
|
|
|
if (nio == NULL)
|
|
|
diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c
|
|
|
index cb806c5..3e1878d 100644
|
|
|
--- a/module/zfs/vdev_raidz.c
|
|
|
+++ b/module/zfs/vdev_raidz.c
|
|
|
@@ -281,7 +281,7 @@ vdev_raidz_map_free_vsd(zio_t *zio)
|
|
|
{
|
|
|
raidz_map_t *rm = zio->io_vsd;
|
|
|
|
|
|
- ASSERT3U(rm->rm_freed, ==, 0);
|
|
|
+ ASSERT0(rm->rm_freed);
|
|
|
rm->rm_freed = 1;
|
|
|
|
|
|
if (rm->rm_reports == 0)
|
|
|
@@ -1134,7 +1134,7 @@ vdev_raidz_matrix_invert(raidz_map_t *rm, int n, int nmissing, int *missing,
|
|
|
*/
|
|
|
for (i = 0; i < nmissing; i++) {
|
|
|
for (j = 0; j < missing[i]; j++) {
|
|
|
- ASSERT3U(rows[i][j], ==, 0);
|
|
|
+ ASSERT0(rows[i][j]);
|
|
|
}
|
|
|
ASSERT3U(rows[i][missing[i]], !=, 0);
|
|
|
|
|
|
@@ -1175,7 +1175,7 @@ vdev_raidz_matrix_invert(raidz_map_t *rm, int n, int nmissing, int *missing,
|
|
|
if (j == missing[i]) {
|
|
|
ASSERT3U(rows[i][j], ==, 1);
|
|
|
} else {
|
|
|
- ASSERT3U(rows[i][j], ==, 0);
|
|
|
+ ASSERT0(rows[i][j]);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
diff --git a/module/zfs/zap.c b/module/zfs/zap.c
|
|
|
index a6c8c82..a7bae5e 100644
|
|
|
--- a/module/zfs/zap.c
|
|
|
+++ b/module/zfs/zap.c
|
|
|
@@ -162,7 +162,7 @@ zap_table_grow(zap_t *zap, zap_table_phys_t *tbl,
|
|
|
} else {
|
|
|
newblk = zap_allocate_blocks(zap, tbl->zt_numblks * 2);
|
|
|
tbl->zt_nextblk = newblk;
|
|
|
- ASSERT3U(tbl->zt_blks_copied, ==, 0);
|
|
|
+ ASSERT0(tbl->zt_blks_copied);
|
|
|
dmu_prefetch(zap->zap_objset, zap->zap_object,
|
|
|
tbl->zt_blk << bs, tbl->zt_numblks << bs);
|
|
|
}
|
|
|
@@ -339,7 +339,7 @@ zap_grow_ptrtbl(zap_t *zap, dmu_tx_t *tx)
|
|
|
|
|
|
ASSERT3U(zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==,
|
|
|
ZAP_EMBEDDED_PTRTBL_SHIFT(zap));
|
|
|
- ASSERT3U(zap->zap_f.zap_phys->zap_ptrtbl.zt_blk, ==, 0);
|
|
|
+ ASSERT0(zap->zap_f.zap_phys->zap_ptrtbl.zt_blk);
|
|
|
|
|
|
newblk = zap_allocate_blocks(zap, 1);
|
|
|
err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
|
|
|
@@ -475,7 +475,7 @@ zap_open_leaf(uint64_t blkid, dmu_buf_t *db)
|
|
|
* chain. There should be no chained leafs (as we have removed
|
|
|
* support for them).
|
|
|
*/
|
|
|
- ASSERT3U(l->l_phys->l_hdr.lh_pad1, ==, 0);
|
|
|
+ ASSERT0(l->l_phys->l_hdr.lh_pad1);
|
|
|
|
|
|
/*
|
|
|
* There should be more hash entries than there can be
|
|
|
@@ -658,9 +658,9 @@ zap_expand_leaf(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx, zap_leaf_t **lp)
|
|
|
zap_leaf_split(l, nl, zap->zap_normflags != 0);
|
|
|
|
|
|
/* set sibling pointers */
|
|
|
- for (i = 0; i < (1ULL<<prefix_diff); i++) {
|
|
|
+ for (i = 0; i < (1ULL << prefix_diff); i++) {
|
|
|
err = zap_set_idx_to_blk(zap, sibling+i, nl->l_blkid, tx);
|
|
|
- ASSERT3U(err, ==, 0); /* we checked for i/o errors above */
|
|
|
+ ASSERT0(err); /* we checked for i/o errors above */
|
|
|
}
|
|
|
|
|
|
if (hash & (1ULL << (64 - l->l_phys->l_hdr.lh_prefix_len))) {
|
|
|
diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c
|
|
|
index 3d8cae0..4da7836 100644
|
|
|
--- a/module/zfs/zap_micro.c
|
|
|
+++ b/module/zfs/zap_micro.c
|
|
|
@@ -506,7 +506,7 @@ zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
|
|
|
return (mzap_upgrade(zapp, tx, 0));
|
|
|
}
|
|
|
err = dmu_object_set_blocksize(os, obj, newsz, 0, tx);
|
|
|
- ASSERT3U(err, ==, 0);
|
|
|
+ ASSERT0(err);
|
|
|
zap->zap_m.zap_num_chunks =
|
|
|
db->db_size / MZAP_ENT_LEN - 1;
|
|
|
}
|
|
|
diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c
|
|
|
index 4fa530b..168f853 100644
|
|
|
--- a/module/zfs/zfs_ctldir.c
|
|
|
+++ b/module/zfs/zfs_ctldir.c
|
|
|
@@ -732,7 +732,11 @@ zfsctl_unmount_snapshot(zfs_sb_t *zsb, char *name, int flags)
|
|
|
sep = avl_find(&zsb->z_ctldir_snaps, &search, NULL);
|
|
|
if (sep) {
|
|
|
avl_remove(&zsb->z_ctldir_snaps, sep);
|
|
|
+ mutex_exit(&zsb->z_ctldir_lock);
|
|
|
+
|
|
|
error = __zfsctl_unmount_snapshot(sep, flags);
|
|
|
+
|
|
|
+ mutex_enter(&zsb->z_ctldir_lock);
|
|
|
if (error == EBUSY)
|
|
|
avl_add(&zsb->z_ctldir_snaps, sep);
|
|
|
else
|
|
|
@@ -767,7 +771,11 @@ zfsctl_unmount_snapshots(zfs_sb_t *zsb, int flags, int *count)
|
|
|
while (sep != NULL) {
|
|
|
next = AVL_NEXT(&zsb->z_ctldir_snaps, sep);
|
|
|
avl_remove(&zsb->z_ctldir_snaps, sep);
|
|
|
+ mutex_exit(&zsb->z_ctldir_lock);
|
|
|
+
|
|
|
error = __zfsctl_unmount_snapshot(sep, flags);
|
|
|
+
|
|
|
+ mutex_enter(&zsb->z_ctldir_lock);
|
|
|
if (error == EBUSY) {
|
|
|
avl_add(&zsb->z_ctldir_snaps, sep);
|
|
|
(*count)++;
|
|
|
diff --git a/module/zfs/zfs_debug.c b/module/zfs/zfs_debug.c
|
|
|
index e561778..ad611ac 100644
|
|
|
--- a/module/zfs/zfs_debug.c
|
|
|
+++ b/module/zfs/zfs_debug.c
|
|
|
@@ -20,6 +20,7 @@
|
|
|
*/
|
|
|
/*
|
|
|
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
|
|
|
+ * Copyright (c) 2012 by Delphix. All rights reserved.
|
|
|
*/
|
|
|
|
|
|
#include <sys/zfs_context.h>
|
|
|
diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c
|
|
|
index 670e313..4a4969f 100644
|
|
|
--- a/module/zfs/zfs_dir.c
|
|
|
+++ b/module/zfs/zfs_dir.c
|
|
|
@@ -562,7 +562,7 @@ zfs_unlinked_drain(zfs_sb_t *zsb)
|
|
|
int error;
|
|
|
|
|
|
/*
|
|
|
- * Interate over the contents of the unlinked set.
|
|
|
+ * Iterate over the contents of the unlinked set.
|
|
|
*/
|
|
|
for (zap_cursor_init(&zc, zsb->z_os, zsb->z_unlinkedobj);
|
|
|
zap_cursor_retrieve(&zc, &zap) == 0;
|
|
|
diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c
|
|
|
index 820291b..af2030a 100644
|
|
|
--- a/module/zfs/zfs_fm.c
|
|
|
+++ b/module/zfs/zfs_fm.c
|
|
|
@@ -250,6 +250,7 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
|
|
|
|
|
|
if (vd != NULL) {
|
|
|
vdev_t *pvd = vd->vdev_parent;
|
|
|
+ vdev_queue_t *vq = &vd->vdev_queue;
|
|
|
|
|
|
fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
|
|
|
DATA_TYPE_UINT64, vd->vdev_guid,
|
|
|
@@ -272,6 +273,15 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
|
|
|
FM_EREPORT_PAYLOAD_ZFS_VDEV_ASHIFT,
|
|
|
DATA_TYPE_UINT64, vd->vdev_ashift, NULL);
|
|
|
|
|
|
+ if (vq != NULL) {
|
|
|
+ fm_payload_set(ereport,
|
|
|
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_COMP_TS,
|
|
|
+ DATA_TYPE_UINT64, vq->vq_io_complete_ts, NULL);
|
|
|
+ fm_payload_set(ereport,
|
|
|
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_DELTA_TS,
|
|
|
+ DATA_TYPE_UINT64, vq->vq_io_delta_ts, NULL);
|
|
|
+ }
|
|
|
+
|
|
|
if (pvd != NULL) {
|
|
|
fm_payload_set(ereport,
|
|
|
FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID,
|
|
|
@@ -304,6 +314,12 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
|
|
|
DATA_TYPE_UINT32, zio->io_pipeline, NULL);
|
|
|
fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_DELAY,
|
|
|
DATA_TYPE_UINT64, zio->io_delay, NULL);
|
|
|
+ fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_TIMESTAMP,
|
|
|
+ DATA_TYPE_UINT64, zio->io_timestamp, NULL);
|
|
|
+ fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_DEADLINE,
|
|
|
+ DATA_TYPE_UINT64, zio->io_deadline, NULL);
|
|
|
+ fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_DELTA,
|
|
|
+ DATA_TYPE_UINT64, zio->io_delta, NULL);
|
|
|
|
|
|
/*
|
|
|
* If the 'size' parameter is non-zero, it indicates this is a
|
|
|
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
|
|
|
index 5560b25..a9184a1 100644
|
|
|
--- a/module/zfs/zfs_ioctl.c
|
|
|
+++ b/module/zfs/zfs_ioctl.c
|
|
|
@@ -1268,9 +1268,6 @@ zfs_ioc_pool_import(zfs_cmd_t *zc)
|
|
|
error = err;
|
|
|
}
|
|
|
|
|
|
- if (error == 0)
|
|
|
- zvol_create_minors(zc->zc_name);
|
|
|
-
|
|
|
nvlist_free(config);
|
|
|
|
|
|
if (props)
|
|
|
@@ -1779,7 +1776,7 @@ zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
|
|
|
error = zvol_get_stats(os, nv);
|
|
|
if (error == EIO)
|
|
|
return (error);
|
|
|
- VERIFY3S(error, ==, 0);
|
|
|
+ VERIFY0(error);
|
|
|
}
|
|
|
if (error == 0)
|
|
|
error = put_nvlist(zc, nv);
|
|
|
@@ -5136,8 +5133,6 @@ zfsdev_state_init(struct file *filp)
|
|
|
return (ENXIO);
|
|
|
|
|
|
zs = kmem_zalloc( sizeof(zfsdev_state_t), KM_SLEEP);
|
|
|
- if (zs == NULL)
|
|
|
- return (ENOMEM);
|
|
|
|
|
|
zs->zs_file = filp;
|
|
|
zs->zs_minor = minor;
|
|
|
diff --git a/module/zfs/zfs_rlock.c b/module/zfs/zfs_rlock.c
|
|
|
index 325d5aa..136972b 100644
|
|
|
--- a/module/zfs/zfs_rlock.c
|
|
|
+++ b/module/zfs/zfs_rlock.c
|
|
|
@@ -22,6 +22,9 @@
|
|
|
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
|
|
|
* Use is subject to license terms.
|
|
|
*/
|
|
|
+/*
|
|
|
+ * Copyright (c) 2012 by Delphix. All rights reserved.
|
|
|
+ */
|
|
|
|
|
|
/*
|
|
|
* This file contains the code to implement file range locking in
|
|
|
@@ -495,9 +498,9 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove, list_t *free_list)
|
|
|
|
|
|
list_insert_tail(free_list, remove);
|
|
|
} else {
|
|
|
- ASSERT3U(remove->r_cnt, ==, 0);
|
|
|
- ASSERT3U(remove->r_write_wanted, ==, 0);
|
|
|
- ASSERT3U(remove->r_read_wanted, ==, 0);
|
|
|
+ ASSERT0(remove->r_cnt);
|
|
|
+ ASSERT0(remove->r_write_wanted);
|
|
|
+ ASSERT0(remove->r_read_wanted);
|
|
|
/*
|
|
|
* Find start proxy representing this reader lock,
|
|
|
* then decrement ref count on all proxies
|
|
|
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
|
|
|
index 620e39b..9ae7ab5 100644
|
|
|
--- a/module/zfs/zfs_vfsops.c
|
|
|
+++ b/module/zfs/zfs_vfsops.c
|
|
|
@@ -233,10 +233,11 @@ zfs_register_callbacks(zfs_sb_t *zsb)
|
|
|
{
|
|
|
struct dsl_dataset *ds = NULL;
|
|
|
objset_t *os = zsb->z_os;
|
|
|
+ boolean_t do_readonly = B_FALSE;
|
|
|
int error = 0;
|
|
|
|
|
|
if (zfs_is_readonly(zsb) || !spa_writeable(dmu_objset_spa(os)))
|
|
|
- readonly_changed_cb(zsb, B_TRUE);
|
|
|
+ do_readonly = B_TRUE;
|
|
|
|
|
|
/*
|
|
|
* Register property callbacks.
|
|
|
@@ -271,6 +272,9 @@ zfs_register_callbacks(zfs_sb_t *zsb)
|
|
|
if (error)
|
|
|
goto unregister;
|
|
|
|
|
|
+ if (do_readonly)
|
|
|
+ readonly_changed_cb(zsb, B_TRUE);
|
|
|
+
|
|
|
return (0);
|
|
|
|
|
|
unregister:
|
|
|
@@ -1056,10 +1060,12 @@ zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting)
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
- * Drain the iput_taskq to ensure all active references to the
|
|
|
+ * If someone has not already unmounted this file system,
|
|
|
+ * drain the iput_taskq to ensure all active references to the
|
|
|
* zfs_sb_t have been handled only then can it be safely destroyed.
|
|
|
*/
|
|
|
- taskq_wait(dsl_pool_iput_taskq(dmu_objset_pool(zsb->z_os)));
|
|
|
+ if (zsb->z_os)
|
|
|
+ taskq_wait(dsl_pool_iput_taskq(dmu_objset_pool(zsb->z_os)));
|
|
|
|
|
|
/*
|
|
|
* Close the zil. NB: Can't close the zil while zfs_inactive
|
|
|
@@ -1480,10 +1486,11 @@ bail:
|
|
|
|
|
|
if (err) {
|
|
|
/*
|
|
|
- * Since we couldn't reopen zfs_sb_t, force
|
|
|
- * unmount this file system.
|
|
|
+ * Since we couldn't reopen zfs_sb_t or set up the
|
|
|
+ * sa framework, force unmount this file system.
|
|
|
*/
|
|
|
- (void) zfs_umount(zsb->z_sb);
|
|
|
+ if (zsb->z_os)
|
|
|
+ (void) zfs_umount(zsb->z_sb);
|
|
|
}
|
|
|
return (err);
|
|
|
}
|
|
|
@@ -1537,7 +1544,7 @@ zfs_set_version(zfs_sb_t *zsb, uint64_t newvers)
|
|
|
|
|
|
error = zap_add(os, MASTER_NODE_OBJ,
|
|
|
ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
|
|
|
- ASSERT3U(error, ==, 0);
|
|
|
+ ASSERT0(error);
|
|
|
|
|
|
VERIFY(0 == sa_set_sa_object(os, sa_obj));
|
|
|
sa_register_update_callback(os, zfs_sa_upgrade);
|
|
|
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
|
|
|
index 8ec4db2..3c3e8db 100644
|
|
|
--- a/module/zfs/zfs_vnops.c
|
|
|
+++ b/module/zfs/zfs_vnops.c
|
|
|
@@ -20,6 +20,7 @@
|
|
|
*/
|
|
|
/*
|
|
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
|
+ * Copyright (c) 2012 by Delphix. All rights reserved.
|
|
|
*/
|
|
|
|
|
|
/* Portions Copyright 2007 Jeremy Teo */
|
|
|
@@ -239,6 +240,68 @@ zfs_close(struct inode *ip, int flag, cred_t *cr)
|
|
|
}
|
|
|
EXPORT_SYMBOL(zfs_close);
|
|
|
|
|
|
+#if defined(SEEK_HOLE) && defined(SEEK_DATA)
|
|
|
+/*
|
|
|
+ * Lseek support for finding holes (cmd == SEEK_HOLE) and
|
|
|
+ * data (cmd == SEEK_DATA). "off" is an in/out parameter.
|
|
|
+ */
|
|
|
+static int
|
|
|
+zfs_holey_common(struct inode *ip, int cmd, loff_t *off)
|
|
|
+{
|
|
|
+ znode_t *zp = ITOZ(ip);
|
|
|
+ uint64_t noff = (uint64_t)*off; /* new offset */
|
|
|
+ uint64_t file_sz;
|
|
|
+ int error;
|
|
|
+ boolean_t hole;
|
|
|
+
|
|
|
+ file_sz = zp->z_size;
|
|
|
+ if (noff >= file_sz) {
|
|
|
+ return (ENXIO);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (cmd == SEEK_HOLE)
|
|
|
+ hole = B_TRUE;
|
|
|
+ else
|
|
|
+ hole = B_FALSE;
|
|
|
+
|
|
|
+ error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff);
|
|
|
+
|
|
|
+ /* end of file? */
|
|
|
+ if ((error == ESRCH) || (noff > file_sz)) {
|
|
|
+ /*
|
|
|
+ * Handle the virtual hole at the end of file.
|
|
|
+ */
|
|
|
+ if (hole) {
|
|
|
+ *off = file_sz;
|
|
|
+ return (0);
|
|
|
+ }
|
|
|
+ return (ENXIO);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (noff < *off)
|
|
|
+ return (error);
|
|
|
+ *off = noff;
|
|
|
+ return (error);
|
|
|
+}
|
|
|
+
|
|
|
+int
|
|
|
+zfs_holey(struct inode *ip, int cmd, loff_t *off)
|
|
|
+{
|
|
|
+ znode_t *zp = ITOZ(ip);
|
|
|
+ zfs_sb_t *zsb = ITOZSB(ip);
|
|
|
+ int error;
|
|
|
+
|
|
|
+ ZFS_ENTER(zsb);
|
|
|
+ ZFS_VERIFY_ZP(zp);
|
|
|
+
|
|
|
+ error = zfs_holey_common(ip, cmd, off);
|
|
|
+
|
|
|
+ ZFS_EXIT(zsb);
|
|
|
+ return (error);
|
|
|
+}
|
|
|
+EXPORT_SYMBOL(zfs_holey);
|
|
|
+#endif /* SEEK_HOLE && SEEK_DATA */
|
|
|
+
|
|
|
#if defined(_KERNEL)
|
|
|
/*
|
|
|
* When a file is memory mapped, we must keep the IO data synchronized
|
|
|
@@ -1523,7 +1586,7 @@ top:
|
|
|
&xattr_obj, sizeof (xattr_obj));
|
|
|
if (error == 0 && xattr_obj) {
|
|
|
error = zfs_zget(zsb, xattr_obj, &xzp);
|
|
|
- ASSERT3U(error, ==, 0);
|
|
|
+ ASSERT0(error);
|
|
|
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
|
|
|
dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
|
|
|
}
|
|
|
@@ -2837,7 +2900,7 @@ top:
|
|
|
zp->z_mode = new_mode;
|
|
|
ASSERT3P(aclp, !=, NULL);
|
|
|
err = zfs_aclset_common(zp, aclp, cr, tx);
|
|
|
- ASSERT3U(err, ==, 0);
|
|
|
+ ASSERT0(err);
|
|
|
if (zp->z_acl_cached)
|
|
|
zfs_acl_free(zp->z_acl_cached);
|
|
|
zp->z_acl_cached = aclp;
|
|
|
@@ -3346,7 +3409,7 @@ top:
|
|
|
|
|
|
error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zsb),
|
|
|
(void *)&szp->z_pflags, sizeof (uint64_t), tx);
|
|
|
- ASSERT3U(error, ==, 0);
|
|
|
+ ASSERT0(error);
|
|
|
|
|
|
error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
|
|
|
if (error == 0) {
|
|
|
diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c
|
|
|
index 9bf26a7..5922d3a 100644
|
|
|
--- a/module/zfs/zfs_znode.c
|
|
|
+++ b/module/zfs/zfs_znode.c
|
|
|
@@ -20,6 +20,7 @@
|
|
|
*/
|
|
|
/*
|
|
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
|
+ * Copyright (c) 2012 by Delphix. All rights reserved.
|
|
|
*/
|
|
|
|
|
|
/* Portions Copyright 2007 Jeremy Teo */
|
|
|
@@ -555,7 +556,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
|
|
|
err = zap_create_claim_norm(zsb->z_os, obj,
|
|
|
zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
|
|
|
obj_type, bonuslen, tx);
|
|
|
- ASSERT3U(err, ==, 0);
|
|
|
+ ASSERT0(err);
|
|
|
} else {
|
|
|
obj = zap_create_norm(zsb->z_os,
|
|
|
zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
|
|
|
@@ -566,7 +567,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
|
|
|
err = dmu_object_claim(zsb->z_os, obj,
|
|
|
DMU_OT_PLAIN_FILE_CONTENTS, 0,
|
|
|
obj_type, bonuslen, tx);
|
|
|
- ASSERT3U(err, ==, 0);
|
|
|
+ ASSERT0(err);
|
|
|
} else {
|
|
|
obj = dmu_object_alloc(zsb->z_os,
|
|
|
DMU_OT_PLAIN_FILE_CONTENTS, 0,
|
|
|
@@ -746,7 +747,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
|
|
|
if (obj_type == DMU_OT_ZNODE ||
|
|
|
acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
|
|
|
err = zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx);
|
|
|
- ASSERT3S(err, ==, 0);
|
|
|
+ ASSERT0(err);
|
|
|
}
|
|
|
kmem_free(sa_attrs, sizeof(sa_bulk_attr_t) * ZPL_END);
|
|
|
ZFS_OBJ_HOLD_EXIT(zsb, obj);
|
|
|
@@ -1170,7 +1171,7 @@ zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
|
|
|
|
|
|
if (error == ENOTSUP)
|
|
|
return;
|
|
|
- ASSERT3U(error, ==, 0);
|
|
|
+ ASSERT0(error);
|
|
|
|
|
|
/* What blocksize did we actually get? */
|
|
|
dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy);
|
|
|
diff --git a/module/zfs/zil.c b/module/zfs/zil.c
|
|
|
index 99d932f..c179693 100644
|
|
|
--- a/module/zfs/zil.c
|
|
|
+++ b/module/zfs/zil.c
|
|
|
@@ -212,7 +212,7 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst,
|
|
|
SET_BOOKMARK(&zb, bp->blk_cksum.zc_word[ZIL_ZC_OBJSET],
|
|
|
ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
|
|
|
|
|
|
- error = dsl_read_nolock(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf,
|
|
|
+ error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf,
|
|
|
ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
|
|
|
|
|
|
if (error == 0) {
|
|
|
@@ -288,7 +288,7 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf)
|
|
|
SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os), lr->lr_foid,
|
|
|
ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp));
|
|
|
|
|
|
- error = arc_read_nolock(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf,
|
|
|
+ error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf,
|
|
|
ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
|
|
|
|
|
|
if (error == 0) {
|
|
|
@@ -1163,7 +1163,7 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
|
|
|
lwb->lwb_nused += reclen + dlen;
|
|
|
lwb->lwb_max_txg = MAX(lwb->lwb_max_txg, txg);
|
|
|
ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_sz);
|
|
|
- ASSERT3U(P2PHASE(lwb->lwb_nused, sizeof (uint64_t)), ==, 0);
|
|
|
+ ASSERT0(P2PHASE(lwb->lwb_nused, sizeof (uint64_t)));
|
|
|
|
|
|
return (lwb);
|
|
|
}
|
|
|
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
|
|
|
index 0622553..1acb801 100644
|
|
|
--- a/module/zfs/zio.c
|
|
|
+++ b/module/zfs/zio.c
|
|
|
@@ -85,6 +85,22 @@ extern vmem_t *zio_alloc_arena;
|
|
|
extern int zfs_mg_alloc_failures;
|
|
|
|
|
|
/*
|
|
|
+ * The following actions directly affect the spa's sync-to-convergence logic.
|
|
|
+ * The values below define the sync pass when we start performing the action.
|
|
|
+ * Care should be taken when changing these values as they directly impact
|
|
|
+ * spa_sync() performance. Tuning these values may introduce subtle performance
|
|
|
+ * pathologies and should only be done in the context of performance analysis.
|
|
|
+ * These tunables will eventually be removed and replaced with #defines once
|
|
|
+ * enough analysis has been done to determine optimal values.
|
|
|
+ *
|
|
|
+ * The 'zfs_sync_pass_deferred_free' pass must be greater than 1 to ensure that
|
|
|
+ * regular blocks are not deferred.
|
|
|
+ */
|
|
|
+int zfs_sync_pass_deferred_free = 2; /* defer frees starting in this pass */
|
|
|
+int zfs_sync_pass_dont_compress = 5; /* don't compress starting in this pass */
|
|
|
+int zfs_sync_pass_rewrite = 2; /* rewrite new bps starting in this pass */
|
|
|
+
|
|
|
+/*
|
|
|
* An allocating zio is one that either currently has the DVA allocate
|
|
|
* stage set or will have it later in its lifetime.
|
|
|
*/
|
|
|
@@ -609,6 +625,9 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
|
|
|
zio->io_vsd_ops = NULL;
|
|
|
zio->io_offset = offset;
|
|
|
zio->io_deadline = 0;
|
|
|
+ zio->io_timestamp = 0;
|
|
|
+ zio->io_delta = 0;
|
|
|
+ zio->io_delay = 0;
|
|
|
zio->io_orig_data = zio->io_data = data;
|
|
|
zio->io_orig_size = zio->io_size = size;
|
|
|
zio->io_orig_flags = zio->io_flags = flags;
|
|
|
@@ -620,7 +639,6 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
|
|
|
zio->io_bp_override = NULL;
|
|
|
zio->io_walk_link = NULL;
|
|
|
zio->io_transform_stack = NULL;
|
|
|
- zio->io_delay = 0;
|
|
|
zio->io_error = 0;
|
|
|
zio->io_child_count = 0;
|
|
|
zio->io_parent_count = 0;
|
|
|
@@ -769,7 +787,9 @@ zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
|
|
|
|
|
|
ASSERT(!BP_IS_HOLE(bp));
|
|
|
ASSERT(spa_syncing_txg(spa) == txg);
|
|
|
- ASSERT(spa_sync_pass(spa) <= SYNC_PASS_DEFERRED_FREE);
|
|
|
+ ASSERT(spa_sync_pass(spa) < zfs_sync_pass_deferred_free);
|
|
|
+
|
|
|
+ arc_freed(spa, bp);
|
|
|
|
|
|
zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
|
|
|
NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_FREE, flags,
|
|
|
@@ -1066,7 +1086,7 @@ zio_write_bp_init(zio_t *zio)
|
|
|
ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
|
|
|
ASSERT(!BP_GET_DEDUP(bp));
|
|
|
|
|
|
- if (pass > SYNC_PASS_DONT_COMPRESS)
|
|
|
+ if (pass >= zfs_sync_pass_dont_compress)
|
|
|
compress = ZIO_COMPRESS_OFF;
|
|
|
|
|
|
/* Make sure someone doesn't change their mind on overwrites */
|
|
|
@@ -1095,7 +1115,7 @@ zio_write_bp_init(zio_t *zio)
|
|
|
* There should only be a handful of blocks after pass 1 in any case.
|
|
|
*/
|
|
|
if (bp->blk_birth == zio->io_txg && BP_GET_PSIZE(bp) == psize &&
|
|
|
- pass > SYNC_PASS_REWRITE) {
|
|
|
+ pass >= zfs_sync_pass_rewrite) {
|
|
|
enum zio_stage gang_stages = zio->io_pipeline & ZIO_GANG_STAGES;
|
|
|
ASSERT(psize != 0);
|
|
|
zio->io_pipeline = ZIO_REWRITE_PIPELINE | gang_stages;
|
|
|
@@ -1147,7 +1167,7 @@ zio_free_bp_init(zio_t *zio)
|
|
|
*/
|
|
|
|
|
|
static void
|
|
|
-zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline)
|
|
|
+zio_taskq_dispatch(zio_t *zio, zio_taskq_type_t q, boolean_t cutinline)
|
|
|
{
|
|
|
spa_t *spa = zio->io_spa;
|
|
|
zio_type_t t = zio->io_type;
|
|
|
@@ -1168,10 +1188,11 @@ zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline)
|
|
|
t = ZIO_TYPE_NULL;
|
|
|
|
|
|
/*
|
|
|
- * If this is a high priority I/O, then use the high priority taskq.
|
|
|
+ * If this is a high priority I/O, then use the high priority taskq if
|
|
|
+ * available.
|
|
|
*/
|
|
|
if (zio->io_priority == ZIO_PRIORITY_NOW &&
|
|
|
- spa->spa_zio_taskq[t][q + 1] != NULL)
|
|
|
+ spa->spa_zio_taskq[t][q + 1].stqs_count != 0)
|
|
|
q++;
|
|
|
|
|
|
ASSERT3U(q, <, ZIO_TASKQ_TYPES);
|
|
|
@@ -1182,20 +1203,25 @@ zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline)
|
|
|
* to dispatch the zio to another taskq at the same time.
|
|
|
*/
|
|
|
ASSERT(taskq_empty_ent(&zio->io_tqent));
|
|
|
- taskq_dispatch_ent(spa->spa_zio_taskq[t][q],
|
|
|
- (task_func_t *)zio_execute, zio, flags, &zio->io_tqent);
|
|
|
+ spa_taskq_dispatch_ent(spa, t, q, (task_func_t *)zio_execute, zio,
|
|
|
+ flags, &zio->io_tqent);
|
|
|
}
|
|
|
|
|
|
static boolean_t
|
|
|
-zio_taskq_member(zio_t *zio, enum zio_taskq_type q)
|
|
|
+zio_taskq_member(zio_t *zio, zio_taskq_type_t q)
|
|
|
{
|
|
|
kthread_t *executor = zio->io_executor;
|
|
|
spa_t *spa = zio->io_spa;
|
|
|
zio_type_t t;
|
|
|
|
|
|
- for (t = 0; t < ZIO_TYPES; t++)
|
|
|
- if (taskq_member(spa->spa_zio_taskq[t][q], executor))
|
|
|
- return (B_TRUE);
|
|
|
+ for (t = 0; t < ZIO_TYPES; t++) {
|
|
|
+ spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
|
|
|
+ uint_t i;
|
|
|
+ for (i = 0; i < tqs->stqs_count; i++) {
|
|
|
+ if (taskq_member(tqs->stqs_taskq[i], executor))
|
|
|
+ return (B_TRUE);
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
return (B_FALSE);
|
|
|
}
|
|
|
@@ -1222,7 +1248,7 @@ zio_interrupt(zio_t *zio)
|
|
|
* vdev-level caching or aggregation; (5) the I/O is deferred
|
|
|
* due to vdev-level queueing; (6) the I/O is handed off to
|
|
|
* another thread. In all cases, the pipeline stops whenever
|
|
|
- * there's no CPU work; it never burns a thread in cv_wait().
|
|
|
+ * there's no CPU work; it never burns a thread in cv_wait_io().
|
|
|
*
|
|
|
* There's no locking on io_stage because there's no legitimate way
|
|
|
* for multiple threads to be attempting to process the same I/O.
|
|
|
@@ -1422,6 +1448,9 @@ zio_suspend(spa_t *spa, zio_t *zio)
|
|
|
"failure and the failure mode property for this pool "
|
|
|
"is set to panic.", spa_name(spa));
|
|
|
|
|
|
+ cmn_err(CE_WARN, "Pool '%s' has encountered an uncorrectable I/O "
|
|
|
+ "failure and has been suspended.\n", spa_name(spa));
|
|
|
+
|
|
|
zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL, NULL, 0, 0);
|
|
|
|
|
|
mutex_enter(&spa->spa_suspend_lock);
|
|
|
@@ -2025,7 +2054,7 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
|
|
|
|
|
|
ddt_exit(ddt);
|
|
|
|
|
|
- error = arc_read_nolock(NULL, spa, &blk,
|
|
|
+ error = arc_read(NULL, spa, &blk,
|
|
|
arc_getbuf_func, &abuf, ZIO_PRIORITY_SYNC_READ,
|
|
|
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
|
|
|
&aflags, &zio->io_bookmark);
|
|
|
@@ -2279,7 +2308,7 @@ zio_dva_allocate(zio_t *zio)
|
|
|
}
|
|
|
|
|
|
ASSERT(BP_IS_HOLE(bp));
|
|
|
- ASSERT3U(BP_GET_NDVAS(bp), ==, 0);
|
|
|
+ ASSERT0(BP_GET_NDVAS(bp));
|
|
|
ASSERT3U(zio->io_prop.zp_copies, >, 0);
|
|
|
ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa));
|
|
|
ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp));
|
|
|
@@ -2906,11 +2935,11 @@ zio_done(zio_t *zio)
|
|
|
vdev_stat_update(zio, zio->io_size);
|
|
|
|
|
|
/*
|
|
|
- * If this I/O is attached to a particular vdev is slow, exeeding
|
|
|
+ * If this I/O is attached to a particular vdev is slow, exceeding
|
|
|
* 30 seconds to complete, post an error described the I/O delay.
|
|
|
* We ignore these errors if the device is currently unavailable.
|
|
|
*/
|
|
|
- if (zio->io_delay >= zio_delay_max) {
|
|
|
+ if (zio->io_delay >= MSEC_TO_TICK(zio_delay_max)) {
|
|
|
if (zio->io_vd != NULL && !vdev_is_dead(zio->io_vd))
|
|
|
zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa,
|
|
|
zio->io_vd, zio, 0, 0);
|
|
|
@@ -3060,8 +3089,8 @@ zio_done(zio_t *zio)
|
|
|
* Hand it off to the otherwise-unused claim taskq.
|
|
|
*/
|
|
|
ASSERT(taskq_empty_ent(&zio->io_tqent));
|
|
|
- (void) taskq_dispatch_ent(
|
|
|
- zio->io_spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE],
|
|
|
+ spa_taskq_dispatch_ent(zio->io_spa,
|
|
|
+ ZIO_TYPE_CLAIM, ZIO_TASKQ_ISSUE,
|
|
|
(task_func_t *)zio_reexecute, zio, 0,
|
|
|
&zio->io_tqent);
|
|
|
}
|
|
|
@@ -3210,4 +3239,16 @@ MODULE_PARM_DESC(zio_delay_max, "Max zio millisec delay before posting event");
|
|
|
|
|
|
module_param(zio_requeue_io_start_cut_in_line, int, 0644);
|
|
|
MODULE_PARM_DESC(zio_requeue_io_start_cut_in_line, "Prioritize requeued I/O");
|
|
|
+
|
|
|
+module_param(zfs_sync_pass_deferred_free, int, 0644);
|
|
|
+MODULE_PARM_DESC(zfs_sync_pass_deferred_free,
|
|
|
+ "defer frees starting in this pass");
|
|
|
+
|
|
|
+module_param(zfs_sync_pass_dont_compress, int, 0644);
|
|
|
+MODULE_PARM_DESC(zfs_sync_pass_dont_compress,
|
|
|
+ "don't compress starting in this pass");
|
|
|
+
|
|
|
+module_param(zfs_sync_pass_rewrite, int, 0644);
|
|
|
+MODULE_PARM_DESC(zfs_sync_pass_rewrite,
|
|
|
+ "rewrite new bps starting in this pass");
|
|
|
#endif
|
|
|
diff --git a/module/zfs/zio_inject.c b/module/zfs/zio_inject.c
|
|
|
index 293f267..eb589c4 100644
|
|
|
--- a/module/zfs/zio_inject.c
|
|
|
+++ b/module/zfs/zio_inject.c
|
|
|
@@ -20,6 +20,7 @@
|
|
|
*/
|
|
|
/*
|
|
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
|
+ * Copyright (c) 2012 by Delphix. All rights reserved.
|
|
|
*/
|
|
|
|
|
|
/*
|
|
|
@@ -147,14 +148,8 @@ zio_handle_fault_injection(zio_t *zio, int error)
|
|
|
for (handler = list_head(&inject_handlers); handler != NULL;
|
|
|
handler = list_next(&inject_handlers, handler)) {
|
|
|
|
|
|
- /* Ignore errors not destined for this pool */
|
|
|
- if (zio->io_spa != handler->zi_spa)
|
|
|
- continue;
|
|
|
-
|
|
|
- /* Ignore device errors and panic injection */
|
|
|
- if (handler->zi_record.zi_guid != 0 ||
|
|
|
- handler->zi_record.zi_func[0] != '\0' ||
|
|
|
- handler->zi_record.zi_duration != 0)
|
|
|
+ if (zio->io_spa != handler->zi_spa ||
|
|
|
+ handler->zi_record.zi_cmd != ZINJECT_DATA_FAULT)
|
|
|
continue;
|
|
|
|
|
|
/* If this handler matches, return EIO */
|
|
|
@@ -197,10 +192,7 @@ zio_handle_label_injection(zio_t *zio, int error)
|
|
|
uint64_t start = handler->zi_record.zi_start;
|
|
|
uint64_t end = handler->zi_record.zi_end;
|
|
|
|
|
|
- /* Ignore device only faults or panic injection */
|
|
|
- if (handler->zi_record.zi_start == 0 ||
|
|
|
- handler->zi_record.zi_func[0] != '\0' ||
|
|
|
- handler->zi_record.zi_duration != 0)
|
|
|
+ if (handler->zi_record.zi_cmd != ZINJECT_LABEL_FAULT)
|
|
|
continue;
|
|
|
|
|
|
/*
|
|
|
@@ -246,13 +238,7 @@ zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
|
|
|
for (handler = list_head(&inject_handlers); handler != NULL;
|
|
|
handler = list_next(&inject_handlers, handler)) {
|
|
|
|
|
|
- /*
|
|
|
- * Ignore label specific faults, panic injection
|
|
|
- * or fake writes
|
|
|
- */
|
|
|
- if (handler->zi_record.zi_start != 0 ||
|
|
|
- handler->zi_record.zi_func[0] != '\0' ||
|
|
|
- handler->zi_record.zi_duration != 0)
|
|
|
+ if (handler->zi_record.zi_cmd != ZINJECT_DEVICE_FAULT)
|
|
|
continue;
|
|
|
|
|
|
if (vd->vdev_guid == handler->zi_record.zi_guid) {
|
|
|
@@ -316,10 +302,8 @@ zio_handle_ignored_writes(zio_t *zio)
|
|
|
handler = list_next(&inject_handlers, handler)) {
|
|
|
|
|
|
/* Ignore errors not destined for this pool */
|
|
|
- if (zio->io_spa != handler->zi_spa)
|
|
|
- continue;
|
|
|
-
|
|
|
- if (handler->zi_record.zi_duration == 0)
|
|
|
+ if (zio->io_spa != handler->zi_spa ||
|
|
|
+ handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES)
|
|
|
continue;
|
|
|
|
|
|
/*
|
|
|
@@ -355,11 +339,8 @@ spa_handle_ignored_writes(spa_t *spa)
|
|
|
for (handler = list_head(&inject_handlers); handler != NULL;
|
|
|
handler = list_next(&inject_handlers, handler)) {
|
|
|
|
|
|
- /* Ignore errors not destined for this pool */
|
|
|
- if (spa != handler->zi_spa)
|
|
|
- continue;
|
|
|
-
|
|
|
- if (handler->zi_record.zi_duration == 0)
|
|
|
+ if (spa != handler->zi_spa ||
|
|
|
+ handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES)
|
|
|
continue;
|
|
|
|
|
|
if (handler->zi_record.zi_duration > 0) {
|
|
|
@@ -379,6 +360,34 @@ spa_handle_ignored_writes(spa_t *spa)
|
|
|
rw_exit(&inject_lock);
|
|
|
}
|
|
|
|
|
|
+uint64_t
|
|
|
+zio_handle_io_delay(zio_t *zio)
|
|
|
+{
|
|
|
+ vdev_t *vd = zio->io_vd;
|
|
|
+ inject_handler_t *handler;
|
|
|
+ uint64_t seconds = 0;
|
|
|
+
|
|
|
+ if (zio_injection_enabled == 0)
|
|
|
+ return (0);
|
|
|
+
|
|
|
+ rw_enter(&inject_lock, RW_READER);
|
|
|
+
|
|
|
+ for (handler = list_head(&inject_handlers); handler != NULL;
|
|
|
+ handler = list_next(&inject_handlers, handler)) {
|
|
|
+
|
|
|
+ if (handler->zi_record.zi_cmd != ZINJECT_DELAY_IO)
|
|
|
+ continue;
|
|
|
+
|
|
|
+ if (vd->vdev_guid == handler->zi_record.zi_guid) {
|
|
|
+ seconds = handler->zi_record.zi_timer;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+ rw_exit(&inject_lock);
|
|
|
+ return (seconds);
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* Create a new handler for the given record. We add it to the list, adding
|
|
|
* a reference to the spa_t in the process. We increment zio_injection_enabled,
|
|
|
diff --git a/module/zfs/zpl_file.c b/module/zfs/zpl_file.c
|
|
|
index 9c27b7f..ebae6bf 100644
|
|
|
--- a/module/zfs/zpl_file.c
|
|
|
+++ b/module/zfs/zpl_file.c
|
|
|
@@ -235,6 +235,28 @@ zpl_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
|
|
|
return (wrote);
|
|
|
}
|
|
|
|
|
|
+static loff_t
|
|
|
+zpl_llseek(struct file *filp, loff_t offset, int whence)
|
|
|
+{
|
|
|
+#if defined(SEEK_HOLE) && defined(SEEK_DATA)
|
|
|
+ if (whence == SEEK_DATA || whence == SEEK_HOLE) {
|
|
|
+ struct inode *ip = filp->f_mapping->host;
|
|
|
+ loff_t maxbytes = ip->i_sb->s_maxbytes;
|
|
|
+ loff_t error;
|
|
|
+
|
|
|
+ spl_inode_lock(ip);
|
|
|
+ error = -zfs_holey(ip, whence, &offset);
|
|
|
+ if (error == 0)
|
|
|
+ error = lseek_execute(filp, ip, offset, maxbytes);
|
|
|
+ spl_inode_unlock(ip);
|
|
|
+
|
|
|
+ return (error);
|
|
|
+ }
|
|
|
+#endif /* SEEK_HOLE && SEEK_DATA */
|
|
|
+
|
|
|
+ return generic_file_llseek(filp, offset, whence);
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* It's worth taking a moment to describe how mmap is implemented
|
|
|
* for zfs because it differs considerably from other Linux filesystems.
|
|
|
@@ -433,6 +455,27 @@ zpl_fallocate(struct file *filp, int mode, loff_t offset, loff_t len)
|
|
|
}
|
|
|
#endif /* HAVE_FILE_FALLOCATE */
|
|
|
|
|
|
+static long
|
|
|
+zpl_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
|
|
+{
|
|
|
+ switch (cmd) {
|
|
|
+ case ZFS_IOC_GETFLAGS:
|
|
|
+ case ZFS_IOC_SETFLAGS:
|
|
|
+ return (-EOPNOTSUPP);
|
|
|
+ default:
|
|
|
+ return (-ENOTTY);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+#ifdef CONFIG_COMPAT
|
|
|
+static long
|
|
|
+zpl_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
|
|
+{
|
|
|
+ return zpl_ioctl(filp, cmd, arg);
|
|
|
+}
|
|
|
+#endif /* CONFIG_COMPAT */
|
|
|
+
|
|
|
+
|
|
|
const struct address_space_operations zpl_address_space_operations = {
|
|
|
.readpages = zpl_readpages,
|
|
|
.readpage = zpl_readpage,
|
|
|
@@ -443,15 +486,18 @@ const struct address_space_operations zpl_address_space_operations = {
|
|
|
const struct file_operations zpl_file_operations = {
|
|
|
.open = zpl_open,
|
|
|
.release = zpl_release,
|
|
|
- .llseek = generic_file_llseek,
|
|
|
+ .llseek = zpl_llseek,
|
|
|
.read = zpl_read,
|
|
|
.write = zpl_write,
|
|
|
- .readdir = zpl_readdir,
|
|
|
.mmap = zpl_mmap,
|
|
|
.fsync = zpl_fsync,
|
|
|
#ifdef HAVE_FILE_FALLOCATE
|
|
|
.fallocate = zpl_fallocate,
|
|
|
#endif /* HAVE_FILE_FALLOCATE */
|
|
|
+ .unlocked_ioctl = zpl_ioctl,
|
|
|
+#ifdef CONFIG_COMPAT
|
|
|
+ .compat_ioctl = zpl_compat_ioctl,
|
|
|
+#endif
|
|
|
};
|
|
|
|
|
|
const struct file_operations zpl_dir_file_operations = {
|
|
|
@@ -459,4 +505,8 @@ const struct file_operations zpl_dir_file_operations = {
|
|
|
.read = generic_read_dir,
|
|
|
.readdir = zpl_readdir,
|
|
|
.fsync = zpl_fsync,
|
|
|
+ .unlocked_ioctl = zpl_ioctl,
|
|
|
+#ifdef CONFIG_COMPAT
|
|
|
+ .compat_ioctl = zpl_compat_ioctl,
|
|
|
+#endif
|
|
|
};
|
|
|
diff --git a/module/zfs/zpl_super.c b/module/zfs/zpl_super.c
|
|
|
index d4d4e1b..eee4a50 100644
|
|
|
--- a/module/zfs/zpl_super.c
|
|
|
+++ b/module/zfs/zpl_super.c
|
|
|
@@ -240,6 +240,10 @@ zpl_kill_sb(struct super_block *sb)
|
|
|
{
|
|
|
zfs_preumount(sb);
|
|
|
kill_anon_super(sb);
|
|
|
+
|
|
|
+#ifdef HAVE_S_INSTANCES_LIST_HEAD
|
|
|
+ sb->s_instances.next = &(zpl_fs_type.fs_supers);
|
|
|
+#endif /* HAVE_S_INSTANCES_LIST_HEAD */
|
|
|
}
|
|
|
|
|
|
#ifdef HAVE_SHRINK
|
|
|
diff --git a/module/zfs/zpl_xattr.c b/module/zfs/zpl_xattr.c
|
|
|
index c03764f..eb2c00d 100644
|
|
|
--- a/module/zfs/zpl_xattr.c
|
|
|
+++ b/module/zfs/zpl_xattr.c
|
|
|
@@ -225,6 +225,11 @@ zpl_xattr_get_dir(struct inode *ip, const char *name, void *value,
|
|
|
goto out;
|
|
|
}
|
|
|
|
|
|
+ if (size < i_size_read(xip)) {
|
|
|
+ error = -ERANGE;
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+
|
|
|
error = zpl_read_common(xip, value, size, 0, UIO_SYSSPACE, 0, cr);
|
|
|
out:
|
|
|
if (xip)
|
|
|
@@ -263,9 +268,12 @@ zpl_xattr_get_sa(struct inode *ip, const char *name, void *value, size_t size)
|
|
|
if (!size)
|
|
|
return (nv_size);
|
|
|
|
|
|
- memcpy(value, nv_value, MIN(size, nv_size));
|
|
|
+ if (size < nv_size)
|
|
|
+ return (-ERANGE);
|
|
|
+
|
|
|
+ memcpy(value, nv_value, nv_size);
|
|
|
|
|
|
- return (MIN(size, nv_size));
|
|
|
+ return (nv_size);
|
|
|
}
|
|
|
|
|
|
static int
|
|
|
@@ -280,7 +288,7 @@ __zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size,
|
|
|
|
|
|
if (zsb->z_use_sa && zp->z_is_sa) {
|
|
|
error = zpl_xattr_get_sa(ip, name, value, size);
|
|
|
- if (error >= 0)
|
|
|
+ if (error != -ENOENT)
|
|
|
goto out;
|
|
|
}
|
|
|
|
|
|
diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
|
|
|
index b41eeb2..b516156 100644
|
|
|
--- a/module/zfs/zvol.c
|
|
|
+++ b/module/zfs/zvol.c
|
|
|
@@ -1024,7 +1024,11 @@ out_mutex:
|
|
|
return (error);
|
|
|
}
|
|
|
|
|
|
+#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
|
|
|
+static void
|
|
|
+#else
|
|
|
static int
|
|
|
+#endif
|
|
|
zvol_release(struct gendisk *disk, fmode_t mode)
|
|
|
{
|
|
|
zvol_state_t *zv = disk->private_data;
|
|
|
@@ -1044,7 +1048,9 @@ zvol_release(struct gendisk *disk, fmode_t mode)
|
|
|
if (drop_mutex)
|
|
|
mutex_exit(&zvol_state_lock);
|
|
|
|
|
|
+#ifndef HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
|
|
|
return (0);
|
|
|
+#endif
|
|
|
}
|
|
|
|
|
|
static int
|
|
|
@@ -1214,8 +1220,9 @@ zvol_alloc(dev_t dev, const char *name)
|
|
|
int error = 0;
|
|
|
|
|
|
zv = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP);
|
|
|
- if (zv == NULL)
|
|
|
- goto out;
|
|
|
+
|
|
|
+ spin_lock_init(&zv->zv_lock);
|
|
|
+ list_link_init(&zv->zv_next);
|
|
|
|
|
|
zv->zv_queue = blk_init_queue(zvol_request, &zv->zv_lock);
|
|
|
if (zv->zv_queue == NULL)
|
|
|
@@ -1250,9 +1257,6 @@ zvol_alloc(dev_t dev, const char *name)
|
|
|
sizeof (rl_t), offsetof(rl_t, r_node));
|
|
|
zv->zv_znode.z_is_zvol = TRUE;
|
|
|
|
|
|
- spin_lock_init(&zv->zv_lock);
|
|
|
- list_link_init(&zv->zv_next);
|
|
|
-
|
|
|
zv->zv_disk->major = zvol_major;
|
|
|
zv->zv_disk->first_minor = (dev & MINORMASK);
|
|
|
zv->zv_disk->fops = &zvol_ops;
|
|
|
@@ -1267,7 +1271,7 @@ out_queue:
|
|
|
blk_cleanup_queue(zv->zv_queue);
|
|
|
out_kmem:
|
|
|
kmem_free(zv, sizeof (zvol_state_t));
|
|
|
-out:
|
|
|
+
|
|
|
return NULL;
|
|
|
}
|
|
|
|
|
|
@@ -1563,30 +1567,36 @@ zvol_init(void)
|
|
|
{
|
|
|
int error;
|
|
|
|
|
|
+ list_create(&zvol_state_list, sizeof (zvol_state_t),
|
|
|
+ offsetof(zvol_state_t, zv_next));
|
|
|
+ mutex_init(&zvol_state_lock, NULL, MUTEX_DEFAULT, NULL);
|
|
|
+
|
|
|
zvol_taskq = taskq_create(ZVOL_DRIVER, zvol_threads, maxclsyspri,
|
|
|
zvol_threads, INT_MAX, TASKQ_PREPOPULATE);
|
|
|
if (zvol_taskq == NULL) {
|
|
|
printk(KERN_INFO "ZFS: taskq_create() failed\n");
|
|
|
- return (-ENOMEM);
|
|
|
+ error = -ENOMEM;
|
|
|
+ goto out1;
|
|
|
}
|
|
|
|
|
|
error = register_blkdev(zvol_major, ZVOL_DRIVER);
|
|
|
if (error) {
|
|
|
printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error);
|
|
|
- taskq_destroy(zvol_taskq);
|
|
|
- return (error);
|
|
|
+ goto out2;
|
|
|
}
|
|
|
|
|
|
blk_register_region(MKDEV(zvol_major, 0), 1UL << MINORBITS,
|
|
|
THIS_MODULE, zvol_probe, NULL, NULL);
|
|
|
|
|
|
- mutex_init(&zvol_state_lock, NULL, MUTEX_DEFAULT, NULL);
|
|
|
- list_create(&zvol_state_list, sizeof (zvol_state_t),
|
|
|
- offsetof(zvol_state_t, zv_next));
|
|
|
+ return (0);
|
|
|
|
|
|
- (void) zvol_create_minors(NULL);
|
|
|
+out2:
|
|
|
+ taskq_destroy(zvol_taskq);
|
|
|
+out1:
|
|
|
+ mutex_destroy(&zvol_state_lock);
|
|
|
+ list_destroy(&zvol_state_list);
|
|
|
|
|
|
- return (0);
|
|
|
+ return (error);
|
|
|
}
|
|
|
|
|
|
void
|
|
|
diff --git a/rpm/fedora/zfs-kmod.spec.in b/rpm/fedora/zfs-kmod.spec.in
|
|
|
index 6a8e570..88ab3b2 100644
|
|
|
--- a/rpm/fedora/zfs-kmod.spec.in
|
|
|
+++ b/rpm/fedora/zfs-kmod.spec.in
|
|
|
@@ -44,7 +44,7 @@ BuildRequires: spl-devel-kmod = %{version}-%{release}
|
|
|
# Kmodtool does its magic here. A patched version of kmodtool is shipped
|
|
|
# with the source rpm until kmod development packages are supported upstream.
|
|
|
# https://bugzilla.rpmfusion.org/show_bug.cgi?id=2714
|
|
|
-%{expand:%(sh %{SOURCE10} --target %{_target_cpu} --repo %{repo} --kmodname %{name} --devel %{?prefix:--prefix "%{?prefix}"} %{?buildforkernels:--%{buildforkernels}} %{?kernels:--for-kernels "%{?kernels}"} 2>/dev/null) }
|
|
|
+%{expand:%(sh %{SOURCE10} --target %{_target_cpu} --repo %{repo} --kmodname %{name} --devel %{?prefix:--prefix "%{?prefix}"} %{?buildforkernels:--%{buildforkernels}} %{?kernels:--for-kernels "%{?kernels}"} %{?kernelbuildroot:--buildroot "%{?kernelbuildroot}"} 2>/dev/null) }
|
|
|
|
|
|
|
|
|
%description
|
|
|
@@ -55,7 +55,7 @@ This package contains the ZFS kernel modules.
|
|
|
%{?kmodtool_check}
|
|
|
|
|
|
# Print kmodtool output for debugging purposes:
|
|
|
-sh %{SOURCE10} --target %{_target_cpu} --repo %{repo} --kmodname %{name} --devel %{?buildforkernels:--%{buildforkernels}} %{?kernels:--for-kernels "%{?kernels}"} 2>/dev/null
|
|
|
+sh %{SOURCE10} --target %{_target_cpu} --repo %{repo} --kmodname %{name} --devel %{?buildforkernels:--%{buildforkernels}} %{?kernels:--for-kernels "%{?kernels}"} %{?kernelbuildroot:--buildroot "%{?kernelbuildroot}"} 2>/dev/null
|
|
|
|
|
|
%if %{with debug}
|
|
|
%define debug --enable-debug
|
|
|
@@ -69,6 +69,28 @@ sh %{SOURCE10} --target %{_target_cpu} --repo %{repo} --kmodname %{name} --dev
|
|
|
%define debug_dmu_tx --disable-debug-dmu-tx
|
|
|
%endif
|
|
|
|
|
|
+#
|
|
|
+# Allow the overriding of spl locations
|
|
|
+#
|
|
|
+%if %{defined require_splver}
|
|
|
+%define splver %{require_splver}
|
|
|
+%else
|
|
|
+%define splver %{version}
|
|
|
+%endif
|
|
|
+
|
|
|
+%if %{defined require_spldir}
|
|
|
+%define spldir %{require_spldir}
|
|
|
+%else
|
|
|
+%define spldir %{_usrsrc}/spl-%{splver}
|
|
|
+%endif
|
|
|
+
|
|
|
+%if %{defined require_splobj}
|
|
|
+%define splobj %{require_splobj}
|
|
|
+%else
|
|
|
+%define splobj %{spldir}/${kernel_version%%___*}
|
|
|
+%endif
|
|
|
+
|
|
|
+
|
|
|
# Leverage VPATH from configure to avoid making multiple copies.
|
|
|
%define _configure ../%{module}-%{version}/configure
|
|
|
|
|
|
@@ -85,8 +107,8 @@ for kernel_version in %{?kernel_versions}; do
|
|
|
--with-config=kernel \
|
|
|
--with-linux="${kernel_version##*___}" \
|
|
|
--with-linux-obj="${kernel_version##*___}" \
|
|
|
- --with-spl="/usr/src/spl-%{version}" \
|
|
|
- --with-spl-obj="/usr/src/spl-%{version}/${kernel_version%%___*}" \
|
|
|
+ --with-spl="%{spldir}" \
|
|
|
+ --with-spl-obj="%{splobj}" \
|
|
|
%{debug} \
|
|
|
%{debug_dmu_tx}
|
|
|
make %{?_smp_mflags}
|
|
|
diff --git a/rpm/generic/zfs-dkms.spec.in b/rpm/generic/zfs-dkms.spec.in
|
|
|
index 7d871cb..c758baa 100644
|
|
|
--- a/rpm/generic/zfs-dkms.spec.in
|
|
|
+++ b/rpm/generic/zfs-dkms.spec.in
|
|
|
@@ -14,8 +14,14 @@ Source0: %{module}-%{version}.tar.gz
|
|
|
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
|
|
|
BuildArch: noarch
|
|
|
|
|
|
+%if 0%{?dkms_version:1}
|
|
|
+Requires: dkms = %{dkms_version}
|
|
|
+%else
|
|
|
Requires: dkms >= 2.2.0.2
|
|
|
+%endif
|
|
|
Requires: spl-dkms = %{version}
|
|
|
+Requires: gcc, make, perl
|
|
|
+Requires: kernel-devel
|
|
|
Provides: %{module}-kmod = %{version}
|
|
|
|
|
|
%description
|
|
|
diff --git a/rpm/generic/zfs-kmod.spec.in b/rpm/generic/zfs-kmod.spec.in
|
|
|
index 6f478f3..cec3f3f 100644
|
|
|
--- a/rpm/generic/zfs-kmod.spec.in
|
|
|
+++ b/rpm/generic/zfs-kmod.spec.in
|
|
|
@@ -15,7 +15,7 @@ License: @ZFS_META_LICENSE@
|
|
|
URL: http://zfsonlinux.org/
|
|
|
Source0: %{module}-%{version}.tar.gz
|
|
|
Source10: kmodtool
|
|
|
-BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
|
|
|
+BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id} -u -n)
|
|
|
|
|
|
# The developments headers will conflict with the dkms packages.
|
|
|
Conflicts: %{module}-dkms
|
|
|
@@ -26,7 +26,7 @@ Conflicts: %{module}-dkms
|
|
|
%{?suse_version:BuildRequires: kernel-source}
|
|
|
|
|
|
%if 0%{?rhel}%{?fedora}%{?suse_version}
|
|
|
-BuildRequires: spl-devel-kmod = %{version}-%{release}
|
|
|
+BuildRequires: spl-devel-kmod = %{version}
|
|
|
%global KmodsBuildRequires spl-devel-kmod
|
|
|
%global KmodsRequires kmod-spl
|
|
|
%endif
|
|
|
@@ -47,7 +47,7 @@ BuildRequires: spl-devel-kmod = %{version}-%{release}
|
|
|
# Kmodtool does its magic here. A patched version of kmodtool is shipped
|
|
|
# with the source rpm until kmod development packages are supported upstream.
|
|
|
# https://bugzilla.rpmfusion.org/show_bug.cgi?id=2714
|
|
|
-%{expand:%(bash %{SOURCE10} --target %{_target_cpu} --kmodname %{name} --devel %{?prefix:--prefix "%{?prefix}"} %{?kernels:--for-kernels "%{?kernels}"} 2>/dev/null) }
|
|
|
+%{expand:%(bash %{SOURCE10} --target %{_target_cpu} --kmodname %{name} --devel %{?prefix:--prefix "%{?prefix}"} %{?kernels:--for-kernels "%{?kernels}"} %{?kernelbuildroot:--buildroot "%{?kernelbuildroot}"} 2>/dev/null) }
|
|
|
|
|
|
|
|
|
%description
|
|
|
@@ -58,7 +58,7 @@ This package contains the ZFS kernel modules.
|
|
|
%{?kmodtool_check}
|
|
|
|
|
|
# Print kmodtool output for debugging purposes:
|
|
|
-bash %{SOURCE10} --target %{_target_cpu} --repo %{repo} --kmodname %{name} --devel %{?prefix:--prefix "%{?prefix}"} %{?kernels:--for-kernels "%{?kernels}"} 2>/dev/null
|
|
|
+bash %{SOURCE10} --target %{_target_cpu} --repo %{repo} --kmodname %{name} --devel %{?prefix:--prefix "%{?prefix}"} %{?kernels:--for-kernels "%{?kernels}"} %{?kernelbuildroot:--buildroot "%{?kernelbuildroot}"} 2>/dev/null
|
|
|
|
|
|
%if %{with debug}
|
|
|
%define debug --enable-debug
|
|
|
@@ -72,6 +72,28 @@ bash %{SOURCE10} --target %{_target_cpu} --repo %{repo} --kmodname %{name} --d
|
|
|
%define debug_dmu_tx --disable-debug-dmu-tx
|
|
|
%endif
|
|
|
|
|
|
+#
|
|
|
+# Allow the overriding of spl locations
|
|
|
+#
|
|
|
+%if %{defined require_splver}
|
|
|
+%define splver %{require_splver}
|
|
|
+%else
|
|
|
+%define splver %{version}
|
|
|
+%endif
|
|
|
+
|
|
|
+%if %{defined require_spldir}
|
|
|
+%define spldir %{require_spldir}
|
|
|
+%else
|
|
|
+%define spldir %{_usrsrc}/spl-%{splver}
|
|
|
+%endif
|
|
|
+
|
|
|
+%if %{defined require_splobj}
|
|
|
+%define splobj %{require_splobj}
|
|
|
+%else
|
|
|
+%define splobj %{spldir}/${kernel_version%%___*}
|
|
|
+%endif
|
|
|
+
|
|
|
+
|
|
|
# Leverage VPATH from configure to avoid making multiple copies.
|
|
|
%define _configure ../%{module}-%{version}/configure
|
|
|
|
|
|
@@ -90,16 +112,16 @@ for kernel_version in %{?kernel_versions}; do
|
|
|
--with-linux="${kernel_version##*___}" \
|
|
|
--with-linux-obj="${kernel_version##*___}" \
|
|
|
%else
|
|
|
- --with-linux=\
|
|
|
- %(if [ -e /lib/modules/${kernel_version%%___*}/source ]; then \
|
|
|
- echo "/lib/modules/${kernel_version%%___*}/source" \
|
|
|
+ --with-linux="$( \
|
|
|
+ if [ -e "/lib/modules/${kernel_version%%___*}/source" ]; then \
|
|
|
+ echo "/lib/modules/${kernel_version%%___*}/source"; \
|
|
|
else \
|
|
|
- echo "/lib/modules/${kernel_version%%___*}/build" \
|
|
|
- fi) \
|
|
|
+ echo "/lib/modules/${kernel_version%%___*}/build"; \
|
|
|
+ fi)" \
|
|
|
--with-linux-obj="/lib/modules/${kernel_version%%___*}/build" \
|
|
|
%endif
|
|
|
- --with-spl="/usr/src/spl-%{version}" \
|
|
|
- --with-spl-obj="/usr/src/spl-%{version}/${kernel_version%%___*}" \
|
|
|
+ --with-spl="%{spldir}" \
|
|
|
+ --with-spl-obj="%{splobj}" \
|
|
|
%{debug} \
|
|
|
%{debug_dmu_tx}
|
|
|
make %{?_smp_mflags}
|
|
|
diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in
|
|
|
index 7ee4ca0..c832404 100644
|
|
|
--- a/rpm/generic/zfs.spec.in
|
|
|
+++ b/rpm/generic/zfs.spec.in
|
|
|
@@ -29,8 +29,6 @@ ExclusiveArch: i386 i686 x86_64
|
|
|
ExcludeArch: ppc ppc64
|
|
|
|
|
|
Requires: spl = %{version}
|
|
|
-Requires: zfs-dracut = %{version}
|
|
|
-Requires: zfs-test = %{version}
|
|
|
Requires: %{name}-kmod >= %{version}
|
|
|
Provides: %{name}-kmod-common = %{version}
|
|
|
|
|
|
@@ -115,7 +113,16 @@ make %{?_smp_mflags}
|
|
|
make install DESTDIR=%{?buildroot}
|
|
|
find %{?buildroot}%{_libdir} -name '*.la' -exec rm -f {} \;
|
|
|
|
|
|
-%post -p /sbin/ldconfig
|
|
|
+%post
|
|
|
+/sbin/ldconfig
|
|
|
+[ -x /sbin/chkconfig ] && /sbin/chkconfig --add zfs
|
|
|
+exit 0
|
|
|
+
|
|
|
+%preun
|
|
|
+if [ $1 -eq 0 ] ; then
|
|
|
+ [ -x /sbin/chkconfig ] && /sbin/chkconfig --del zfs
|
|
|
+fi
|
|
|
+exit 0
|
|
|
|
|
|
%postun -p /sbin/ldconfig
|
|
|
|
|
|
@@ -123,6 +130,7 @@ find %{?buildroot}%{_libdir} -name '*.la' -exec rm -f {} \;
|
|
|
%doc AUTHORS COPYRIGHT DISCLAIMER
|
|
|
%doc OPENSOLARIS.LICENSE README.markdown
|
|
|
%{_sbindir}/*
|
|
|
+%{_bindir}/*
|
|
|
%{_libdir}/*.so.1*
|
|
|
%{_mandir}/man1/*
|
|
|
%{_mandir}/man5/*
|
|
|
diff --git a/scripts/kmodtool b/scripts/kmodtool
|
|
|
index 2fe014c..6b73780 100755
|
|
|
--- a/scripts/kmodtool
|
|
|
+++ b/scripts/kmodtool
|
|
|
@@ -37,6 +37,7 @@ kernel_versions_to_build_for=
|
|
|
prefix=
|
|
|
filterfile=
|
|
|
target=
|
|
|
+buildroot=
|
|
|
|
|
|
error_out()
|
|
|
{
|
|
|
@@ -305,9 +306,9 @@ print_customrpmtemplate ()
|
|
|
{
|
|
|
for kernel in ${1}
|
|
|
do
|
|
|
- if [[ -e "/usr/src/kernels/${kernel}" ]] ; then
|
|
|
+ if [[ -e "${buildroot}/usr/src/kernels/${kernel}" ]] ; then
|
|
|
# this looks like a Fedora/RH kernel -- print a normal template (which includes the proper BR) and be happy :)
|
|
|
- kernel_versions="${kernel_versions}${kernel}___%{_usrsrc}/kernels/${kernel} "
|
|
|
+ kernel_versions="${kernel_versions}${kernel}___${buildroot}%{_usrsrc}/kernels/${kernel} "
|
|
|
|
|
|
# parse kernel versions string and print template
|
|
|
local kernel_verrelarch=${kernel%%${kernels_known_variants}}
|
|
|
@@ -382,7 +383,6 @@ myprog_help ()
|
|
|
echo "Usage: $(basename ${0}) [OPTIONS]"
|
|
|
echo $'\n'"Creates a template to be used during kmod building"
|
|
|
echo $'\n'"Available options:"
|
|
|
- # FIXME echo " --datadir <dir> -- look for our shared files in <dir>"
|
|
|
echo " --filterfile <file> -- filter the results with grep --file <file>"
|
|
|
echo " --for-kernels <list> -- created templates only for these kernels"
|
|
|
echo " --kmodname <file> -- name of the kmod (required)"
|
|
|
@@ -390,6 +390,7 @@ myprog_help ()
|
|
|
echo " --noakmod -- no akmod package"
|
|
|
echo " --repo <name> -- use buildsys-build-<name>-kerneldevpkgs"
|
|
|
echo " --target <arch> -- target-arch (required)"
|
|
|
+ echo " --buildroot <dir> -- Build root (place to look for build files)"
|
|
|
}
|
|
|
|
|
|
while [ "${1}" ] ; do
|
|
|
@@ -478,6 +479,11 @@ while [ "${1}" ] ; do
|
|
|
shift
|
|
|
build_kernels="current"
|
|
|
;;
|
|
|
+ --buildroot)
|
|
|
+ shift
|
|
|
+ buildroot="${1}"
|
|
|
+ shift
|
|
|
+ ;;
|
|
|
--help)
|
|
|
myprog_help
|
|
|
exit 0
|
|
|
diff --git a/scripts/zconfig.sh b/scripts/zconfig.sh
|
|
|
index 141348c..281166c 100755
|
|
|
--- a/scripts/zconfig.sh
|
|
|
+++ b/scripts/zconfig.sh
|
|
|
@@ -264,8 +264,9 @@ test_4() {
|
|
|
zconfig_zvol_device_stat 0 ${POOL_NAME} ${FULL_ZVOL_NAME} \
|
|
|
${FULL_SNAP_NAME} ${FULL_CLONE_NAME} || fail 9
|
|
|
|
|
|
- # Load the modules, wait 1 second for udev
|
|
|
+ # Load the modules, list the pools to ensure they are opened
|
|
|
${ZFS_SH} zfs="spa_config_path=${TMP_CACHE}" || fail 10
|
|
|
+ ${ZPOOL} list &>/dev/null
|
|
|
|
|
|
# Verify the devices were created
|
|
|
zconfig_zvol_device_stat 10 ${POOL_NAME} ${FULL_ZVOL_NAME} \
|
|
|
diff --git a/scripts/zpios.sh b/scripts/zpios.sh
|
|
|
index a5599a6..0fcb88d 100755
|
|
|
--- a/scripts/zpios.sh
|
|
|
+++ b/scripts/zpios.sh
|
|
|
@@ -229,7 +229,7 @@ fi
|
|
|
check_test || die "${ERROR}"
|
|
|
. ${ZPIOS_TEST}
|
|
|
|
|
|
-# Pull in the zpios test module is not loaded. If this fails it is
|
|
|
+# Pull in the zpios test module if not loaded. If this fails, it is
|
|
|
# likely because the full module stack was not yet loaded with zfs.sh
|
|
|
if check_modules; then
|
|
|
if ! load_modules; then
|
|
|
diff --git a/udev/rules.d/90-zfs.rules.in b/udev/rules.d/90-zfs.rules.in
|
|
|
index 52e1d63..a2715d2 100644
|
|
|
--- a/udev/rules.d/90-zfs.rules.in
|
|
|
+++ b/udev/rules.d/90-zfs.rules.in
|
|
|
@@ -1,4 +1,4 @@
|
|
|
-SUBSYSTEM!="block", GOTO="zfs_end"
|
|
|
+SUBSYSTEM!="block|misc", GOTO="zfs_end"
|
|
|
ACTION!="add|change", GOTO="zfs_end"
|
|
|
|
|
|
ENV{ID_FS_TYPE}=="zfs", RUN+="/sbin/modprobe zfs"
|
|
|
@@ -7,4 +7,6 @@ ENV{ID_FS_TYPE}=="zfs_member", RUN+="/sbin/modprobe zfs"
|
|
|
KERNEL=="null", SYMLINK+="root"
|
|
|
SYMLINK=="null", SYMLINK+="root"
|
|
|
|
|
|
+SUBSYSTEM=="misc", KERNEL=="zfs", RUN+="@sbindir@/zpool list"
|
|
|
+
|
|
|
LABEL="zfs_end"
|
|
|
|