--- ./Documentation/DocBook/deviceiobook.tmpl.mmiowb	2005-10-20 19:13:17.000000000 +0400
+++ ./Documentation/DocBook/deviceiobook.tmpl	2005-10-24 15:14:33.026178680 +0400
@@ -147,8 +147,7 @@
 	compiler is not permitted to reorder the I/O sequence. When the 
 	ordering can be compiler optimised, you can use <function>
 	__readb</function> and friends to indicate the relaxed ordering. Use 
-	this with care. The <function>rmb</function> provides a read memory 
-	barrier. The <function>wmb</function> provides a write memory barrier.
+	this with care.
       </para>
 
       <para>
@@ -159,8 +158,70 @@
 	asynchronously. A driver author must issue a read from the same
 	device to ensure that writes have occurred in the specific cases the
 	author cares. This kind of property cannot be hidden from driver
-	writers in the API.
-      </para>
+	writers in the API.  In some cases, the read used to flush the device
+	may be expected to fail (if the card is resetting, for example).  In
+	that case, the read should be done from config space, which is
+	guaranteed to soft-fail if the card doesn't respond.
+      </para>
+
+      <para>
+	The following is an example of flushing a write to a device when
+	the driver would like to ensure the write's effects are visible prior
+	to continuing execution.
+      </para>
+
+<programlisting>
+static inline void
+qla1280_disable_intrs(struct scsi_qla_host *ha)
+{
+	struct device_reg *reg;
+
+	reg = ha->iobase;
+	/* disable risc and host interrupts */
+	WRT_REG_WORD(&amp;reg->ictrl, 0);
+	/*
+	 * The following read will ensure that the above write
+	 * has been received by the device before we return from this
+	 * function.
+	 */
+	RD_REG_WORD(&amp;reg->ictrl);
+	ha->flags.ints_enabled = 0;
+}
+</programlisting>
+
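+      <para>
+	When the device may be resetting, the flushing read can instead be
+	done from config space, as described above.  The following is an
+	illustrative sketch only; the helper name and the choice of
+	<function>pci_read_config_word</function> on the vendor ID register
+	are this document's, not code taken from an in-tree driver.
+      </para>
+
+<programlisting>
+static void example_flush_posted_writes(struct pci_dev *pdev)
+{
+	u16 dummy;
+
+	/*
+	 * The value read is irrelevant; the config space read itself pushes
+	 * the earlier posted MMIO writes out to the device.  A config read
+	 * soft-fails (typically returning all ones) if the device does not
+	 * respond.
+	 */
+	pci_read_config_word(pdev, PCI_VENDOR_ID, &amp;dummy);
+}
+</programlisting>
+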
+      <para>
+	In addition to write posting, on some large multiprocessing systems
+	(e.g. SGI Challenge, Origin and Altix machines) posted writes from
+	different CPUs are not strongly ordered with respect to one another.
+	Thus it's important to protect the parts of your driver that do
+	memory-mapped writes with locks and to use <function>mmiowb</function>
+	to make sure the writes arrive in the intended order.
+      </para>
+
+      <para>
+	Generally, one should use <function>mmiowb</function> prior to
+	releasing a spinlock that protects regions using <function>writeb
+	</function> or similar functions that aren't followed by <function>
+	readb</function> calls (a read back would itself ensure ordering and
+	flushing).  The following example (again from qla1280.c) illustrates
+	its use.
+      </para>
+
+<programlisting>
+       sp->flags |= SRB_SENT;
+       ha->actthreads++;
+       WRT_REG_WORD(&amp;reg->mailbox4, ha->req_ring_index);
+
+       /*
+        * A Memory Mapped I/O Write Barrier is needed to ensure that this
+        * write to the request queue "in" register is ordered ahead of writes
+        * issued after it by other CPUs.  Access to the register is protected
+        * by the host_lock.  Without the mmiowb, however, this CPU could
+        * release the host lock, another CPU could then acquire it, write to
+        * the request queue "in" register, and have its write reach the chip
+        * first.
+        */
+       mmiowb(); /* posted write ordering */
+</programlisting>
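+
+      <para>
+	Schematically, the pattern is the one below.  This is a sketch only;
+	the lock, register offset and value names are placeholders, not code
+	from a real driver.
+      </para>
+
+<programlisting>
+	spin_lock_irqsave(&amp;dev->lock, flags);
+	writel(start_bit, dev->regs + CONTROL_REG);	/* posted MMIO write */
+	mmiowb();	/* order the write ahead of writes by the next lock holder */
+	spin_unlock_irqrestore(&amp;dev->lock, flags);
+</programlisting>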
 
       <para>
 	PCI ordering rules also guarantee that PIO read responses arrive
@@ -171,7 +232,9 @@
 	<function>readb</function> call has no relation to any previous DMA
 	writes performed by the device.  The driver can use
 	<function>readb_relaxed</function> for these cases, although only
-	some platforms will honor the relaxed semantics.
+	some platforms will honor the relaxed semantics.  On platforms that
+	do honor them, using the relaxed read functions can provide a
+	significant performance benefit.
       </para>
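+
+      <para>
+	As an illustration (a sketch only; the register name, buffer and loop
+	are placeholders rather than code from an in-tree driver), a driver
+	draining a data FIFO whose contents do not depend on prior DMA writes
+	by the device might use the relaxed variant:
+      </para>
+
+<programlisting>
+	for (i = 0; i &lt; len; i++)
+		buf[i] = readb_relaxed(dev->regs + FIFO_DATA);
+</programlisting>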
     </sect1>
 
--- ./include/asm-x86_64/io.h.mmiowb	2005-10-20 19:13:17.000000000 +0400
+++ ./include/asm-x86_64/io.h	2005-10-24 15:15:21.200855016 +0400
@@ -186,6 +186,8 @@ extern void iounmap(void *addr);
 #define __raw_readl readl
 #define __raw_readq readq
 
+#define mmiowb()
+
 #define writeb(b,addr) (*(volatile unsigned char *) (addr) = (b))
 #define writew(b,addr) (*(volatile unsigned short *) (addr) = (b))
 #define writel(b,addr) (*(volatile unsigned int *) (addr) = (b))
--- ./include/asm-i386/io.h.mmiowb	2005-10-20 19:13:17.000000000 +0400
+++ ./include/asm-i386/io.h	2005-10-24 15:14:33.026178680 +0400
@@ -156,6 +156,8 @@ static inline void writel(unsigned int b
 #define __raw_writew writew
 #define __raw_writel writel
 
+#define mmiowb()
+
 #define memset_io(a,b,c)	memset((void *)(a),(b),(c))
 #define memcpy_fromio(a,b,c)	__memcpy((a),(void *)(b),(c))
 #define memcpy_toio(a,b,c)	__memcpy((void *)(a),(b),(c))
--- ./include/asm-ia64/machvec.h.mmiowb	2005-10-20 19:13:17.000000000 +0400
+++ ./include/asm-ia64/machvec.h	2005-10-24 15:14:55.417774640 +0400
@@ -62,6 +62,7 @@ typedef unsigned int ia64_mv_inl_t (unsi
 typedef void ia64_mv_outb_t (unsigned char, unsigned long);
 typedef void ia64_mv_outw_t (unsigned short, unsigned long);
 typedef void ia64_mv_outl_t (unsigned int, unsigned long);
+typedef void ia64_mv_mmiowb_t (void);
 typedef unsigned char ia64_mv_readb_t (void *);
 typedef unsigned short ia64_mv_readw_t (void *);
 typedef unsigned int ia64_mv_readl_t (void *);
@@ -130,6 +131,7 @@ extern void machvec_tlb_migrate_finish (
 #  define platform_outb		ia64_mv.outb
 #  define platform_outw		ia64_mv.outw
 #  define platform_outl		ia64_mv.outl
+#  define platform_mmiowb	ia64_mv.mmiowb
 #  define platform_readb        ia64_mv.readb
 #  define platform_readw        ia64_mv.readw
 #  define platform_readl        ia64_mv.readl
@@ -176,6 +178,7 @@ struct ia64_machine_vector {
 	ia64_mv_outb_t *outb;
 	ia64_mv_outw_t *outw;
 	ia64_mv_outl_t *outl;
+	ia64_mv_mmiowb_t *mmiowb;
 	ia64_mv_readb_t *readb;
 	ia64_mv_readw_t *readw;
 	ia64_mv_readl_t *readl;
@@ -218,6 +221,7 @@ struct ia64_machine_vector {
 	platform_outb,				\
 	platform_outw,				\
 	platform_outl,				\
+	platform_mmiowb,			\
 	platform_readb,				\
 	platform_readw,				\
 	platform_readl,				\
@@ -344,6 +348,9 @@ extern ia64_mv_dma_supported		swiotlb_dm
 #ifndef platform_outl
 # define platform_outl		__ia64_outl
 #endif
+#ifndef platform_mmiowb
+# define platform_mmiowb	__ia64_mmiowb
+#endif
 #ifndef platform_readb
 # define platform_readb		__ia64_readb
 #endif
--- ./include/asm-ia64/io.h.mmiowb	2005-10-20 19:13:17.000000000 +0400
+++ ./include/asm-ia64/io.h	2005-10-24 15:14:33.042176248 +0400
@@ -91,6 +91,20 @@ extern int valid_phys_addr_range (unsign
  */
 #define __ia64_mf_a()	ia64_mfa()
 
+/**
+ * __ia64_mmiowb - I/O write barrier
+ *
+ * Ensure ordering of I/O space writes.  This will make sure that writes
+ * following the barrier will arrive after all previous writes.  For most
+ * ia64 platforms, this is a simple 'mf.a' instruction.
+ *
+ * See Documentation/DocBook/deviceiobook.tmpl for more information.
+ */
+static inline void __ia64_mmiowb(void)
+{
+	ia64_mfa();
+}
+
 static inline const unsigned long
 __ia64_get_io_port_base (void)
 {
@@ -267,6 +281,7 @@ __outsl (unsigned long port, void *src, 
 #define __outb		platform_outb
 #define __outw		platform_outw
 #define __outl		platform_outl
+#define __mmiowb	platform_mmiowb
 
 #define inb(p)		__inb(p)
 #define inw(p)		__inw(p)
@@ -280,6 +295,7 @@ __outsl (unsigned long port, void *src, 
 #define outsb(p,s,c)	__outsb(p,s,c)
 #define outsw(p,s,c)	__outsw(p,s,c)
 #define outsl(p,s,c)	__outsl(p,s,c)
+#define mmiowb()	__mmiowb()
 
 /*
  * The address passed to these functions are ioremap()ped already.
--- ./include/asm-ia64/machvec_sn2.h.mmiowb	2005-10-20 19:13:17.000000000 +0400
+++ ./include/asm-ia64/machvec_sn2.h	2005-10-24 15:16:15.025672400 +0400
@@ -92,6 +92,9 @@ extern ia64_mv_dma_supported		sn_dma_sup
 #define platform_outb			__sn_outb
 #define platform_outw			__sn_outw
 #define platform_outl			__sn_outl
+#ifdef CONFIG_IA64_SGI_SN2
+#error "MMIOWB is broken on this arch!!!"
+#endif
 #define platform_readb			__sn_readb
 #define platform_readw			__sn_readw
 #define platform_readl			__sn_readl
--- ./include/asm-ia64/machvec_init.h.mmiowb	2005-10-20 19:13:17.000000000 +0400
+++ ./include/asm-ia64/machvec_init.h	2005-10-24 15:14:33.045175792 +0400
@@ -12,6 +12,7 @@ extern ia64_mv_inl_t __ia64_inl;
 extern ia64_mv_outb_t __ia64_outb;
 extern ia64_mv_outw_t __ia64_outw;
 extern ia64_mv_outl_t __ia64_outl;
+extern ia64_mv_mmiowb_t __ia64_mmiowb;
 extern ia64_mv_readb_t __ia64_readb;
 extern ia64_mv_readw_t __ia64_readw;
 extern ia64_mv_readl_t __ia64_readl;