/*
   md_k.h : kernel internal structure of the Linux MD driver
          Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   You should have received a copy of the GNU General Public License
   (for example /usr/src/linux/COPYING); if not, write to the Free
   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#ifndef _MD_K_H
#define _MD_K_H

/*
 * MD personality numbers.  These index the personality table and are
 * mapped to/from the on-disk "level" values by pers_to_level() /
 * level_to_pers() below.
 */
#define MD_RESERVED 0UL
#define LINEAR 1UL
#define RAID0 2UL
#define RAID1 3UL
#define RAID5 4UL
#define TRANSLUCENT 5UL
#define HSM 6UL
#define MULTIPATH 7UL
#define MAX_PERSONALITY 8UL

/* "level" values that have no non-negative RAID level number. */
#define LEVEL_MULTIPATH (-4)
#define LEVEL_LINEAR (-1)

/* Largest representable sector number (all bits of sector_t set). */
#define MaxSector (~(sector_t)0)
/* Maximum length of an MD kernel-thread name. */
#define MD_THREAD_NAME_MAX 14

34 | static inline int pers_to_level (int pers) |
||
35 | { |
||
36 | switch (pers) { |
||
37 | case MULTIPATH: return LEVEL_MULTIPATH; |
||
38 | case HSM: return -3; |
||
39 | case TRANSLUCENT: return -2; |
||
40 | case LINEAR: return LEVEL_LINEAR; |
||
41 | case RAID0: return 0; |
||
42 | case RAID1: return 1; |
||
43 | case RAID5: return 5; |
||
44 | } |
||
45 | BUG(); |
||
46 | return MD_RESERVED; |
||
47 | } |
||
48 | |||
49 | static inline int level_to_pers (int level) |
||
50 | { |
||
51 | switch (level) { |
||
52 | case LEVEL_MULTIPATH: return MULTIPATH; |
||
53 | case -3: return HSM; |
||
54 | case -2: return TRANSLUCENT; |
||
55 | case LEVEL_LINEAR: return LINEAR; |
||
56 | case 0: return RAID0; |
||
57 | case 1: return RAID1; |
||
58 | case 4: |
||
59 | case 5: return RAID5; |
||
60 | } |
||
61 | return MD_RESERVED; |
||
62 | } |
||
63 | |||
/* Handy typedefs for the two central MD objects defined below. */
typedef struct mddev_s mddev_t;
typedef struct mdk_rdev_s mdk_rdev_t;

#define MAX_MD_DEVS  256	/* Max number of md dev */

/*
 * options passed in raidrun:
 */

/* Upper bound on the array chunk size (4096*1024 = 4 MiB). */
#define MAX_CHUNK_SIZE (4096*1024)

/*
 * default readahead
 */

79 | static inline int disk_faulty(mdp_disk_t * d) |
||
80 | { |
||
81 | return d->state & (1 << MD_DISK_FAULTY); |
||
82 | } |
||
83 | |||
84 | static inline int disk_active(mdp_disk_t * d) |
||
85 | { |
||
86 | return d->state & (1 << MD_DISK_ACTIVE); |
||
87 | } |
||
88 | |||
89 | static inline int disk_sync(mdp_disk_t * d) |
||
90 | { |
||
91 | return d->state & (1 << MD_DISK_SYNC); |
||
92 | } |
||
93 | |||
94 | static inline int disk_spare(mdp_disk_t * d) |
||
95 | { |
||
96 | return !disk_sync(d) && !disk_active(d) && !disk_faulty(d); |
||
97 | } |
||
98 | |||
99 | static inline int disk_removed(mdp_disk_t * d) |
||
100 | { |
||
101 | return d->state & (1 << MD_DISK_REMOVED); |
||
102 | } |
||
103 | |||
104 | static inline void mark_disk_faulty(mdp_disk_t * d) |
||
105 | { |
||
106 | d->state |= (1 << MD_DISK_FAULTY); |
||
107 | } |
||
108 | |||
109 | static inline void mark_disk_active(mdp_disk_t * d) |
||
110 | { |
||
111 | d->state |= (1 << MD_DISK_ACTIVE); |
||
112 | } |
||
113 | |||
114 | static inline void mark_disk_sync(mdp_disk_t * d) |
||
115 | { |
||
116 | d->state |= (1 << MD_DISK_SYNC); |
||
117 | } |
||
118 | |||
119 | static inline void mark_disk_spare(mdp_disk_t * d) |
||
120 | { |
||
121 | d->state = 0; |
||
122 | } |
||
123 | |||
124 | static inline void mark_disk_removed(mdp_disk_t * d) |
||
125 | { |
||
126 | d->state = (1 << MD_DISK_FAULTY) | (1 << MD_DISK_REMOVED); |
||
127 | } |
||
128 | |||
129 | static inline void mark_disk_inactive(mdp_disk_t * d) |
||
130 | { |
||
131 | d->state &= ~(1 << MD_DISK_ACTIVE); |
||
132 | } |
||
133 | |||
134 | static inline void mark_disk_nonsync(mdp_disk_t * d) |
||
135 | { |
||
136 | d->state &= ~(1 << MD_DISK_SYNC); |
||
137 | } |
||
138 | |||
/*
 * MD's 'extended' device: per-component-device state for one member
 * of an array.
 */
struct mdk_rdev_s
{
	struct list_head same_set;	/* RAID devices within the same set */

	sector_t size;			/* Device size (in blocks) */
	mddev_t *mddev;			/* RAID array if running */
	unsigned long last_events;	/* IO event timestamp */

	struct block_device *bdev;	/* block device handle */

	struct page *sb_page;		/* in-memory superblock buffer */
	int sb_loaded;			/* non-zero once sb_page holds valid
					 * data -- see md.c for the loader */
	sector_t data_offset;		/* start of data in array */
	sector_t sb_offset;		/* superblock location on the device */
	int preferred_minor;		/* autorun support */

	/* A device can be in one of three states based on two flags:
	 * Not working:     faulty==1 in_sync==0
	 * Fully working:   faulty==0 in_sync==1
	 * Working, but not
	 * in sync with array
	 *                  faulty==0 in_sync==0
	 *
	 * It can never have faulty==1, in_sync==1
	 * This reduces the burden of testing multiple flags in many cases
	 */
	int faulty;			/* if faulty do not issue IO requests */
	int in_sync;			/* device is a full member of the array */

	int desc_nr;			/* descriptor index in the superblock */
	int raid_disk;			/* role of device in array */

	atomic_t nr_pending;		/* number of pending requests.
					 * only maintained for arrays that
					 * support hot removal
					 */
};

typedef struct mdk_personality_s mdk_personality_t;

/*
 * Per-array state: one of these exists for every md device (they are
 * chained together on the all_mddevs list).
 */
struct mddev_s
{
	void				*private;	/* personality-private data */
	mdk_personality_t		*pers;		/* active personality, NULL if none */
	int				__minor;	/* device minor -- use mdidx() */
	struct list_head		disks;		/* list of member rdevs */
	int				sb_dirty;	/* superblock needs writing out */
	int				ro;		/* array is read-only */

	/* Superblock information */
	int				major_version,
					minor_version,
					patch_version;
	int				persistent;	/* has an on-disk superblock */
	int				chunk_size;
	time_t				ctime, utime;
	int				level, layout;
	int				raid_disks;
	int				max_disks;
	sector_t			size;		/* used size of component devices */
	sector_t			array_size;	/* exported array size */
	__u64				events;		/* superblock event count */

	char				uuid[16];

	struct mdk_thread_s		*thread;	/* management thread */
	struct mdk_thread_s		*sync_thread;	/* doing resync or reconstruct */
	unsigned long			curr_resync;	/* blocks scheduled */
	unsigned long			resync_mark;	/* a recent timestamp */
	unsigned long			resync_mark_cnt;/* blocks written at resync_mark */

	/* recovery/resync flags
	 * NEEDED:  we might need to start a resync/recover
	 * RUNNING: a thread is running, or about to be started
	 * SYNC:    actually doing a resync, not a recovery
	 * ERR:     an IO error was detected - abort the resync/recovery
	 * INTR:    someone requested a (clean) early abort.
	 * DONE:    thread is done and is waiting to be reaped
	 */
#define MD_RECOVERY_RUNNING	0
#define MD_RECOVERY_SYNC	1
#define MD_RECOVERY_ERR		2
#define MD_RECOVERY_INTR	3
#define MD_RECOVERY_DONE	4
#define MD_RECOVERY_NEEDED	5
	unsigned long			recovery;	/* MD_RECOVERY_* bit mask */

	int				in_sync;	/* know to not need resync */
	struct semaphore		reconfig_sem;	/* serializes reconfiguration */
	atomic_t			active;		/* reference count */

	int				degraded;	/* whether md should consider
							 * adding a spare
							 */

	atomic_t			recovery_active; /* blocks scheduled, but not written */
	wait_queue_head_t		recovery_wait;
	sector_t			recovery_cp;	/* recovery checkpoint */
	unsigned int			safemode;	/* if set, update "clean" superblock
							 * when no writes pending.
							 */
	unsigned int			safemode_delay;
	struct timer_list		safemode_timer;
	atomic_t			writes_pending;
	request_queue_t			*queue;		/* for plugging ... */

	struct list_head		all_mddevs;	/* global list of arrays */
};

/*
 * Operations vector that each RAID personality (linear, raid0, raid1,
 * multipath, ...) supplies to the MD core.
 */
struct mdk_personality_s
{
	char *name;			/* personality name */
	struct module *owner;		/* implementing module, for refcounting */
	int (*make_request)(request_queue_t *q, struct bio *bio);
	int (*run)(mddev_t *mddev);	/* start the array */
	int (*stop)(mddev_t *mddev);	/* stop the array */
	void (*status)(struct seq_file *seq, mddev_t *mddev);
	/* error_handler must set ->faulty and clear ->in_sync
	 * if appropriate, and should abort recovery if needed
	 */
	void (*error_handler)(mddev_t *mddev, mdk_rdev_t *rdev);
	int (*hot_add_disk) (mddev_t *mddev, mdk_rdev_t *rdev);
	int (*hot_remove_disk) (mddev_t *mddev, int number);
	int (*spare_active) (mddev_t *mddev);
	int (*sync_request)(mddev_t *mddev, sector_t sector_nr, int go_faster);
};


270 | /* |
||
271 | * Currently we index md_array directly, based on the minor |
||
272 | * number. This will have to change to dynamic allocation |
||
273 | * once we start supporting partitioning of md devices. |
||
274 | */ |
||
275 | static inline int mdidx (mddev_t * mddev) |
||
276 | { |
||
277 | return mddev->__minor; |
||
278 | } |
||
279 | |||
/* Look up the rdev with descriptor index 'nr' on 'mddev' (defined out
 * of line; presumably returns NULL when not found -- confirm in md.c). */
extern mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr);

/*
 * iterates through some rdev ringlist. It's safe to remove the
 * current 'rdev' because 'tmp' is advanced past it before the loop
 * body runs.  Don't touch 'tmp' though.
 */
#define ITERATE_RDEV_GENERIC(head,rdev,tmp)				\
									\
	for ((tmp) = (head).next;					\
		(rdev) = (list_entry((tmp), mdk_rdev_t, same_set)),	\
		(tmp) = (tmp)->next, (tmp)->prev != &(head)		\
		; )
/*
 * iterates through the 'same array disks' ringlist
 */
#define ITERATE_RDEV(mddev,rdev,tmp)					\
	ITERATE_RDEV_GENERIC((mddev)->disks,rdev,tmp)

/*
 * Iterates through 'pending RAID disks' (the global
 * pending_raid_disks list, declared elsewhere)
 */
#define ITERATE_RDEV_PENDING(rdev,tmp)					\
	ITERATE_RDEV_GENERIC(pending_raid_disks,rdev,tmp)

/* Control block for an MD helper kernel thread. */
typedef struct mdk_thread_s {
	void			(*run) (mddev_t *mddev); /* work function */
	mddev_t			*mddev;		/* array the thread serves */
	wait_queue_head_t	wqueue;		/* the thread sleeps here */
	unsigned long		flags;		/* THREAD_WAKEUP bit */
	struct completion	*event;		/* start/exit notification --
						 * TODO confirm in md.c */
	struct task_struct	*tsk;		/* the thread's task */
	const char		*name;		/* thread name (see
						 * MD_THREAD_NAME_MAX) */
} mdk_thread_t;

/* Bit in mdk_thread_s.flags; presumably set to request a thread run --
 * confirm against md_wakeup_thread() in md.c. */
#define THREAD_WAKEUP 0

/*
 * Sleep uninterruptibly until 'condition' becomes true.  'lock' is an
 * irq-disabling spinlock held by the caller: it is dropped around
 * blk_run_queues()/schedule() and re-acquired before 'condition' is
 * re-tested, so 'condition' is always evaluated under the lock.
 */
#define __wait_event_lock_irq(wq, condition, lock)			\
do {									\
	wait_queue_t __wait;						\
	init_waitqueue_entry(&__wait, current);				\
									\
	add_wait_queue(&wq, &__wait);					\
	for (;;) {							\
		set_current_state(TASK_UNINTERRUPTIBLE);		\
		if (condition)						\
			break;						\
		spin_unlock_irq(&lock);					\
		blk_run_queues();					\
		schedule();						\
		spin_lock_irq(&lock);					\
	}								\
	current->state = TASK_RUNNING;					\
	remove_wait_queue(&wq, &__wait);				\
} while (0)

/*
 * Fast-path wrapper for __wait_event_lock_irq(): skip the waitqueue
 * setup entirely when 'condition' already holds.
 */
#define wait_event_lock_irq(wq, condition, lock)			\
do {									\
	if (condition)							\
		break;							\
	__wait_event_lock_irq(wq, condition, lock);			\
} while (0)

342 | |||
/*
 * Like __wait_event_lock_irq() but with no lock to juggle: sleep
 * uninterruptibly on 'wq' until 'condition' becomes true, kicking the
 * block layer (blk_run_queues()) before each schedule() so pending IO
 * can make progress.
 */
#define __wait_disk_event(wq, condition)				\
do {									\
	wait_queue_t __wait;						\
	init_waitqueue_entry(&__wait, current);				\
									\
	add_wait_queue(&wq, &__wait);					\
	for (;;) {							\
		set_current_state(TASK_UNINTERRUPTIBLE);		\
		if (condition)						\
			break;						\
		blk_run_queues();					\
		schedule();						\
	}								\
	current->state = TASK_RUNNING;					\
	remove_wait_queue(&wq, &__wait);				\
} while (0)

/*
 * Fast-path wrapper for __wait_disk_event(): avoid the waitqueue
 * setup when 'condition' already holds.
 */
#define wait_disk_event(wq, condition)					\
do {									\
	if (condition)							\
		break;							\
	__wait_disk_event(wq, condition);				\
} while (0)

#endif