ext4: Fix bigalloc quota accounting and i_blocks value
author Aditya Kali <adityakali@google.com>
Fri, 9 Sep 2011 23:04:51 +0000 (19:04 -0400)
committer Theodore Ts'o <tytso@mit.edu>
Fri, 9 Sep 2011 23:04:51 +0000 (19:04 -0400)
With bigalloc changes, the i_blocks value was not correctly set (it was still
set to number of blocks being used, but in case of bigalloc, we want i_blocks
to represent the number of clusters being used). Since the quota subsystem sets
the i_blocks value, this patch fixes the quota accounting and makes sure that
the i_blocks value is set correctly.

Signed-off-by: Aditya Kali <adityakali@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
fs/ext4/balloc.c
fs/ext4/ext4.h
fs/ext4/ext4_extents.h
fs/ext4/extents.c
fs/ext4/inode.c
fs/ext4/mballoc.c
fs/ext4/super.c

index 9080a857cda96949c9a2b5a30feccb69f08497c4..bf42b3219e3c592cc27bc4a023028cf9815d8959 100644 (file)
@@ -485,7 +485,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
  * @handle:             handle to this transaction
  * @inode:              file inode
  * @goal:               given target block(filesystem wide)
- * @count:             pointer to total number of blocks needed
+ * @count:             pointer to total number of clusters needed
  * @errp:               error code
  *
  * Return 1st allocated block number on success, *count stores total account
@@ -517,7 +517,8 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
                spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
                EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
                spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-               dquot_alloc_block_nofail(inode, ar.len);
+               dquot_alloc_block_nofail(inode,
+                               EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
        }
        return ret;
 }
index d2584224c89aa96862e58210e45678d450495cc0..a6307f7c98074457ee33e4f7f05f514e42c8e50f 100644 (file)
@@ -144,9 +144,17 @@ struct ext4_allocation_request {
 #define EXT4_MAP_UNWRITTEN     (1 << BH_Unwritten)
 #define EXT4_MAP_BOUNDARY      (1 << BH_Boundary)
 #define EXT4_MAP_UNINIT                (1 << BH_Uninit)
+/* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
+ * ext4_map_blocks wants to know whether or not the underlying cluster has
+ * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
+ * the requested mapping came from a previously mapped (or delayed allocated)
+ * cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
+ * should never appear in a buffer_head's state flags.
+ */
+#define EXT4_MAP_FROM_CLUSTER  (1 << BH_AllocFromCluster)
 #define EXT4_MAP_FLAGS         (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
                                 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
-                                EXT4_MAP_UNINIT)
+                                EXT4_MAP_UNINIT | EXT4_MAP_FROM_CLUSTER)
 
 struct ext4_map_blocks {
        ext4_fsblk_t m_pblk;
@@ -1884,6 +1892,7 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern void ext4_da_update_reserve_space(struct inode *inode,
                                        int used, int quota_claim);
+extern int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock);
 
 /* indirect.c */
 extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
@@ -2284,6 +2293,11 @@ extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
 enum ext4_state_bits {
        BH_Uninit       /* blocks are allocated but uninitialized on disk */
          = BH_JBDPrivateStart,
+       BH_AllocFromCluster,    /* allocated blocks were part of already
+                                * allocated cluster. Note that this flag will
+                                * never, ever appear in a buffer_head's state
+                                * flag. See EXT4_MAP_FROM_CLUSTER to see where
+                                * this is used. */
 };
 
 BUFFER_FNS(Uninit, uninit)
index 095c36f3b6129e97db937f30a969174c4facacf1..a52db3a69a30e88df3af4a5d56811985effe4dfd 100644 (file)
@@ -290,5 +290,7 @@ extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
                                                        struct ext4_ext_path *);
 extern void ext4_ext_drop_refs(struct ext4_ext_path *);
 extern int ext4_ext_check_inode(struct inode *inode);
+extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
+                                     int search_hint_reverse);
 #endif /* _EXT4_EXTENTS */
 
index cd4479c080318686a0446bf8568895d5abb8fd0f..c4e0058645345f1b1655431fc28826f31006ffe6 100644 (file)
@@ -2686,6 +2686,21 @@ again:
                }
        }
 
+       /* If we still have something in the partial cluster and we have removed
+        * even the first extent, then we should free the blocks in the partial
+        * cluster as well. */
+       if (partial_cluster && path->p_hdr->eh_entries == 0) {
+               int flags = EXT4_FREE_BLOCKS_FORGET;
+
+               if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+                       flags |= EXT4_FREE_BLOCKS_METADATA;
+
+               ext4_free_blocks(handle, inode, NULL,
+                                EXT4_C2B(EXT4_SB(sb), partial_cluster),
+                                EXT4_SB(sb)->s_cluster_ratio, flags);
+               partial_cluster = 0;
+       }
+
        /* TODO: flexible tree reduction should be here */
        if (path->p_hdr->eh_entries == 0) {
                /*
@@ -3233,6 +3248,195 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
        return ext4_mark_inode_dirty(handle, inode);
 }
 
+/**
+ * ext4_find_delalloc_range: find delayed allocated block in the given range.
+ *
+ * Goes through the buffer heads in the range [lblk_start, lblk_end] and returns
+ * whether there are any buffers marked for delayed allocation. It returns '1'
+ * on the first delalloc'ed buffer head found. If no buffer head in the given
+ * range is marked for delalloc, or a page is under writeback, it returns 0.
+ * lblk_start should always be <= lblk_end.
+ * search_hint_reverse is to indicate that searching in reverse from lblk_end to
+ * lblk_start might be more efficient (i.e., we will likely hit the delalloc'ed
+ * block sooner). This is useful when blocks are truncated sequentially from
+ * lblk_start towards lblk_end.
+ */
+static int ext4_find_delalloc_range(struct inode *inode,
+                                   ext4_lblk_t lblk_start,
+                                   ext4_lblk_t lblk_end,
+                                   int search_hint_reverse)
+{
+       struct address_space *mapping = inode->i_mapping;
+       struct buffer_head *head, *bh = NULL;
+       struct page *page;
+       ext4_lblk_t i, pg_lblk;
+       pgoff_t index;
+
+       /* reverse search won't work if fs block size is less than page size */
+       if (inode->i_blkbits < PAGE_CACHE_SHIFT)
+               search_hint_reverse = 0;
+
+       if (search_hint_reverse)
+               i = lblk_end;
+       else
+               i = lblk_start;
+
+       index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+       while ((i >= lblk_start) && (i <= lblk_end)) {
+               page = find_get_page(mapping, index);
+               if (!page || !PageDirty(page))
+                       goto nextpage;
+
+               if (PageWriteback(page)) {
+                       /*
+                        * This might be a race with allocation and writeout. In
+                        * this case we just assume that the rest of the range
+                        * will eventually be written and there won't be any
+                        * delalloc blocks left.
+                        * TODO: the above assumption is troublesome, but might
+                        * work better in practice. Another option is to note
+                        * somewhere that the cluster is getting written out and
+                        * detect that here.
+                        */
+                       page_cache_release(page);
+                       return 0;
+               }
+
+               if (!page_has_buffers(page))
+                       goto nextpage;
+
+               head = page_buffers(page);
+               if (!head)
+                       goto nextpage;
+
+               bh = head;
+               pg_lblk = index << (PAGE_CACHE_SHIFT -
+                                               inode->i_blkbits);
+               do {
+                       if (unlikely(pg_lblk < lblk_start)) {
+                               /*
+                                * This is possible when fs block size is less
+                                * than page size and our cluster starts/ends in
+                                * middle of the page. So we need to skip the
+                                * initial few blocks till we reach lblk_start.
+                                */
+                               pg_lblk++;
+                               continue;
+                       }
+
+                       if (buffer_delay(bh)) {
+                               page_cache_release(page);
+                               return 1;
+                       }
+                       if (search_hint_reverse)
+                               i--;
+                       else
+                               i++;
+               } while ((i >= lblk_start) && (i <= lblk_end) &&
+                               ((bh = bh->b_this_page) != head));
+nextpage:
+               if (page)
+                       page_cache_release(page);
+               /*
+                * Move to the next page (previous one for a reverse search).
+                * 'i' becomes the first lblk of that page.
+                */
+               if (search_hint_reverse)
+                       index--;
+               else
+                       index++;
+               i = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       }
+
+       return 0;
+}
+
+int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
+                              int search_hint_reverse)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+       ext4_lblk_t lblk_start, lblk_end;
+       lblk_start = lblk & (~(sbi->s_cluster_ratio - 1));
+       lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
+       /* 1 if a delalloc'ed buffer is found in [lblk_start, lblk_end]. */
+       return ext4_find_delalloc_range(inode, lblk_start, lblk_end,
+                                       search_hint_reverse);
+}
+
+/**
+ * Determines how many complete clusters (out of those specified by the 'map')
+ * are under delalloc and had quota reserved for them.
+ * This function is called when we are writing out the blocks that were
+ * originally written with their allocation delayed, but then the space was
+ * allocated using fallocate() before the delayed allocation could be resolved.
+ * The cases to look for are:
+ * ('=' indicates delayed allocated blocks
+ *  '-' indicates non-delayed allocated blocks)
+ * (a) partial clusters towards beginning and/or end outside of allocated range
+ *     are not delalloc'ed.
+ *     Ex:
+ *     |----c---=|====c====|====c====|===-c----|
+ *              |++++++ allocated ++++++|
+ *     ==> 4 complete clusters in above example
+ *
+ * (b) partial cluster (outside of allocated range) towards either end is
+ *     marked for delayed allocation. In this case, we will exclude that
+ *     cluster.
+ *     Ex:
+ *     |----====c========|========c========|
+ *          |++++++ allocated ++++++|
+ *     ==> 1 complete cluster in above example
+ *
+ *     Ex:
+ *     |================c================|
+ *            |++++++ allocated ++++++|
+ *     ==> 0 complete clusters in above example
+ *
+ * The ext4_da_update_reserve_space will be called only if we
+ * determine here that there were some "entire" clusters that span
+ * this 'allocated' range.
+ * In the non-bigalloc case, this function will just end up returning num_blks
+ * without ever calling ext4_find_delalloc_range.
+ */
+static unsigned int
+get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
+                          unsigned int num_blks)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+       ext4_lblk_t alloc_cluster_start, alloc_cluster_end;
+       ext4_lblk_t lblk_from, lblk_to, c_offset;
+       unsigned int allocated_clusters = 0;
+
+       alloc_cluster_start = EXT4_B2C(sbi, lblk_start);
+       alloc_cluster_end = EXT4_B2C(sbi, lblk_start + num_blks - 1);
+
+       /* max possible clusters for this allocation */
+       allocated_clusters = alloc_cluster_end - alloc_cluster_start + 1;
+
+       /* Left: blocks of the first cluster that precede lblk_start */
+       c_offset = lblk_start & (sbi->s_cluster_ratio - 1);
+       if (c_offset) {
+               lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
+               lblk_to = lblk_from + c_offset - 1;
+
+               if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
+                       allocated_clusters--;
+       }
+
+       /* Right: blocks of the last cluster that follow the range */
+       c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1);
+       if (allocated_clusters && c_offset) {
+               lblk_from = lblk_start + num_blks;
+               lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
+
+               if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
+                       allocated_clusters--;
+       }
+
+       return allocated_clusters;
+}
+
 static int
 ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
                        struct ext4_map_blocks *map,
@@ -3338,8 +3542,15 @@ out:
         * But fallocate would have already updated quota and block
         * count for this offset. So cancel these reservation
         */
-       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
-               ext4_da_update_reserve_space(inode, allocated, 0);
+       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
+               unsigned int reserved_clusters;
+               reserved_clusters = get_reserved_cluster_alloc(inode,
+                               map->m_lblk, map->m_len);
+               if (reserved_clusters)
+                       ext4_da_update_reserve_space(inode,
+                                                    reserved_clusters,
+                                                    0);
+       }
 
 map_out:
        map->m_flags |= EXT4_MAP_MAPPED;
@@ -3484,6 +3695,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
        ext4_fsblk_t newblock = 0;
        int free_on_err = 0, err = 0, depth, ret;
        unsigned int allocated = 0, offset = 0;
+       unsigned int allocated_clusters = 0, reserved_clusters = 0;
        unsigned int punched_out = 0;
        unsigned int result = 0;
        struct ext4_allocation_request ar;
@@ -3499,6 +3711,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
        if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) &&
                ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
                if (!newex.ee_start_lo && !newex.ee_start_hi) {
+                       if ((sbi->s_cluster_ratio > 1) &&
+                           ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
+                               map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+
                        if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
                                /*
                                 * block isn't allocated yet and
@@ -3509,6 +3725,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                        /* we should allocate requested block */
                } else {
                        /* block is already allocated */
+                       if (sbi->s_cluster_ratio > 1)
+                               map->m_flags |= EXT4_MAP_FROM_CLUSTER;
                        newblock = map->m_lblk
                                   - le32_to_cpu(newex.ee_block)
                                   + ext4_ext_pblock(&newex);
@@ -3665,6 +3883,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                }
        }
 
+       if ((sbi->s_cluster_ratio > 1) &&
+           ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
+               map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+
        /*
         * requested block isn't allocated yet;
         * we couldn't try to create block if create flag is zero
@@ -3681,6 +3903,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
        /*
         * Okay, we need to do block allocation.
         */
+       map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
        newex.ee_block = cpu_to_le32(map->m_lblk);
        cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
 
@@ -3692,6 +3915,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
            get_implied_cluster_alloc(sbi, map, ex, path)) {
                ar.len = allocated = map->m_len;
                newblock = map->m_pblk;
+               map->m_flags |= EXT4_MAP_FROM_CLUSTER;
                goto got_allocated_blocks;
        }
 
@@ -3712,6 +3936,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
            get_implied_cluster_alloc(sbi, map, ex2, path)) {
                ar.len = allocated = map->m_len;
                newblock = map->m_pblk;
+               map->m_flags |= EXT4_MAP_FROM_CLUSTER;
                goto got_allocated_blocks;
        }
 
@@ -3765,6 +3990,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
        ext_debug("allocate new block: goal %llu, found %llu/%u\n",
                  ar.goal, newblock, allocated);
        free_on_err = 1;
+       allocated_clusters = ar.len;
        ar.len = EXT4_C2B(sbi, ar.len) - offset;
        if (ar.len > allocated)
                ar.len = allocated;
@@ -3822,8 +4048,80 @@ got_allocated_blocks:
         * Update reserved blocks/metadata blocks after successful
         * block allocation which had been deferred till now.
         */
-       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
-               ext4_da_update_reserve_space(inode, allocated, 1);
+       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
+               /*
+                * Check how many clusters we reserved for this allocated range.
+                */
+               reserved_clusters = get_reserved_cluster_alloc(inode,
+                                               map->m_lblk, allocated);
+               if (map->m_flags & EXT4_MAP_FROM_CLUSTER) {
+                       if (reserved_clusters) {
+                               /*
+                                * We have clusters reserved for this range.
+                                * But since we are not doing actual allocation
+                                * and are simply using blocks from previously
+                                * allocated cluster, we should release the
+                                * reservation and not claim quota.
+                                */
+                               ext4_da_update_reserve_space(inode,
+                                               reserved_clusters, 0);
+                       }
+               } else {
+                       BUG_ON(allocated_clusters < reserved_clusters);
+                       /* We will claim quota for all newly allocated blocks.*/
+                       ext4_da_update_reserve_space(inode, allocated_clusters,
+                                                       1);
+                       if (reserved_clusters < allocated_clusters) {
+                               int reservation = allocated_clusters -
+                                                 reserved_clusters;
+                               /*
+                                * It seems we claimed few clusters outside of
+                                * the range of this allocation. We should give
+                                * it back to the reservation pool. This can
+                                * happen in the following case:
+                                *
+                                * * Suppose s_cluster_ratio is 4 (i.e., each
+                                *   cluster has 4 blocks. Thus, the clusters
+                                *   are [0-3],[4-7],[8-11]...
+                                * * First comes delayed allocation write for
+                                *   logical blocks 10 & 11. Since there were no
+                                *   previous delayed allocated blocks in the
+                                *   range [8-11], we would reserve 1 cluster
+                                *   for this write.
+                                * * Next comes write for logical blocks 3 to 8.
+                                *   In this case, we will reserve 2 clusters
+                                *   (for [0-3] and [4-7]; and not for [8-11] as
+                                *   that range has delayed allocated blocks).
+                                *   Thus total reserved clusters now becomes 3.
+                                * * Now, during the delayed allocation writeout
+                                *   time, we will first write blocks [3-8] and
+                                *   allocate 3 clusters for writing these
+                                *   blocks. Also, we would claim all these
+                                *   three clusters above.
+                                * * Now when we come here to writeout the
+                                *   blocks [10-11], we would expect to claim
+                                *   the reservation of 1 cluster we had made
+                                *   (and we would claim it since there are no
+                                *   more delayed allocated blocks in the range
+                                *   [8-11]). But our reserved cluster count had
+                                *   already gone to 0.
+                                *
+                                *   Thus, at step 4 above, when we determine
+                                *   that there are still some unwritten delayed
+                                *   allocated blocks outside of our current
+                                *   block range, we should increment the
+                                *   reserved clusters count so that when the
+                                *   remaining blocks finally get written, we
+                                *   could claim them.
+                                */
+                               while (reservation) {
+                                       ext4_da_reserve_space(inode,
+                                                             map->m_lblk);
+                                       reservation--;
+                               }
+                       }
+               }
+       }
 
        /*
         * Cache the extent and update transaction to commit on fdatasync only
index 40f51aae42fed44fe4498580b8eefdb3577b909a..d1c17e47c1c6ad75e72f893b987595001c353a52 100644 (file)
@@ -300,14 +300,14 @@ void ext4_da_update_reserve_space(struct inode *inode,
 
        /* Update quota subsystem for data blocks */
        if (quota_claim)
-               dquot_claim_block(inode, used);
+               dquot_claim_block(inode, EXT4_C2B(sbi, used));
        else {
                /*
                 * We did fallocate with an offset that is already delayed
                 * allocated. So on delayed allocated writeback we should
                 * not re-claim the quota for fallocated blocks.
                 */
-               dquot_release_reservation_block(inode, used);
+               dquot_release_reservation_block(inode, EXT4_C2B(sbi, used));
        }
 
        /*
@@ -1037,14 +1037,14 @@ static int ext4_journalled_write_end(struct file *file,
 }
 
 /*
- * Reserve a single block located at lblock
+ * Reserve a single cluster located at lblock
  */
-static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
+int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
 {
        int retries = 0;
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct ext4_inode_info *ei = EXT4_I(inode);
-       unsigned long md_needed;
+       unsigned int md_needed;
        int ret;
 
        /*
@@ -1054,7 +1054,8 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
         */
 repeat:
        spin_lock(&ei->i_block_reservation_lock);
-       md_needed = ext4_calc_metadata_amount(inode, lblock);
+       md_needed = EXT4_NUM_B2C(sbi,
+                                ext4_calc_metadata_amount(inode, lblock));
        trace_ext4_da_reserve_space(inode, md_needed);
        spin_unlock(&ei->i_block_reservation_lock);
 
@@ -1063,7 +1064,7 @@ repeat:
         * us from metadata over-estimation, though we may go over by
         * a small amount in the end.  Here we just reserve for data.
         */
-       ret = dquot_reserve_block(inode, 1);
+       ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
        if (ret)
                return ret;
        /*
@@ -1071,7 +1072,7 @@ repeat:
         * we cannot afford to run out of free blocks.
         */
        if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) {
-               dquot_release_reservation_block(inode, 1);
+               dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
                if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
                        yield();
                        goto repeat;
@@ -1118,6 +1119,8 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
                 * We can release all of the reserved metadata blocks
                 * only when we have written all of the delayed
                 * allocation blocks.
+                * Note that in case of bigalloc, i_reserved_meta_blocks,
+                * i_reserved_data_blocks, etc. refer to number of clusters.
                 */
                percpu_counter_sub(&sbi->s_dirtyclusters_counter,
                                   ei->i_reserved_meta_blocks);
@@ -1130,7 +1133,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
 
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
-       dquot_release_reservation_block(inode, to_free);
+       dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free));
 }
 
 static void ext4_da_page_release_reservation(struct page *page,
@@ -1139,6 +1142,9 @@ static void ext4_da_page_release_reservation(struct page *page,
        int to_release = 0;
        struct buffer_head *head, *bh;
        unsigned int curr_off = 0;
+       struct inode *inode = page->mapping->host;
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+       int num_clusters;
 
        head = page_buffers(page);
        bh = head;
@@ -1151,7 +1157,20 @@ static void ext4_da_page_release_reservation(struct page *page,
                }
                curr_off = next_off;
        } while ((bh = bh->b_this_page) != head);
-       ext4_da_release_space(page->mapping->host, to_release);
+
+       /* If we have released all the blocks belonging to a cluster, then we
+        * need to release the reserved space for that cluster. */
+       num_clusters = EXT4_NUM_B2C(sbi, to_release);
+       while (num_clusters > 0) {
+               ext4_fsblk_t lblk;
+               lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) +
+                       ((num_clusters - 1) << sbi->s_cluster_bits);
+               if (sbi->s_cluster_ratio == 1 ||
+                   !ext4_find_delalloc_cluster(inode, lblk, 1))
+                       ext4_da_release_space(inode, 1);
+
+               num_clusters--;
+       }
 }
 
 /*
@@ -1352,7 +1371,8 @@ static void ext4_print_free_blocks(struct inode *inode)
               (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
                percpu_counter_sum(&sbi->s_freeclusters_counter)));
        printk(KERN_CRIT "dirty_blocks=%lld\n",
-              (long long) percpu_counter_sum(&sbi->s_dirtyclusters_counter));
+              (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
+               percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
        printk(KERN_CRIT "Block reservation details\n");
        printk(KERN_CRIT "i_reserved_data_blocks=%u\n",
               EXT4_I(inode)->i_reserved_data_blocks);
@@ -1626,10 +1646,14 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
                /*
                 * XXX: __block_write_begin() unmaps passed block, is it OK?
                 */
-               ret = ext4_da_reserve_space(inode, iblock);
-               if (ret)
-                       /* not enough space to reserve */
-                       return ret;
+               /* If the block was allocated from a previously allocated
+                * cluster, then we don't need to reserve it again. */
+               if (!(map.m_flags & EXT4_MAP_FROM_CLUSTER)) {
+                       ret = ext4_da_reserve_space(inode, iblock);
+                       if (ret)
+                               /* not enough space to reserve */
+                               return ret;
+               }
 
                map_bh(bh, inode->i_sb, invalid_block);
                set_buffer_new(bh);
index 63dd5670334215e0d9e38b0137ec4d4b2fbea085..5e1215d383316ed376ee11cecef849db0f72d0bf 100644 (file)
@@ -4718,6 +4718,9 @@ do_more:
 
        freed += count;
 
+       if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
+               dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
+
        /* We dirtied the bitmap block */
        BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
        err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -4736,8 +4739,6 @@ do_more:
        }
        ext4_mark_super_dirty(sb);
 error_return:
-       if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
-               dquot_free_block(inode, freed);
        brelse(bitmap_bh);
        ext4_std_error(sb, err);
        return;
index 6810957e0ac75c713a76811b2314bbf52d6cf19a..66b8cfa15636b89d83c862be02931ebc04fe1dee 100644 (file)
@@ -2473,7 +2473,8 @@ static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
                                              char *buf)
 {
        return snprintf(buf, PAGE_SIZE, "%llu\n",
-               (s64) percpu_counter_sum(&sbi->s_dirtyclusters_counter));
+               (s64) EXT4_C2B(sbi,
+                       percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
 }
 
 static ssize_t session_write_kbytes_show(struct ext4_attr *a,