<br><div class="gmail_quote">From 19c055d73cee8e65f8c24393450014b3560a8c6a Mon Sep 17 00:00:00 2001<br>From: Manish Sharma <<a href="mailto:manishrma@gmail.com" target="_blank">manishrma@gmail.com</a>><br>Date: Mon, 1 Apr 2013 12:52:35 +0530<br>
Subject: [RFC PATCH 1/1] Dual Squashfs: multicore implementation<br>
<br>The basic idea includes getting big requests by using readpages and<br>then decompressing two blocks on each core.<br>This implementation gives a 50% improvement for sequential file reads.<br>1. Split the request into two chunks based on the squashfs block size in readpages<br>
2. Removed the locks of the decompressor (zlib/lzo) for percpu.<br>3. Increased the number of data caches to per-cpu.<br>Points to consider:-<br>1. Need a lot of memory for the multiple caches & multiple workspaces.<br>
2. All the CPUs will be busy processing the requests; CPU usage increases.<br>
3. A custom queue method is implemented; it can be replaced with workqueues.<br>4. percpu data structures can be used.<br><br>Signed-off-by: Manish Sharma <<a href="mailto:manishrma@gmail.com" target="_blank">manishrma@gmail.com</a>><br>
---<br>
fs/squashfs/Kconfig | 23 +++<br> fs/squashfs/Makefile | 1 +<br> fs/squashfs/file.c | 250 ++++++++++++++++++++++++++++<br> fs/squashfs/lzo_wrapper.c | 113 ++++++++++++-<br> fs/squashfs/squashfs_fs_sb.h | 6 +-<br>
fs/squashfs/super.c | 59 ++++++-<br> fs/squashfs/tegra_mp.c | 368 ++++++++++++++++++++++++++++++++++++++++++<br> fs/squashfs/tegra_mp.h | 58 +++++++<br> fs/squashfs/zlib_wrapper.c | 160 +++++++++++++++++-<br>
9 files changed, 1030 insertions(+), 8 deletions(-)<br> create mode 100644 fs/squashfs/tegra_mp.c<br> create mode 100644 fs/squashfs/tegra_mp.h<br><br>diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig<br>index c70111e..ffcf730 100644<br>
--- a/fs/squashfs/Kconfig<br>+++ b/fs/squashfs/Kconfig<br>@@ -121,3 +121,26 @@ config SQUASHFS_FRAGMENT_CACHE_SIZE<br> <br> Note there must be at least one cached fragment. Anything<br> much more than three will probably not make much difference.<br>
+<br>+config SQUASHFS_READPAGES_ENABLE<br>+ bool "Enable Readpages for Squashfs"<br>+ depends on SQUASHFS<br>+ default n<br>+ help<br>+ Saying Y here enables readpages functionality.<br>+ If unsure, say N.<br>
+<br>+config SQUASHFS_MPCORE<br>+ bool "Include Multi Core support in SquashFS file systems"<br>+ depends on SQUASHFS && SQUASHFS_READPAGES_ENABLE<br>+ default n<br>+ select SQUASHFS_4K_DEVBLK_SIZE<br>
+ select TEGRA_MPCORE<br>+ help<br>+ Saying Y here includes support for Multi Core in SquashFS file systems<br>+ Multi Core supports creates the different kernel threads to improve the<br>+ SquashFS boot time performance.<br>
+ This implementation is independent of the TEGRA board anyway as of now.<br>+ If unsure, say N.<br>+<br>+<br>diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile<br>index 110b047..0b99517 100644<br>--- a/fs/squashfs/Makefile<br>
+++ b/fs/squashfs/Makefile<br>@@ -9,3 +9,4 @@ squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o<br> squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o<br> squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o<br> squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o<br>
+squashfs-$(CONFIG_SQUASHFS_MPCORE) += tegra_mp.o<br>diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c<br>index 8ca62c2..c134e13 100644<br>--- a/fs/squashfs/file.c<br>+++ b/fs/squashfs/file.c<br>@@ -38,6 +38,11 @@<br>
* Larger files use multiple slots, with 1.75 TiB files using all 8 slots.<br>
* The index cache is designed to be memory efficient, and by default uses<br> * 16 KiB.<br>+ *<br>+ * <a href="mailto:manish.s2@samsung.com" target="_blank">manish.s2@samsung.com</a><br>+ * Added support for readpages for getting the bigger requests.<br>
+ * Added Multithread support for the bigger chunks > squashfs block size<br>+ *<br> */<br> <br> #include <linux/fs.h><br>@@ -53,6 +58,22 @@<br> #include "squashfs_fs_i.h"<br> #include "squashfs.h"<br>
<br>+#ifdef CONFIG_SQUASHFS_MPCORE<br>+#include "tegra_mp.h"<br>+<br>+<br>+extern struct squashfs_queue *to_reader_1;<br>+#endif /* CONFIG_SQUASHFS_MPCORE*/<br>+<br>+<br>+<br>+#ifdef CONFIG_SQUASHFS_READPAGES_ENABLE<br>
+#define list_to_page(head) (list_entry((head)->prev, struct page, lru))<br>+#define list_to_page_index(pos, head, index) \<br>+ for (pos = list_entry((head)->prev, struct page, lru); pos->index != index;\<br>
+ pos = list_entry((pos)->prev, struct page, lru))<br>+#endif<br>+<br> /*<br> * Locate cache slot in range [offset, index] for specified inode. If<br> * there's more than one return the slot closest to index.<br>
@@ -494,8 +515,237 @@ out:<br> <br> return 0;<br> }<br>+#ifdef CONFIG_SQUASHFS_READPAGES_ENABLE<br>+<br>+/*<br>+ * copy of squashfs_readpage function for<br>+ * supports<br>+ * readpages & Multicore implementation<br>
+ */<br>+int read_this_page(struct file *file, struct page *page)<br>+{<br>+ struct inode *inode = page->mapping->host;<br>+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;<br>+ int bytes, i, offset = 0, sparse = 0;<br>
+ struct squashfs_cache_entry *buffer = NULL;<br>+ void *pageaddr;<br>+<br>+ int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;<br>+ int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT);<br>
+ int start_index = page->index & ~mask;<br>+ int end_index = start_index | mask;<br>+ int file_end = i_size_read(inode) >> msblk->block_log;<br>+<br>+ TRACE("Entered read_this_page, page index %lx, start block %llx\n",<br>
+ page->index, squashfs_i(inode)->start);<br>+<br>+<br>+ if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >><br>+ PAGE_CACHE_SHIFT)){<br>+ goto out;<br>
+ }<br>+<br>+ if (index < file_end || squashfs_i(inode)->fragment_block ==<br>+ SQUASHFS_INVALID_BLK) {<br>+ /*<br>+ * Reading a datablock from disk. Need to read block list<br>
+ * to get location and block size.<br>+ */<br>+ u64 block = 0;<br>+ int bsize = read_blocklist(inode, index, &block);<br>+ if (bsize < 0)<br>+ goto error_out;<br>+<br>
+<br>+ if (bsize == 0) { /* hole */<br>+ bytes = index == file_end ?<br>+ (i_size_read(inode) & (msblk->block_size - 1)) :<br>+ msblk->block_size;<br>+ sparse = 1;<br>
+ } else {<br>+ /*<br>+ * Read and decompress datablock.<br>+ */<br>+ buffer = squashfs_get_datablock(inode->i_sb,<br>+ block, bsize);<br>
+ if (buffer->error) {<br>+ ERROR("Unable to read page, block %llx, size %x"<br>+ "\n", block, bsize);<br>+ squashfs_cache_put(buffer);<br>
+ goto error_out;<br>+ }<br>+ bytes = buffer->length;<br>+ }<br>+ } else {<br>+ /*<br>+ * Datablock is stored inside a fragment (tail-end packed<br>+ * block).<br>
+ */<br>+ buffer = squashfs_get_fragment(inode->i_sb,<br>+ squashfs_i(inode)->fragment_block,<br>+ squashfs_i(inode)->fragment_size);<br>+<br>+ if (buffer->error) {<br>
+ ERROR("Unable to read page, block %llx, size %x\n",<br>+ squashfs_i(inode)->fragment_block,<br>+ squashfs_i(inode)->fragment_size);<br>+ squashfs_cache_put(buffer);<br>
+ goto error_out;<br>+ }<br>+ bytes = i_size_read(inode) & (msblk->block_size - 1);<br>+ offset = squashfs_i(inode)->fragment_offset;<br>+ }<br>+<br>+ /*<br>+ * Loop copying datablock into pages. As the datablock likely covers<br>
+ * many PAGE_CACHE_SIZE pages (default block size is 128 KiB) explicitly<br>+ * grab the pages from the page cache, except for the page that we've<br>+ * been called to fill.<br>+ */<br>+ for (i = start_index; i <= end_index && bytes > 0; i++,<br>
+ bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {<br>+ struct page *push_page;<br>+ int avail = sparse ? 0 : min_t(int, bytes, PAGE_CACHE_SIZE);<br>+<br>+ TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail);<br>
+<br>+ push_page = (i == page->index) ? page :<br>+ grab_cache_page_nowait(page->mapping, i);<br>+<br>+ if (!push_page)<br>+ continue;<br>+<br>+ if (PageUptodate(push_page))<br>
+ goto skip_page;<br>+<br>+ pageaddr = kmap_atomic(push_page);<br>+ squashfs_copy_data(pageaddr, buffer, offset, avail);<br>+ memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);<br>+ kunmap_atomic(pageaddr);<br>
+ flush_dcache_page(push_page);<br>+ SetPageUptodate(push_page);<br>+skip_page:<br>+ unlock_page(push_page);<br>+ if (i != page->index)<br>+ page_cache_release(push_page);<br>+ }<br>
+<br>+ if (!sparse)<br>+ squashfs_cache_put(buffer);<br>+#ifdef CONFIG_SQUASHFS_MPCORE<br>+ page_cache_release(page);<br>+#endif<br>+<br>+ return 0;<br>+<br>+error_out:<br>+ SetPageError(page);<br>+out:<br>
+ pageaddr = kmap_atomic(page);<br>+ memset(pageaddr, 0, PAGE_CACHE_SIZE);<br>+ kunmap_atomic(pageaddr);<br>+ flush_dcache_page(page);<br>+ if (!PageError(page))<br>+ SetPageUptodate(page);<br>+ unlock_page(page);<br>
+<br>+ return 0;<br>+}<br>+<br>+/*<br>+ * readpages implementation and Multi Core implementation<br>+ * for squashfs<br>+ *<br>+ */<br>+static int squashfs_readpages(struct file *filp, struct address_space *mapping,<br>
+ struct list_head *pages, unsigned nr_pages)<br>+{<br>+ unsigned page_idx;<br>+<br>+<br>+#ifdef CONFIG_SQUASHFS_MPCORE<br>+ unsigned first_page_idx;<br>+ int err;<br>+ unsigned long index = 0;<br>+ struct squashfs_sb_info *msblk = filp->f_path.dentry->d_inode->i_sb->s_fs_info;<br>
+ unsigned int pages_per_block;<br>+<br>+ pages_per_block = (msblk->block_size/(PAGE_CACHE_SIZE));<br>+<br>+#ifdef DEBUG<br>+ printk(KERN_EMERG"[%d]%s %d %d Ino %lu \n", current->pid, __FUNCTION__, nr_pages, pages_per_block, filp->f_path.dentry->d_inode->i_ino);<br>
+#endif<br>+<br>+ if (nr_pages > pages_per_block) {<br>+<br>+ /*Here we will grab the page and put into queue */<br>+ for (first_page_idx = 0, page_idx = 0; page_idx < nr_pages; ) {<br>+<br>
+ struct page *page = NULL;<br>+<br>+ if (first_page_idx == page_idx) {<br>+ page = list_to_page(pages);<br>+ prefetchw(&page->flags);<br>+ list_del(&page->lru);<br>
+ /* Add this page to page-cache */<br>+ /*err = add_to_page_cache_lru(page, mapping, page->index, GFP_KERNEL);*/<br>+ err = add_to_page_cache(page, mapping, page->index, GFP_KERNEL);<br>
+ if (unlikely(err)) {<br>+ /*printk(KERN_EMERG "releasing page cache \n");*/<br>+ page_cache_release(page);<br>+ page_idx += 1;<br>
+ first_page_idx = page_idx;<br>+ continue;<br>+ }<br>+ page_idx += pages_per_block;<br>+ index = page->index;<br>
+ if (queue_put(to_reader_1, filp, page))<br>
+ break;<br>+ } else {<br>+<br>+ page = grab_cache_page_nowait(mapping, (index + page_idx));<br>+ if (unlikely(!page)) {<br>+ /*Need to do error checking here*/<br>
+ page_idx += 1;<br>+ continue;<br>+ /*return -ENOMEM;*/<br>+ } else {<br>+ page_idx += pages_per_block;<br>+ queue_put(to_reader_1, filp, page);<br>
+ }<br>+<br>+ }<br>+<br>+ }<br>+<br>+ work_on_queue(to_reader_1);<br>+ } else<br>+<br>+#endif /* CONFIG_SQUASHFS_MPCORE */<br>+ {<br>+ /* readpages Implementation */<br>
+ for (page_idx = 0; page_idx < nr_pages; page_idx++) {<br>+ struct page *page = list_to_page(pages);<br>+ prefetchw(&page->flags);<br>+ list_del(&page->lru);<br>
+ /*if (!add_to_page_cache_lru(page, mapping, page->index, GFP_KERNEL)) {*/<br>+ if (!add_to_page_cache(page, mapping, page->index, GFP_KERNEL)) {<br>+ squashfs_readpage(filp, page);<br>
+ }<br>+ page_cache_release(page);<br>+ }<br>+ }<br>+<br>+<br>+ /*always return 0 as readpages either writes to a page or release it*/<br>+ return 0;<br>+}<br>+#endif<br> <br>
<br> const struct address_space_operations squashfs_aops = {<br>+#ifdef CONFIG_SQUASHFS_READPAGES_ENABLE<br>+ .readpages = squashfs_readpages,<br>+#endif<br> .readpage = squashfs_readpage<br> };<br>diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c<br>
index 00f4dfc..4bcdf64 100644<br>--- a/fs/squashfs/lzo_wrapper.c<br>+++ b/fs/squashfs/lzo_wrapper.c<br>@@ -37,7 +37,114 @@ struct squashfs_lzo {<br> void *output;<br> };<br> <br>-static void *lzo_init(struct squashfs_sb_info *msblk, void *buff, int len)<br>
+#ifdef CONFIG_SQUASHFS_MPCORE<br>+static void *lzo_init(struct squashfs_sb_info *msblk)<br>+{<br>+ unsigned int i = 0;<br>+ int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE);<br>+ unsigned int processors = num_online_cpus();<br>
+<br>+ /* Initialization of the lzo streams */<br>+ struct squashfs_lzo *stream = kmalloc(processors * sizeof(stream), GFP_KERNEL);<br>+ if (NULL == stream) {<br>+ ERROR("Failed to allocate zlib stream\n");<br>
+ goto failed;<br>+ }<br>+ for_each_online_cpu(i) {<br>+ stream[i].input = vmalloc(block_size);<br>+ if (stream[i].input == NULL)<br>+ goto failed;<br>+ stream[i].output = vmalloc(block_size);<br>
+ if (stream[i].output == NULL)<br>+ goto failed;<br>+ }<br>+ return stream;<br>+<br>+failed:<br>+ ERROR("Failed to allocate lzo workspace\n");<br>+ i = 0;<br>+ for_each_online_cpu(i) {<br>
+ if (stream[i].input)<br>+ vfree(stream[i].input);<br>+ }<br>+ if (stream)<br>+ kfree(stream);<br>+ return NULL;<br>+}<br>+<br>+<br>+static void lzo_free(void *strm)<br>+{<br>+ unsigned int i = 0;<br>
+ struct squashfs_lzo *stream = strm;<br>+<br>+ if (stream) {<br>+ for_each_online_cpu(i) {<br>+ if (stream[i].input)<br>+ vfree(stream[i].input);<br>+ if (stream[i].output)<br>
+ vfree(stream[i].output);<br>+ }<br>+ kfree(stream);<br>+ }<br>+ strm = NULL;<br>+}<br>+<br>+static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer,<br>+ struct buffer_head **bh, int b, int offset, int length, int srclength,<br>
+ int pages)<br>+{<br>+ unsigned int pid = smp_processor_id();<br>+ struct squashfs_lzo *stream = msblk->stream;<br>+ void *buff = stream[pid].input;<br>+ int avail, i, bytes = length, res;<br>+ size_t out_len = srclength;<br>
+<br>+ mutex_lock(&msblk->read_data_mutex[pid]);<br>+<br>+ for (i = 0; i < b; i++) {<br>+ wait_on_buffer(bh[i]);<br>+ if (!buffer_uptodate(bh[i]))<br>+ goto block_release;<br>+<br>
+ avail = min(bytes, msblk->devblksize - offset);<br>+ memcpy(buff, bh[i]->b_data + offset, avail);<br>+ buff += avail;<br>+ bytes -= avail;<br>+ offset = 0;<br>+ put_bh(bh[i]);<br>
+ }<br>+<br>+ res = lzo1x_decompress_safe(stream[pid].input, (size_t)length,<br>+ stream[pid].output, &out_len);<br>+ if (res != LZO_E_OK)<br>+ goto failed;<br>+<br>+ res = bytes = (int)out_len;<br>
+ for (i = 0, buff = stream[pid].output; bytes && i < pages; i++) {<br>+ avail = min_t(int, bytes, PAGE_CACHE_SIZE);<br>+ memcpy(buffer[i], buff, avail);<br>+ buff += avail;<br>+ bytes -= avail;<br>
+ }<br>+<br>+ mutex_unlock(&msblk->read_data_mutex[pid]);<br>+ return res;<br>+<br>+block_release:<br>+ for (; i < b; i++)<br>+ put_bh(bh[i]);<br>+<br>+failed:<br>+ mutex_unlock(&msblk->read_data_mutex[pid]);<br>
+<br>+ ERROR("lzo decompression failed, data probably corrupt\n");<br>+ return -EIO;<br>+}<br>+<br>+#else /* MPCORE*/<br>+<br>+static void *lzo_init(struct squashfs_sb_info *msblk)<br> {<br> int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE);<br>
<br>@@ -58,7 +165,7 @@ failed2:<br> failed:<br> ERROR("Failed to allocate lzo workspace\n");<br> kfree(stream);<br>- return ERR_PTR(-ENOMEM);<br>+ return NULL;<br> }<br> <br> <br>@@ -125,6 +232,8 @@ failed:<br>
return -EIO;<br> }<br> <br>+#endif /*MPCORE*/<br>+<br> const struct squashfs_decompressor squashfs_lzo_comp_ops = {<br> .init = lzo_init,<br> .free = lzo_free,<br>diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h<br>
index 52934a2..bd39cd5 100644<br>--- a/fs/squashfs/squashfs_fs_sb.h<br>+++ b/fs/squashfs/squashfs_fs_sb.h<br>@@ -63,7 +63,11 @@ struct squashfs_sb_info {<br> __le64 *id_table;<br> __le64 *fragment_index;<br>
__le64 *xattr_id_table;<br>- struct mutex read_data_mutex;<br>+#ifdef CONFIG_SQUASHFS_MPCORE<br>+ struct mutex *read_data_mutex;<br>+#else<br>+ struct mutex read_data_mutex;<br>
+#endif /*MPCORE*/<br> struct mutex meta_index_mutex;<br> struct meta_index *meta_index;<br> void *stream;<br>diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c<br>
index 260e392..2484a67 100644<br>--- a/fs/squashfs/super.c<br>+++ b/fs/squashfs/super.c<br>@@ -25,6 +25,8 @@<br> * This file implements code to read the superblock, read and initialise<br> * in-memory structures at mount time, and all the VFS glue code to register<br>
* the filesystem.<br>+ * manish.s2 : added support for multicore<br>+ * : Added generic decompression selection with multicore<br> */<br> <br> #include <linux/fs.h><br>@@ -43,6 +45,9 @@<br> #include "squashfs.h"<br>
#include "decompressor.h"<br> #include "xattr.h"<br>+#ifdef CONFIG_SQUASHFS_MPCORE<br>+#include "tegra_mp.h"<br>+#endif<br> <br> static struct file_system_type squashfs_fs_type;<br> static const struct super_operations squashfs_super_ops;<br>
@@ -85,7 +90,10 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)<br> unsigned int fragments;<br> u64 lookup_table_start, xattr_id_table_start, next_table;<br> int err;<br>-<br>
+#ifdef CONFIG_SQUASHFS_MPCORE<br>+ unsigned int i = 0;<br>+ unsigned int processors = num_online_cpus();<br>+#endif<br> TRACE("Entered squashfs_fill_superblock\n");<br> <br> sb->s_fs_info = kzalloc(sizeof(*msblk), GFP_KERNEL);<br>
@@ -98,7 +106,20 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)<br> msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);<br> msblk->devblksize_log2 = ffz(~msblk->devblksize);<br>
<br>+#ifdef CONFIG_SQUASHFS_MPCORE<br>+ /* Initialization of mutex for each core */<br>+ i = 0;<br>+ msblk->read_data_mutex = kmalloc((processors)*sizeof(struct mutex), GFP_KERNEL);<br>+ if (NULL == msblk->read_data_mutex) {<br>
+ ERROR("unable to allocate Mutex Mem \n");<br>+ goto failed_mount;<br>+ }<br>+ for_each_online_cpu(i) {<br>+ mutex_init(&msblk->read_data_mutex[i]);<br>+ }<br>+#else /*MPCORE */<br>
mutex_init(&msblk->read_data_mutex);<br>+#endif /*MPCORE */<br> mutex_init(&msblk->meta_index_mutex);<br> <br> /*<br>@@ -205,13 +226,21 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)<br>
if (msblk->block_cache == NULL)<br> goto failed_mount;<br> <br>+#ifdef CONFIG_SQUASHFS_MPCORE<br>+ /* Allocate read_page block */<br>+ msblk->read_page = squashfs_cache_init("data", processors, (msblk->block_size));<br>
+ if (msblk->read_page == NULL) {<br>+ ERROR("Failed to allocate read_page block\n");<br>+ goto failed_mount;<br>+ }<br>+#else<br> /* Allocate read_page block */<br> msblk->read_page = squashfs_cache_init("data", 1, msblk->block_size);<br>
if (msblk->read_page == NULL) {<br> ERROR("Failed to allocate read_page block\n");<br> goto failed_mount;<br> }<br>-<br>+#endif<br> msblk->stream = squashfs_decompressor_init(sb, flags);<br>
if (IS_ERR(msblk->stream)) {<br> err = PTR_ERR(msblk->stream);<br>@@ -446,7 +475,26 @@ static int __init init_squashfs_fs(void)<br> destroy_inodecache();<br> return err;<br> }<br>-<br>
+#ifdef CONFIG_SQUASHFS_MPCORE<br>+/*M.S the size of different cache */<br>+/*fragment_buffer_size = msblk->block_size;<br>+data_buffer_size = msblk->block_size;<br>+metadata_buffer_size = SQUASHFS_METADATA_SIZE;<br>
+queue_buffer_size = data_buffer_size;<br>+pages_per_block = (msblk->block_size/(PAGE_CACHE_SIZE));*/<br>+/*<br>+* queue_buffer_size = fragment_buffer_size + data_buffer_size + metadata_buffer_size;<br>+* M.S :- As of now we don't need that much big size of queue<br>
+* 1. we are currently working on offsets equal to number of pages in the block size<br>+* so we will take the size of the queue equal to data_buffer_size only<br>+* 2. The metadata requests are same as previous no threading.<br>
+* 3. We reduced the queue size further to 64<br>+* As of now max queue request will not be more than 64.<br>+*/<br>+/* M.S Adding Threads here */<br>+initialise_threads(SQFS_QBUFFER_SIZE);<br>+<br>+#endif<br> printk(KERN_INFO "squashfs: version 4.0 (2009/01/31) "<br>
"Phillip Lougher\n");<br> <br>@@ -456,6 +504,11 @@ static int __init init_squashfs_fs(void)<br> <br> static void __exit exit_squashfs_fs(void)<br> {<br>+#ifdef CONFIG_SQUASHFS_MPCORE<br>+ printk(KERN_INFO"%s \n", __FUNCTION__);<br>
+ /*MS Adding the exiting code */<br>+ exit_threads();<br>+#endif<br> unregister_filesystem(&squashfs_fs_type);<br> destroy_inodecache();<br> }<br>diff --git a/fs/squashfs/tegra_mp.c b/fs/squashfs/tegra_mp.c<br>
new file mode 100644<br>index 0000000..1d7e03f<br>--- /dev/null<br>+++ b/fs/squashfs/tegra_mp.c<br>@@ -0,0 +1,368 @@<br>+/**<br>+* @file: tegra_mp.c<br>+* @brief: Multi Core support for squashFS<br>+* Copyright: Copyright(C) Samsung India Pvt. Ltd 2011. All Rights Reserved.<br>
+* @author: SISC: manish.s2<br>+* @date: 2011/03/10<br>+* @History:<br>+* v1.1a is stable & support dual core.<br>+* v1.2 added multi core support.<br>+* v1.8 Fix the bug for the queue fill ptr overrun<br>
+*/<br>+<br>+#include <linux/delay.h><br>+#include <linux/fs.h><br>+#include <linux/module.h><br>+#include <linux/kernel.h><br>+#include <linux/kthread.h><br>+#include <linux/cpumask.h><br>
+#include <linux/cpu.h><br>+#include <linux/slab.h><br>+<br>+#include "squashfs_fs_i.h"<br>+#include "squashfs.h"<br>+#include "tegra_mp.h"<br>+<br>+<br>+struct squashfs_queue *to_reader_1;<br>
+static struct task_struct **thread;<br>+<br>+extern int read_this_page(struct file *file, struct page *page);<br>+<br>+int queue_fini(struct squashfs_queue *queue)<br>+{<br>+<br>+ if (queue == NULL) {<br>+ printk(KERN_INFO "ERROR: Wrong queue ptr\n");<br>
+ return -EFAULT;<br>+ }<br>+<br>+<br>+ if (NULL != queue->data) {<br>+ kfree(queue->data);<br>+ queue->data = NULL;<br>+ }<br>+<br>+ if (NULL != queue->cpu) {<br>+ kfree(queue->cpu);<br>
+ queue = NULL;<br>+ }<br>+ if (NULL != queue) {<br>+ kfree(queue);<br>+ queue = NULL;<br>+ }<br>+<br>+ return 0;<br>+}<br>+<br>+struct squashfs_queue *queue_init(int size)<br>+{<br>+<br>
+ unsigned int i = 0;<br>
+ int processors;<br>+ struct squashfs_queue *queue = NULL;<br>+<br>+#ifdef DEBUG<br>+ printk(KERN_INFO "[%s] size %d \n", __FUNCTION__, size + 1);<br>+#endif<br>+<br>+ processors = num_online_cpus();<br>
+<br>+ queue = (struct squashfs_queue *)kmalloc(sizeof(struct squashfs_queue), GFP_KERNEL);<br>+ if (queue == NULL) {<br>+ printk(KERN_INFO "Out of memory in queue_init\n");<br>+ return NULL;<br>
+ }<br>+<br>+ queue->data = (struct squashfs_qdata *)kmalloc((sizeof(struct squashfs_qdata) * (size + 1)), GFP_KERNEL);<br>+ if (NULL == queue->data) {<br>+ printk(KERN_INFO "unable to get the memory \n");<br>
+ queue_fini(queue);<br>+ return NULL;<br>+ }<br>+<br>+ queue->cpu = kmalloc(processors * (sizeof(int)), GFP_KERNEL);<br>+ if (NULL == queue->cpu) {<br>+ printk(KERN_INFO "unable to get the memory for cpu \n");<br>
+ queue_fini(queue);<br>+ return NULL;<br>+ }<br>+<br>+<br>+ for_each_online_cpu(i) {<br>+ queue->cpu[i] = 0;<br>+ }<br>+<br>+ queue->size = size + 1;<br>+ queue->readp = queue->writep = 0;<br>
+ queue->empty = 1;<br>+ queue->full = 0;<br>+ queue->stop = 0;<br>+ init_waitqueue_head(&queue->wait_queue);<br>+ spin_lock_init(&queue->lock);<br>+<br>+ return queue;<br>+}<br>+<br>
+<br>+int queue_put(struct squashfs_queue *queue, void *filp, void *page)<br>+{<br>+ int processor_id = 0;<br>+ unsigned int i = 0;<br>+ spin_lock(&queue->lock);<br>+<br>+ processor_id = raw_smp_processor_id();<br>
+<br>+ if (((queue->writep + 1) % queue->size) == queue->readp) {<br>+#ifdef DEBUG<br>+ printk(KERN_INFO "[%d] Queue is full: page %lu \n", current->pid, ((struct page *)page)->index);<br>
+#endif<br>+ queue->full = 1;<br>+ spin_unlock(&queue->lock);<br>+<br>+ for_each_online_cpu(i) {<br>+ if (i != processor_id) {<br>+ queue->cpu[i] = 1;<br>+ }<br>
+<br>+ }<br>+ wake_up(&queue->wait_queue);<br>+ wait_event_timeout(queue->wait_queue, !queue->full, msecs_to_jiffies(100));<br>+ spin_lock(&queue->lock);<br>+ if (((queue->writep + 1) % queue->size) == queue->readp) {<br>
+#ifdef DEBUG<br>+ printk(KERN_EMERG "[%d] Queue is still full: page %lu \n", current->pid, ((struct page *)page)->index);<br>+ printk(KERN_EMERG "[%d] Check threads \n", current->pid);<br>
+#endif<br>+ spin_unlock(&queue->lock);<br>+ return -1;<br>+ }<br>+ processor_id = raw_smp_processor_id();<br>+ }<br>+<br>+ queue->data[queue->writep].filp = filp;<br>
+ queue->data[queue->writep].page = page;<br>+ queue->writep = ((queue->writep + 1) % queue->size);<br>+ queue->empty = 0;<br>+<br>+#ifdef DEBUG<br>+ printk(KERN_EMERG "[%d]queue put w%d:r%d page %lu \n", current->pid, queue->writep, queue->readp, ((struct page *)page)->index);<br>
+#endif<br>+<br>+ for_each_online_cpu(i) {<br>+ if (i != processor_id) {<br>+ /*printk(KERN_INFO"waking up %d processor \n",i);*/<br>+ queue->cpu[i] = 1;<br>+ }<br>+<br>
+ }<br>+ spin_unlock(&queue->lock);<br>+ wake_up(&queue->wait_queue);<br>+ return 0;<br>+}<br>+<br>+<br>+int queue_get(struct squashfs_queue *queue, int id, struct squashfs_qdata *data)<br>+{<br>
+ /*struct squashfs_qdata *data;*/<br>+ int processor_id = 0;<br>+#ifdef DEBUG<br>+ printk(KERN_INFO "queue get %d \n", raw_smp_processor_id());<br>+#endif<br>+ spin_lock(&queue->lock);<br>+ processor_id = raw_smp_processor_id();<br>
+<br>+ /* wait here if queue is empty */<br>+ if (queue->readp == queue->writep) {<br>+<br>+ if (1 == id) {<br>+ queue->empty = 1;<br>+ queue->full = 0;<br>
+ spin_unlock(&queue->lock);<br>+ wake_up(&queue->wait_queue);<br>+ return -1;<br>+ }<br>+<br>+#ifdef DEBUG<br>+ printk(KERN_EMERG "[%d] Need to wait here as queue is empty \n", current->pid);<br>
+#endif<br>+ queue->empty = 1;<br>+ queue->full = 0;<br>+ queue->cpu[processor_id] = 0;<br>+ wake_up(&queue->wait_queue);<br>+ spin_unlock(&queue->lock);<br>
+ wait_event_interruptible(queue->wait_queue, queue->cpu[processor_id]);<br>+<br>+ /* After the thread gets out from wait queue */<br>+ spin_lock(&queue->lock);<br>+ if (queue->stop || (queue->readp == queue->writep)) {<br>
+ queue->empty = 1;<br>+ queue->full = 0;<br>+ wake_up(&queue->wait_queue);<br>+ spin_unlock(&queue->lock);<br>+#ifdef DEBUG<br>+ printk(KERN_INFO " Thread%ld %s \n", current->cpus_allowed, (queue->stop ? "should stop" : "queue is empty"));<br>
+#endif<br>+ return -1;<br>+ }<br>+ }<br>+<br>+<br>+ data->filp = queue->data[queue->readp].filp;<br>+ data->page = queue->data[queue->readp].page;<br>+ queue->data[queue->readp].filp = NULL;<br>
+ queue->data[queue->readp].page = NULL;<br>+ queue->readp = (queue->readp + 1) % queue->size;<br>+ queue->full = 0;<br>+#ifdef DEBUG<br>+ printk(KERN_EMERG "[%d]queue get w%d:r%d page %lu \n", \<br>
+ current->pid, queue->writep, queue->readp, ((struct page *)data->page)->index);<br>+#endif<br>+ spin_unlock(&queue->lock);<br>+ wake_up(&queue->wait_queue);<br>+<br>+<br>+ return 0;<br>
+}<br>+<br>+<br>+<br>+void squashfs_thread(void *arg)<br>+{<br>+<br>+ struct squashfs_qdata data;<br>+ int ret = 0;<br>+<br>+ set_user_nice(current, -20);<br>+ printk(KERN_INFO "### Started squashfs thread_%d \n", raw_smp_processor_id());<br>
+ while (!kthread_should_stop()) {<br>+<br>+<br>+ ret = queue_get(to_reader_1, 0, &data);<br>+ if (unlikely(0 > ret)) {<br>+ if (to_reader_1->stop) {<br>+ printk(KERN_INFO"ERROR : We are seeing the stop being set\n");<br>
+ break;<br>+ } else {<br>+ continue;<br>+ }<br>+ } else {<br>+#ifdef DEBUG<br>+ /* Can remove this as its for error checking */<br>+ if ((NULL != data.filp) && (NULL != data.page)) {<br>
+ printk(KERN_INFO "here it is page index %ld \n", data.page->index);<br>+ read_this_page(data.filp, data.page);<br>+ } else {<br>+ printk(KERN_INFO"Ptr is NULL \n");<br>
+ }<br>+#else<br>+ read_this_page(data.filp, data.page);<br>+#endif<br>+<br>+<br>+ }<br>+<br>+ }<br>+ printk(KERN_INFO"SquashFS Thread : I am dying!\n");<br>+<br>+}<br>+<br>+void squashfs_process_data(void)<br>
+{<br>+<br>+ struct squashfs_qdata data;<br>+ int ret = 0;<br>+<br>+ while (1) {<br>+<br>+<br>+ ret = queue_get(to_reader_1, 1, &data);<br>+ if (unlikely(0 > ret)) {<br>+#ifdef DEBUG<br>+ printk(KERN_INFO "[%s][%d] Q is empty so we are exiting \n", __FUNCTION__, current->pid);<br>
+#endif<br>+ break;<br>+ } else {<br>+ read_this_page(data.filp, data.page);<br>+ }<br>+<br>+ }<br>+<br>+}<br>+<br>+void work_on_queue(struct squashfs_queue *queue)<br>+{<br>+ squashfs_process_data();<br>
+}<br>+<br>+int initialise_threads(int queue_buffer_size)<br>+{<br>+ unsigned int i = 0;<br>+ int processors;<br>+<br>+ processors = num_online_cpus();<br>+<br>+#ifdef DEBUG<br>+ printk(KERN_INFO "no of active cores %d \n", processors);<br>
+#endif<br>+<br>+ /* Initialize the Queue */<br>+ to_reader_1 = queue_init(queue_buffer_size);<br>+<br>+<br>+ if ((thread = kmalloc((NOTHR_THREADS + processors) * sizeof(struct task_struct *), GFP_KERNEL)) == NULL) {<br>
+ printk(KERN_INFO "Out of memory allocating thread descriptors\n");<br>+ return -ENOMEM;<br>+ }<br>+<br>+<br>+ /* Create Number n Number of Deflator threads same as core.*/<br>+ for_each_online_cpu(i) {<br>
+ printk(KERN_INFO "Created %d thread \n", i);<br>+ thread[NOTHR_THREADS + i] = kthread_create((void *)squashfs_thread, NULL, MODULE_NAME);<br>+ if (IS_ERR(thread[NOTHR_THREADS + i])) {<br>
+ printk(KERN_ERR ": unable to start deflator kernel thread\n");<br>
+ return -ENOMEM;<br>+ } else {<br>+ printk(KERN_INFO" ################## \n");<br>+ printk(KERN_INFO"Binding cpu %d \n", i);<br>+ kthread_bind(thread[NOTHR_THREADS + i], i);<br>
+ wake_up_process(thread[NOTHR_THREADS + i]);<br>+ }<br>+ }<br>+<br>+<br>+ return 0;<br>+<br>+}<br>+<br>+void exit_threads()<br>+{<br>+ int i = 0;<br>+<br>+ /* wake up both threads */<br>+ to_reader_1->empty = 0;<br>
+ to_reader_1->stop = 1;<br>+ for_each_online_cpu(i) {<br>+ to_reader_1->cpu[i] = 1;<br>+ }<br>+ wake_up_all(&to_reader_1->wait_queue);<br>+<br>+#if 0<br>+ for (i = NOTHR_THREADS; i < (NOTHR_THREADS + NR_CPUS); i++) {<br>
+<br>+ if (NULL != thread[i])<br>+ kthread_stop(thread[i]);<br>+<br>+ }<br>+ if (thread)<br>+ kfree(thread);<br>+<br>+#endif<br>+ /* We have only one queue as of now */<br>+ if (queue_fini(to_reader_1))<br>
+ printk(KERN_INFO"ERROR: In queue deallocation \n");<br>+<br>+<br>+}<br>+<br>diff --git a/fs/squashfs/tegra_mp.h b/fs/squashfs/tegra_mp.h<br>new file mode 100644<br>index 0000000..ca60c56<br>--- /dev/null<br>
+++ b/fs/squashfs/tegra_mp.h<br>@@ -0,0 +1,58 @@<br>+/**<br>+* @file tegra_mp.h<br>+* @brief Multi Core support for squashFS<br>+* Copyright: Copyright(C) Samsung India Pvt. Ltd 2011. All Rights Reserved.<br>+* @author SISC: manish.s2<br>
+* @date 2011/03/10<br>+* @desc Added Multi core support in squashfs<br>+*/<br>+#ifndef __MP_TEGRA__<br>+#define __MP_TEGRA__<br>+<br>+<br>+#include <linux/fs.h><br>+#include <linux/vfs.h><br>+#include <linux/wait.h><br>
+<br>+/* Total number of other threads except if needed */<br>+/*#define NOTHR_THREADS 3 // To be used if we additional threads or so.*/<br>+#define NOTHR_THREADS 0<br>+#define MODULE_NAME "tegra_mpcore"<br>
+<br>+/* Max page pool size 64 and min squashfs block size 4k */<br>+#define SQFS_QBUFFER_SIZE (64)<br>+<br>+/*#define DEBUG*/<br>+<br>+struct squashfs_qdata{<br>+ struct file *filp;<br>+ struct page *page;<br>+ int index;<br>
+};<br>+<br>+<br>+/* struct describing queues used to pass data between threads */<br>+struct squashfs_queue {<br>+ int size;<br>+ int readp;<br>+ int writep;<br>+ wait_queue_head_t wait_queue;<br>
+ spinlock_t lock;<br>+<br>+ int empty;<br>+ int full;<br>+ int *cpu;<br>+ int stop;<br>+ struct squashfs_qdata *data;<br>+};<br>+<br>+<br>+/* Functions */<br>+int initialise_threads(int queue_buffer_size);<br>
+void exit_threads(void);<br>+int queue_put(struct squashfs_queue *queue, void *filp, void *page);<br>+int queue_get(struct squashfs_queue *queue, int id, struct squashfs_qdata *data);<br>+struct squashfs_queue *queue_init(int size);<br>
+void work_on_queue(struct squashfs_queue *queue);<br>+<br>+#endif /*__MP_TEGRA__*/<br>diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c<br>index 55d918f..5e8b0a1 100644<br>--- a/fs/squashfs/zlib_wrapper.c<br>
+++ b/fs/squashfs/zlib_wrapper.c<br>@@ -19,7 +19,13 @@<br> * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.<br> *<br> * zlib_wrapper.c<br>- */<br>+ * manish.s2: added the dual core support for squashfs.<br>
+ * : Seperate mutex & z_stream for each core.<br>+ * : generalized for multicores.<br>+ * : Added seperate mutex and zlib stream for Multicore.<br>+ * : Replace zlib_init with zlib_reset for performance.<br>
+ *<br>+*/<br> <br> <br> #include <linux/mutex.h><br>@@ -33,6 +39,156 @@<br> #include "squashfs.h"<br> #include "decompressor.h"<br> <br>+#ifdef CONFIG_SQUASHFS_MPCORE<br>+static void *zlib_init(struct squashfs_sb_info *dummy, void *buff, int len)<br>
+{<br>+ unsigned int processors = num_online_cpus();<br>+ unsigned int i = 0;<br>+ int zlib_err = 0;<br>+<br>+ z_stream *stream = kmalloc((processors * sizeof(z_stream)), GFP_KERNEL);<br>+ if (stream == NULL)<br>
+ goto failed;<br>+<br>+ for_each_online_cpu(i) {<br>+ stream[i].workspace = kmalloc(zlib_inflate_workspacesize(),<br>+ GFP_KERNEL);<br>+ if (stream[i].workspace == NULL)<br>+ goto failed;<br>
+ zlib_err = zlib_inflateInit(&(stream[i]));<br>+ if (zlib_err != Z_OK) {<br>+ ERROR("zlib_inflateInit returned unexpected "<br>+ "result 0x%x\n",<br>+ zlib_err);<br>
+ goto failed;<br>+ }<br>+ }<br>+ return stream;<br>+<br>+failed:<br>+ ERROR("Failed to allocate zlib workspace\n");<br>+ i = 0;<br>+ for_each_online_cpu(i) {<br>+ if (stream[i].workspace)<br>
+ kfree(stream[i].workspace);<br>+ }<br>+ if (stream)<br>+ kfree(stream);<br>+ return NULL;<br>+}<br>+<br>+<br>+static void zlib_free(void *strm)<br>+{<br>+ z_stream *stream = strm;<br>+ unsigned int i = 0;<br>
+<br>+ for_each_online_cpu(i) {<br>+ if (stream[i].workspace)<br>+ kfree(stream[i].workspace);<br>+ }<br>+ if (stream)<br>+ kfree(stream);<br>+ strm = NULL;<br>+}<br>+<br>+<br>+static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,<br>
+ struct buffer_head **bh, int b, int offset, int length, int srclength,<br>+ int pages)<br>+{<br>+ int zlib_err = 0;<br>+ int avail, bytes, k = 0, page = 0;<br>+ unsigned int pid = smp_processor_id();<br>
+ z_stream *stream = msblk->stream;<br>
+<br>+ mutex_lock(&msblk->read_data_mutex[pid]);<br>+ /*printk(KERN_INFO "[%s] pid %d \n",__FUNCTION__,pid);*/<br>+ /*<br>+ * We are resetting zlib stream here so that it avoids the<br>+ * overhead of zlib_init again and again for each<br>
+ * request.<br>+ */<br>+ zlib_err = zlib_inflateReset(&(stream[pid]));<br>+ if (zlib_err != Z_OK) {<br>+ ERROR("zlib_Reset returned %d \n", zlib_err);<br>+ printk(KERN_EMERG"zlib_Reset returned %d \n", zlib_err);<br>
+ goto release_mutex;<br>+ }<br>+<br>+ stream[pid].avail_out = 0;<br>+ stream[pid].avail_in = 0;<br>+<br>+ bytes = length;<br>+ do {<br>+ if (stream[pid].avail_in == 0 && k < b) {<br>
+ avail = min(bytes, msblk->devblksize - offset);<br>+ bytes -= avail;<br>+ wait_on_buffer(bh[k]);<br>+ if (!buffer_uptodate(bh[k]))<br>+ goto release_mutex;<br>
+<br>+ if (avail == 0) {<br>+ offset = 0;<br>+ put_bh(bh[k++]);<br>+ continue;<br>+ }<br>+<br>+ stream[pid].next_in = bh[k]->b_data + offset;<br>
+ stream[pid].avail_in = avail;<br>+ offset = 0;<br>+ }<br>+<br>+ if (stream[pid].avail_out == 0 && page < pages) {<br>+ stream[pid].next_out = buffer[page++];<br>
+ stream[pid].avail_out = PAGE_CACHE_SIZE;<br>+ }<br>+#if 0<br>+ if (!zlib_init) {<br>+ zlib_err = zlib_inflateInit(&(stream[pid]));<br>+ if (zlib_err != Z_OK) {<br>+ ERROR("zlib_inflateInit returned unexpected "<br>
+ "result 0x%x, srclength %d\n",<br>+ zlib_err, srclength);<br>+ goto release_mutex;<br>+ }<br>+ zlib_init = 1;<br>+ }<br>+#endif<br>
+<br>+ zlib_err = zlib_inflate(&(stream[pid]), Z_SYNC_FLUSH);<br>+<br>+ if (stream[pid].avail_in == 0 && k < b)<br>+ put_bh(bh[k++]);<br>+ } while (zlib_err == Z_OK);<br>+<br>+ if (zlib_err != Z_STREAM_END) {<br>
+ ERROR("zlib_inflate error, data probably corrupt %d \n", zlib_err);<br>+ printk(KERN_INFO"avail in %d avail out %d \n", stream[pid].avail_in, stream[pid].avail_out);<br>+ goto release_mutex;<br>
+ }<br>+#if 0<br>+ zlib_err = zlib_inflateEnd(&(stream[pid]));<br>+ if (zlib_err != Z_OK) {<br>+ ERROR("zlib_inflate error, data probably corrupt\n");<br>+ goto release_mutex;<br>+ }<br>
+#endif<br>+ length = stream[pid].total_out;<br>+ mutex_unlock(&msblk->read_data_mutex[pid]);<br>+ return length;<br>+<br>+release_mutex:<br>+ mutex_unlock(&msblk->read_data_mutex[pid]);<br>+<br>
+ for (; k < b; k++)<br>+ put_bh(bh[k]);<br>+<br>+ return -EIO;<br>+}<br>+<br>+#else /* MPCORE*/<br>+<br> static void *zlib_init(struct squashfs_sb_info *dummy, void *buff, int len)<br> {<br> z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL);<br>
@@ -137,7 +293,7 @@ release_mutex:<br> <br> return -EIO;<br> }<br>-<br>+#endif /* MPCORE*/<br> const struct squashfs_decompressor squashfs_zlib_comp_ops = {<br> .init = zlib_init,<br> .free = zlib_free,<span class="HOEnZb"><font color="#888888"><br>
-- <br>
1.7.9.5<br><br><br>
</font></span></div><br>