-->
  • Recent Articles

    Top 15 Hadoop HDFS Commands with Examples and Usage

    Top 15 Hadoop HDFS Commands with Examples and Usage

    Hadoop HDFS Commands


    Hadoop HDFS is a distributed file system that provides redundant storage for very large files. HDFS splits each file into blocks and distributes them across the Hadoop cluster. The default block size in HDFS is 128 MB (Hadoop 2.x) or 64 MB (Hadoop 1.x), which is much larger than on a typical Linux file system, where the block size is 4 KB.

    Hadoop HDFS Commands

    With the help of HDFS commands, we can perform Hadoop HDFS file operations such as creating files and directories, changing file permissions, viewing file contents, and copying files or directories from the local file system to HDFS or vice versa.

    1. Check Hadoop version
    hadoop version

    [root@hadoopserver ~]# hadoop version
    Hadoop 3.1.1.3.0.1.0-187
    Source code repository git@github.com:hortonworks/hadoop.git -r 2820e4d6fc7ec31ac42187083ed5933c823e9784
    Compiled by jenkins on 2018-09-19T10:19Z
    Compiled with protoc 2.5.0
    From source with checksum 889327faf5a6ca5fc06fcf97c13af29
    This command was run using /usr/hdp/3.0.1.0-187/hadoop/hadoop-common-3.1.1.3.0.1.0-187.jar
    [root@hadoopserver ~]#


    2. Create a New directory
    To create a "test" directory in HDFS, we'll be using the "mkdir" command in conjunction with "hadoop fs".

    hadoop fs -mkdir /test

    [root@hadoopserver ~]# hadoop fs -mkdir /test
    [root@hadoopserver ~]#

    3. Using the "ls" command, we can check for the files & directories in HDFS.
    [root@hadoopserver ~]# hadoop fs -ls /
    Found 1 items
    drwxr-xr-x   - root   hdfs            0 2020-04-22 06:11 /test

    4. Create Nested directories in HDFS
    We will be creating nested directories under "/test" directory.

    hadoop fs -mkdir -p /test/<nested folder names>

    [root@hadoopserver ~]# hadoop fs -mkdir -p /test/dir1/nestdir2
    [root@hadoopserver ~]#


    5. List All Files and directories under a directory "/test"
    hadoop fs -ls -R /test

    [root@hadoopserver ~]# hadoop fs -ls /test
    Found 1 items
    drwxr-xr-x   - root hdfs          0 2020-04-22 06:17 /test/dir1
    [root@hadoopserver ~]#
    [root@hadoopserver ~]# hadoop fs -ls -R /test
    drwxr-xr-x   - root hdfs          0 2020-04-22 06:17 /test/dir1
    drwxr-xr-x   - root hdfs          0 2020-04-22 06:17 /test/dir1/nestdir2
    [root@hadoopserver ~]#

    6. Create a file
    We will be creating a file called "sita.txt" under "/test" directory.

    hadoop fs -touch <file name>

    [root@hadoopserver ~]# hadoop fs -touch /test/sita.txt
    7. Copy a file from the local file system to the Hadoop file system
    Copy the file "anaconda-ks.cfg" from the local file system to the Hadoop directory "/test".

    hadoop fs -put <source>  <dest>

    [root@hadoopserver ~]# hadoop fs -put anaconda-ks.cfg /test/
    [root@hadoopserver ~]# hadoop fs -ls /test
    Found 2 items
    -rw-r--r--   3 root hdfs       6921 2020-04-22 06:25 /test/anaconda-ks.cfg
    drwxr-xr-x   - root hdfs          0 2020-04-22 06:17 /test/dir1
    [root@hadoopserver ~]#

    8. Copy a file from the Hadoop file system to the local file system
    We will copy "/test/ram.txt" to the local file system.

    hadoop fs -get <hadoop source>  <local destination>

    [root@hadoopserver ~]# hadoop fs -ls /test
    Found 3 items
    -rw-r--r--   3 root hdfs       6921 2020-04-22 06:25 /test/anaconda-ks.cfg
    drwxr-xr-x   - root hdfs          0 2020-04-22 06:17 /test/dir1
    -rw-r--r--   3 root hdfs          0 2020-04-22 06:27 /test/ram.txt

    [root@hadoopserver ~]# hadoop fs -get /test/ram.txt .
    [root@hadoopserver ~]# ls
    anaconda-ks.cfg  logs  original-ks.cfg  ram.txt
    [root@hadoopserver ~]#

    9. copyFromLocal

    Copy the file "original-ks.cfg" from the local file system to the Hadoop directory "/test/dir1".

    hadoop fs -copyFromLocal <local source> <hdfs destination>

    [root@hadoopserver ~]# hadoop fs -copyFromLocal original-ks.cfg /test/dir1
    [root@hadoopserver ~]# hadoop fs -ls /test/dir1
    Found 2 items
    drwxr-xr-x   - root hdfs          0 2020-04-22 06:17 /test/dir1/nestdir2
    -rw-r--r--   3 root hdfs       6577 2020-04-22 17:22 /test/dir1/original-ks.cfg
    [root@hadoopserver ~]#
    HDFS COPY

    10. copyToLocal
    We will copy "/test/sita.txt" to Local file system.

    hadoop fs -copyToLocal <hdfs source> <local destination>

    [root@hadoopserver ~]# ls
    anaconda-ks.cfg  logs  original-ks.cfg  ram.txt
    [root@hadoopserver ~]# hadoop fs -touch /test/sita.txt
    [root@hadoopserver ~]# hadoop fs -copyToLocal /test/sita.txt .
    [root@hadoopserver ~]# ls
    anaconda-ks.cfg  logs  original-ks.cfg  ram.txt  sita.txt
    [root@hadoopserver ~]#

    11. Move  a folder 
    We'll move the folder "/test/dir1/nestdir2" to "/test/".

    hadoop fs -mv <src> <dest>

    [root@hadoopserver ~]# hadoop fs -ls -R /test/
    -rw-r--r--   3 root hdfs       6921 2020-04-22 06:25 /test/anaconda-ks.cfg
    drwxr-xr-x   - root hdfs          0 2020-04-22 17:22 /test/dir1
    drwxr-xr-x   - root hdfs          0 2020-04-22 06:17 /test/dir1/nestdir2
    -rw-r--r--   3 root hdfs       6577 2020-04-22 17:22 /test/dir1/original-ks.cfg
    -rw-r--r--   3 root hdfs          0 2020-04-22 06:27 /test/ram.txt
    -rw-r--r--   3 root hdfs          0 2020-04-22 17:24 /test/sita.txt
    [root@hadoopserver ~]# hadoop fs -mv /test/dir1/nestdir2 /test/
    [root@hadoopserver ~]# hadoop fs -ls -R /test/
    -rw-r--r--   3 root hdfs       6921 2020-04-22 06:25 /test/anaconda-ks.cfg
    drwxr-xr-x   - root hdfs          0 2020-04-22 17:31 /test/dir1
    -rw-r--r--   3 root hdfs       6577 2020-04-22 17:22 /test/dir1/original-ks.cfg
    drwxr-xr-x   - root hdfs          0 2020-04-22 06:17 /test/nestdir2
    -rw-r--r--   3 root hdfs          0 2020-04-22 06:27 /test/ram.txt
    -rw-r--r--   3 root hdfs          0 2020-04-22 17:24 /test/sita.txt
    [root@hadoopserver ~]#

    12. Modify Permission
    We'll modify the permission of the Hadoop file "/test/sita.txt" to 775.

    hadoop fs -chmod 775 <file path>

    [root@hadoopserver ~]# hadoop fs -chmod 775 /test/sita.txt
    [root@hadoopserver ~]# hadoop fs -ls  /test/sita.txt
    -rwxrwxr-x   3 root hdfs          0 2020-04-22 17:24 /test/sita.txt
    [root@hadoopserver ~]#

    13. Change ownership of files
    We'll change the ownership of file "/test/sita.txt" to "rantripa:hdfs".

    In HDFS, the "root" user is not a superuser, and only a superuser can change the owner of a file. Hence root cannot change the ownership; we need to switch to the "hdfs" user to perform this action.

    hadoop fs -chown rantripa:hdfs  <file path>

    [root@hadoopserver ~]# hadoop fs -ls  /test/sita.txt
    -rwxrwxr-x   3 root hdfs          0 2020-04-22 17:24 /test/sita.txt
    [root@hadoopserver ~]# su - hdfs
    Last login: Fri Apr 17 05:28:40 UTC 2020
    [hdfs@hadoopserver ~]$ hadoop fs -chown rantripa:hdfs /test/sita.txt
    [hdfs@hadoopserver ~]$ hadoop fs -ls  /test/sita.txt
    -rwxrwxr-x   3 rantripa hdfs          0 2020-04-22 17:24 /test/sita.txt
    [hdfs@hadoopserver ~]$

    14. Remove directory
    We'll remove the empty directory "/test/nestdir2" from HDFS ("-rmdir" only removes directories that are empty).

    hadoop fs -rmdir <directory path>

    [root@hadoopserver ~]# hadoop fs -rmdir  /test/nestdir2
    [root@hadoopserver ~]# hadoop fs -ls -R /test/
    -rw-r--r--   3 root     hdfs       6921 2020-04-22 06:25 /test/anaconda-ks.cfg
    drwxr-xr-x   - root     hdfs          0 2020-04-22 17:31 /test/dir1
    -rw-r--r--   3 root     hdfs       6577 2020-04-22 17:22 /test/dir1/original-ks.cfg
    -rw-r--r--   3 root     hdfs          0 2020-04-22 06:27 /test/ram.txt
    -rwxrwxr-x   3 rantripa hdfs          0 2020-04-22 17:24 /test/sita.txt
    [root@hadoopserver ~]#

    15. Delete non-empty directory
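    We'll be deleting "/test" from HDFS, which is a non-empty directory. Note that "-rmr" is deprecated, as the output below shows; the recommended equivalent is "hadoop fs -rm -r -f <directory path>".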

    hadoop fs -rmr -f  <directory path>

    [root@hadoopserver ~]# hadoop fs -rmr -f  /test/
    rmr: DEPRECATED: Please use '-rm -r' instead.
    20/04/22 17:50:44 INFO fs.TrashPolicyDefault: Moved:
    [root@hadoopserver ~]#
    [root@hadoopserver ~]#
    [root@hadoopserver ~]#
    [root@hadoopserver ~]#
    [root@hadoopserver ~]# hadoop fs -ls -R /test/
    ls: `/test/': No such file or directory
    [root@hadoopserver ~]#

    No comments