2010/12/12

hadoop

#ubuntu-10.10-server-amd64.iso   
#jdk-6u23-linux-x64.bin
#hadoop-0.20.2.tar.gz



[ip]

sudo vim.tiny /etc/network/interfaces


# The loopback network interface
auto lo
iface lo inet loopback

# The primary network interface
auto eth0

iface eth0 inet static

address 192.168.31.101
netmask 255.255.255.0
network 192.168.31.0
broadcast 192.168.31.255
gateway 192.168.31.1

dns-nameservers 168.95.1.1

[update-ubuntu]

sudo apt-get update ; sudo apt-get dist-upgrade ;

[install SSH & rsync]

sudo apt-get install ssh rsync ;

[cp]

scp /tmp/jdk-6u23-linux-x64.bin hadoop@192.168.31.101:~/

scp /tmp/hadoop-0.20.2.tar.gz hadoop@192.168.31.101:~/

[install java]

sh jdk-6u23-linux-x64.bin

[install hadoop]

tar zxvf hadoop-0.20.2.tar.gz -C /home/hadoop/

mv /home/hadoop/hadoop-0.20.2/* /home/hadoop/

sudo chown -R hadoop:hadoop /home/hadoop/

[setup java environment]

vim.tiny /home/hadoop/conf/hadoop-env.sh
>>>
export JAVA_HOME=/home/hadoop/jdk1.6.0_23

[before-clone-1/4]

vim.tiny /home/hadoop/conf/core-site.xml
>>>
<property> 
   <name>fs.default.name</name>
    <value>hdfs://hadoop01:9000</value> 
  </property>

[before-clone-2/4]
vim.tiny /home/hadoop/conf/hdfs-site.xml
>>>
<property> 
   <name>dfs.replication</name>
    <value>3</value> 
  </property>

[before-clone-3/4]
vim.tiny /home/hadoop/conf/mapred-site.xml
>>>
<property> 
   <name>mapred.job.tracker</name>
    <value>hadoop02:9001</value> 
  </property>

[before-clone-4/4]
mkdir /home/hadoop/.ssh/

[make a copy]

virt-clone \
     --original fog \
     --name hadoop18 \
     --file /var/lib/xen/images/hadoop18.img

[@masters]
sudo vim.tiny /etc/hosts

192.168.31.101 hadoop01
192.168.31.102 hadoop02
192.168.31.103 hadoop03
192.168.31.104 hadoop04
192.168.31.105 hadoop05
192.168.31.106 hadoop06
192.168.31.107 hadoop07
192.168.31.108 hadoop08

[@slaves]
sudo vim.tiny /etc/hosts

192.168.31.101 hadoop01
192.168.31.102 hadoop02

192.168.31.103 hadoop03

192.168.31.104 hadoop04

192.168.31.105 hadoop05

192.168.31.106 hadoop06

192.168.31.107 hadoop07

192.168.31.108 hadoop08

[gen public&private key]

ssh-keygen -t dsa -P "" -f /home/hadoop/.ssh/id_dsa

[cat public key]

cp /home/hadoop/.ssh/id_dsa.pub /home/hadoop/.ssh/authorized_keys

[share public key]

scp /home/hadoop/.ssh/authorized_keys hadoop@192.168.31.102:/home/hadoop/.ssh/
scp /home/hadoop/.ssh/authorized_keys hadoop@192.168.31.103:/home/hadoop/.ssh/
scp /home/hadoop/.ssh/authorized_keys hadoop@192.168.31.104:/home/hadoop/.ssh/
scp /home/hadoop/.ssh/authorized_keys hadoop@192.168.31.105:/home/hadoop/.ssh/
scp /home/hadoop/.ssh/authorized_keys hadoop@192.168.31.106:/home/hadoop/.ssh/
scp /home/hadoop/.ssh/authorized_keys hadoop@192.168.31.107:/home/hadoop/.ssh/
scp /home/hadoop/.ssh/authorized_keys hadoop@192.168.31.108:/home/hadoop/.ssh/

[share private key]

scp /home/hadoop/.ssh/id_dsa hadoop@192.168.31.102:/home/hadoop/.ssh/

[@masters]

vim.tiny /home/hadoop/conf/masters

hadoop01
hadoop02

vim.tiny /home/hadoop/conf/slaves

hadoop03
hadoop04
hadoop05
hadoop06
hadoop07
hadoop08

[format]

/home/hadoop/bin/hadoop namenode -format

hadoop namenode -format

[start-start-dfs]

/home/hadoop/bin/start-dfs.sh

[start-start-mapred]

/home/hadoop/bin/start-mapred.sh

[copy file from host to guest]

scp ./pg5000.txt hadoop@192.168.31.101:~/pg5000-data

[copy file from guest to dfs]

hadoop dfs -copyFromLocal pg5000-data pg5000-data-in

[dfs-ls]

hadoop dfs -ls

[dfs-mkdir]

hadoop dfs -mkdir /tmp/input
hadoop dfs -mkdir /out

[dfs-del]

hadoop fs -rmr /path_to_folder_or_file

[wordcount]

hadoop jar hadoop-0.20.2-examples.jar wordcount pg5000-data-in pg5000-data-ans

[get the file from dfs]

hadoop dfs -cat pg5000-data-ans/part-r-00000 >> pg5000-data-ans-out

[scp file from guest to host]

scp /home/hadoop/pg5000-data-ans-out hadoop@192.168.122.1:/home/hadoop/pg5000-data-ans-out



///

scp ./pg20417.txt hadoop_001@192.168.31.101:~/pg20417-data

./exec-hadoop dfs -copyFromLocal pg20417-data pg20417-data-in

./exec-hadoop dfs -ls

./exec-hadoop jar hadoop-0.20.2-examples.jar wordcount pg20417-data-in pg20417-data-ans

No comments:

Post a comment