Shailesh Gudimalla Oracle Apps DBA. All postings are my own workshop examples; if you use them, please check them first in your test environment.
Wednesday, December 9, 2009
Killing the Standard Manager when one or more instances are running on the same server
-- Generate one "kill -9 <os pid>" line for every Standard Manager process
-- whose process_status_code is 'A'. Headings and pagination are switched
-- off so the output consists only of the generated commands.
SET LINESIZE 120
SET HEADING OFF
SET PAGESIZE 0

SELECT 'kill -9 ' || ' ' || SUBSTR(p.os_process_id, 0, 10) AS "OS Proc"
FROM fnd_concurrent_queues q
INNER JOIN fnd_concurrent_processes p
    ON p.concurrent_queue_id = q.concurrent_queue_id
WHERE q.concurrent_queue_name = 'STANDARD'
  AND p.process_status_code = 'A'
ORDER BY p.process_status_code;
-- List concurrent programs that have ENABLE_TRACE = 'Y', together with the
-- user recorded as the last updater of each program definition.
SELECT
    prog.concurrent_program_name                     AS "Program Name",
    SUBSTR(prog.user_concurrent_program_name, 1, 40) AS "User Program Name",
    SUBSTR(usr.user_name, 1, 15)                     AS "Last Updated By",
    SUBSTR(usr.description, 1, 25)                   AS description
FROM apps.fnd_concurrent_programs_vl prog
INNER JOIN applsys.fnd_user usr
    ON usr.user_id = prog.last_updated_by
WHERE prog.enable_trace = 'Y';
Run this script to see if any Debug or Trace profile options have been set to Y, meaning that they are enabled. Some profile options may be required to be set to Y, but others should be N.
-- Report every profile option whose user-visible name contains "debug" or
-- "trace" (in any letter case) and whose value is 'Y', together with the
-- level at which the value is set and when it was last updated.
select distinct
       a.application_short_name   app_short,
       o.user_profile_option_name optname,
       -- level_id decides how level_value is interpreted; only the lookup
       -- that belongs to the row's own level is displayed.
       decode(v.level_id,
              10001, 'SITE',
              10002, 'APP : ' || a2.application_short_name,
              10003, 'RESP: ' || r.responsibility_key,
              10004, 'USER: ' || u.user_name,
              'Unknown')          d_level,
       v.profile_option_value     optval,
       v.last_update_date         updated
from   fnd_profile_options_vl    o,
       fnd_profile_option_values v,
       fnd_application           a,
       fnd_application           a2,
       fnd_responsibility        r,
       fnd_user                  u
where  (   upper(o.user_profile_option_name) like '%DEBUG%'
        or upper(o.user_profile_option_name) like '%TRACE%'
       )
and    a.application_id = v.application_id
and    o.application_id = v.application_id
and    o.profile_option_id = v.profile_option_id
-- Find the associated level for the profile value. All three lookups are
-- outer joins because level_value matches at most the table for its level.
and    r.application_id (+) = v.level_value_application_id
and    r.responsibility_id (+) = v.level_value
and    a2.application_id (+) = v.level_value
and    u.user_id (+) = v.level_value
and    v.profile_option_value = 'Y'
order by optname, app_short, d_level, optval;
http://oracle-apps-dba.blogspot.com/2007/07/apps-user-connection-details.html
And here is a modified version that shows concurrent requests as well as other session details.
Remarks:
1) It uses an input parameter to restrict the result to the named apps user(s).
2) It should be run as the apps user, or with a "set current_schema" statement in front.
-- Show connected E-Business Suite users together with their database
-- sessions. Four sources are combined with UNION:
--   1. ICX sessions (apps.icx_sessions)
--   2. responsibilities in use (apps.fnd_login_responsibilities)
--   3. open forms (apps.fnd_login_resp_forms)
--   4. concurrent requests (apps.fnd_concurrent_requests)
-- Prompts for the leading letters of an APPS user name; pressing Enter
-- leaves the filter as '%' and therefore lists all users.
set show off
SET VER OFF
set head on;
set timing on;
undefine apps_user;
accept apps_user char prompt 'Input starting letters of APPS username (Enter for all...): ';
set linesize 300;
set pagesize 200;
col sid_serial for a13;
col user_name for A10;
col module for a22;
col Responsibility for a29;
col function for a30;
col F_Type for a10;
col ap_pid for a6;
col db_pid for a6;
break on USER_NAME on db_pid on ap_pid on sid_serial
select * from (
    -- 1. ICX sessions whose responsibility is not already recorded in
    --    fnd_login_responsibilities for the same login.
    -- NOTE(review): gv$process and gv$session are not joined to each other
    -- in this branch (no addr = paddr / inst_id predicate) - confirm that
    -- this is intended, especially on RAC.
    select
         usr.user_name user_name
        ,v.spid db_pid
        ,ses.process ap_pid
        ,ses.sid||','||ses.serial# sid_serial
        ,ses.module
        ,rsp.responsibility_name Responsibility
        ,fuc.function_name Function
        ,i.function_type F_Type
        ,to_char(i.last_connect,'dd.mm hh24:mi') F_Start
    from
         apps.icx_sessions i
        ,apps.fnd_logins l
        ,apps.fnd_appl_sessions a
        ,apps.fnd_user usr
        ,apps.fnd_responsibility_tl rsp
        ,apps.fnd_form_functions fuc
        ,gv$process v
        ,gv$session ses
    where i.disabled_flag = 'N'
    and i.login_id = l.login_id
    and l.end_time is null
    and i.user_id = usr.user_id
    and l.login_id = a.login_id
    and a.audsid = ses.audsid
    and l.pid = v.pid
    and l.serial# = v.serial#
    and i.responsibility_application_id = rsp.application_id(+)
    and i.responsibility_id = rsp.responsibility_id(+)
    -- _TL tables hold one row per installed language; keep only the
    -- session language so each responsibility is listed once.
    and rsp.language(+) = userenv('LANG')
    and i.function_id = fuc.function_id(+)
    and i.responsibility_id not in (select t1.responsibility_id
                                    from apps.fnd_login_responsibilities t1
                                    where t1.login_id = l.login_id
                                   )
    and usr.user_name like '&apps_user%'
    union
    -- 2. Responsibilities currently in use (end_time is null).
    select
         usr.user_name
        ,v.spid
        ,ses.process
        ,ses.sid||','||ses.serial# sid_serial
        ,ses.module
        ,rsp.responsibility_name
        ,null
        ,null
        ,null form_start_time
    from
         apps.fnd_logins l
        ,apps.fnd_login_responsibilities r
        ,apps.fnd_user usr
        ,apps.fnd_responsibility_tl rsp
        ,gv$process v
        ,gv$session ses
    where l.end_time is null
    and l.user_id = usr.user_id
    and l.pid = v.pid
    and l.serial# = v.serial#
    and v.addr = ses.paddr
    -- addr/paddr are only comparable within one instance
    and v.inst_id = ses.inst_id
    and l.login_id = r.login_id(+)
    and r.end_time is null
    and r.responsibility_id = rsp.responsibility_id(+)
    and r.resp_appl_id = rsp.application_id(+)
    and rsp.language(+) = userenv('LANG')
    and r.audsid = ses.audsid
    and usr.user_name like '&apps_user%'
    union
    -- 3. Forms currently open (end_time is null).
    select
         usr.user_name
        ,v.spid
        ,ses.process
        ,ses.sid||','||ses.serial# sid_serial
        ,ses.module
        ,null
        ,frm.user_form_name
        ,ff.type
        ,to_char(f.start_time,'dd.mm hh24:mi')
    from
         apps.fnd_logins l
        ,apps.fnd_login_resp_forms f
        ,apps.fnd_user usr
        ,apps.fnd_form_tl frm
        ,apps.fnd_form_functions ff
        ,gv$process v
        ,gv$session ses
    where l.end_time is null
    and l.user_id = usr.user_id
    and l.pid = v.pid
    and l.serial# = v.serial#
    and v.addr = ses.paddr
    -- addr/paddr are only comparable within one instance
    and v.inst_id = ses.inst_id
    and l.login_id = f.login_id(+)
    and f.end_time is null
    and f.form_id = frm.form_id(+)
    and f.form_appl_id = frm.application_id(+)
    -- one row per installed language in fnd_form_tl; keep the session language
    and frm.language(+) = userenv('LANG')
    and f.audsid = ses.audsid
    and ff.form_id = frm.form_id
    and usr.user_name like '&apps_user%'
    union
    -- 4. Concurrent requests that still have a database session.
    --    Uses v$process / v$session, i.e. only the instance this script is
    --    connected to.
    select
         fu.user_name
        ,cr.oracle_process_id
        ,cr.OS_PROCESS_ID
        ,vs.SID || ',' || vs.serial# sid_serial
        ,(SELECT cp.concurrent_program_name
          FROM APPS.fnd_concurrent_programs cp
          WHERE cp.application_id = cr.program_application_id
          AND cp.concurrent_program_id = cr.concurrent_program_id
         )
        ,(select responsibility_name
          from apps.fnd_responsibility_vl
          where responsibility_id = cr.responsibility_id
          and application_id = cr.responsibility_application_id
         )
        ,(select execution_file_name
          from apps.fnd_executables fe, apps.fnd_concurrent_programs fcp
          where fcp.concurrent_program_id = cr.concurrent_program_id
          and fcp.application_id = cr.program_application_id
          and fe.executable_id = fcp.executable_id
          and fe.application_id = fcp.executable_application_id
         )
        ,'REQUEST'
        ,TO_CHAR(NVL(cr.actual_start_date, cr.requested_start_date), 'DD.MM HH24:MI') start_time
    FROM
         APPS.fnd_concurrent_requests cr,
         v$process vp,
         v$session vs,
         apps.fnd_user fu
    WHERE
        -- skip requests whose phase_code is 'I' or 'C'
        cr.phase_code <> 'I'
    AND (cr.phase_code < 'C' OR cr.phase_code > 'C')
    AND cr.status_code NOT IN ('U', 'X', 'D', 'E', 'I', 'C')
    -- NOTE(review): vp is outer-joined but none of its columns are selected
    AND cr.oracle_process_id = vp.spid (+)
    AND cr.oracle_session_id = vs.audsid (+)
    AND fu.user_id = cr.requested_by
    -- only requests that have a matching session row are kept
    AND vs.sid is not null
    and fu.user_name like '&apps_user%'
)
order by user_name, db_pid, ap_pid, sid_serial
;
Saturday, October 17, 2009
Using Big IP hardware load balancers with 11i and R12
All our Production instances are load balanced on the web tier. We have multiple app tiers for each instance, on which Apache and Forms are running. The user point of entry is the load balancer URL, which redirects the traffic to the individual app servers. Oracle uses a BigIP load balancer on their internal Global Single Instance also. Here are some very good Metalink notes which give you a good overview:
380489.1 Using Load-Balancers with Oracle E-Business Suite Release 12 (Relevant for 11i too)
217368.1 Advanced Configurations and Topologies for Enterprise Deployments of E-Biz Suite 11i
601694.1 How To Check Session Persistence On a BigIP F5 Load Balancer
456906.1 11i/R12 How to Debug "Transaction Context Is Lost"
387306.1 Random error Your login session has expired when using Load Balancing
When a hardware load balancer is used, the context file variables which need to be changed are:
s_webentryurlprotocol http or https
s_webentryhost load balancer hostname
s_webentrydomain load balancer domain name
s_active_webport load balancer port
s_login_page load balancer url
s_external_url load balancer url
Load balancer settings:
persistence timeout = 1 day
persistence type = cookie based persistence
Sunday, October 4, 2009
FND_STATS vs DBMS_STATS
FND_STATS vs DBMS_STATS
I have been asked this question many times: should we use fnd_stats or dbms_stats in 11i (or Release 12)? Most Apps DBAs you meet will say use fnd_stats. In fact, Oracle also recommends using fnd_stats in 11i E-Business environments. But do you know the basic differences, or rather the advantages, of using FND_STATS over DBMS_STATS?
Here I have tried to compare the two…
Tuesday, September 15, 2009
Managing CRS/ Commands
$ORA_CRS_HOME/crs/log Contains trace files for the CRS resources.
$ORA_CRS_HOME/crs/init Contains trace files of the CRS daemon during startup. Good place to start with any CRS login problems.
$ORA_CRS_HOME/css/log The Cluster Synchronization (CSS) logs indicate all actions such as reconfigurations, missed check-ins, connects, and disconnects from the client CSS listener. In some cases, the logger logs messages with the category of auth.crit for the reboots done by Oracle. This could be used for checking the exact time when the reboot occurred.
$ORA_CRS_HOME/css/init Contains core dumps from the Oracle Cluster Synchronization Service daemon (OCSSd) and the process ID (PID) for the CSS daemon whose death is treated as fatal. If abnormal restarts for CSS exist, the core files will have the format of core.<pid>.
$ORA_CRS_HOME/evm/log Log files for the Event Volume Manager (EVM) and evmlogger daemons. Not used as often for debugging as the CRS and CSS directories.
$ORA_CRS_HOME/evm/init PID and lock files for EVM. Core files for EVM should also be written here.
$ORA_CRS_HOME/srvm/log Log files for Oracle Cluster Registry (OCR), which contains the details at the Oracle cluster level.
$ORA_CRS_HOME/log/<hostname> Log files for Oracle Clusterware (known as the cluster alert log), which contains diagnostic messages at the Oracle cluster level. This is available from Oracle database 10g R2.
CRS DAEMON FUNCTIONALITY
------------------------
Here is a short description of each of the CRS daemon processes:
CRSD:
- Engine for HA operation
- Manages 'application resources'
- Starts, stops, and fails 'application resources' over
- Spawns separate 'actions' to start/stop/check application resources
- Maintains configuration profiles in the OCR (Oracle Cluster Registry)
- Stores current known state in the OCR.
- Runs as root
- Is restarted automatically on failure
OCSSD:
- OCSSD is part of RAC and Single Instance with ASM
- Provides access to node membership
- Provides group services
- Provides basic cluster locking
- Integrates with existing vendor clusterware, when present
- Can also run without integration to vendor clusterware
- Runs as Oracle.
- Failure exit causes machine reboot.
--- This is a feature to prevent data corruption in event of a split brain.
EVMD:
- Generates events when things happen
- Spawns a permanent child evmlogger
- Evmlogger, on demand, spawns children
- Scans callout directory and invokes callouts.
- Runs as Oracle.
- Restarted automatically on failure
CRS RESOURCE STATUS
Status of the database, all instances and all services.
srvctl status database -d ORACLE -v
Status of named instances with their current services.
srvctl status instance -d ORACLE -i RAC01,RAC02 -v
Status of a named service.
srvctl status service -d ORACLE -s ERP -v
Status of all nodes supporting database applications.
srvctl status node
START CRS RESOURCES
Start the database with all enabled instances.
srvctl start database -d ORACLE
Start named instances.
srvctl start instance -d ORACLE -i RAC03,RAC04
Start named services. Dependent instances are started as needed.
srvctl start service -d ORACLE -s CRM
Start a service at the named instance.
srvctl start service -d ORACLE -s CRM -i RAC04
Start node applications.
srvctl start nodeapps -n myclust-4
STOP CRS RESOURCES
Stop the database, all instances and all services.
srvctl stop database -d ORACLE
Stop named instances, first relocating all existing services.
srvctl stop instance -d ORACLE -i RAC03,RAC04
Stop the service.
srvctl stop service -d ORACLE -s CRM
Stop the service at the named instances.
srvctl stop service -d ORACLE -s CRM -i RAC04
Stop node applications. Note that instances and services also stop.
srvctl stop nodeapps -n myclust-4
ADD CRS RESOURCES
Add a new node:
srvctl add nodeapps -n myclust-1 -o $ORACLE_HOME -A 139.184.201.1/255.255.255.0/hme0
Add a new database.
srvctl add database -d ORACLE -o $ORACLE_HOME
Add named instances to an existing database.
srvctl add instance -d ORACLE -i RAC01 -n myclust-1
srvctl add instance -d ORACLE -i RAC02 -n myclust-2
srvctl add instance -d ORACLE -i RAC03 -n myclust-3
Add a service to an existing database with preferred instances (-r) and
available instances (-a). Use basic failover to the available instances.
srvctl add service -d ORACLE -s STD_BATCH -r RAC01,RAC02 -a RAC03,RAC04
Add a service to an existing database with preferred instances in list one and
available instances in list two. Use preconnect at the available instances.
srvctl add service -d ORACLE -s STD_BATCH -r RAC01,RAC02 -a RAC03,RAC04 -P PRECONNECT
REMOVE CRS RESOURCES
Remove the applications for a database.
srvctl remove database -d ORACLE
Remove the applications for named instances of an existing database.
srvctl remove instance -d ORACLE -i RAC03
srvctl remove instance -d ORACLE -i RAC04
Remove the service.
srvctl remove service -d ORACLE -s STD_BATCH
Remove the service from the instances.
srvctl remove service -d ORACLE -s STD_BATCH -i RAC03,RAC04
Remove all node applications from a node.
srvctl remove nodeapps -n myclust-4
MODIFY CRS RESOURCES
Modify an instance to execute on another node.
srvctl modify instance -d ORACLE -i RAC04 -n myclust-4
Modify a service to execute on another node.
srvctl modify service -d ORACLE -s HOT_BATCH -i RAC01 -t RAC02
Modify an instance to be a preferred instance for a service.
srvctl modify service -d ORACLE -s HOT_BATCH -i RAC02 -r
RELOCATE SERVICES
Relocate a service from one instance to another
srvctl relocate service -d ORACLE -s CRM -i RAC04 -t RAC01
ENABLE CRS RESOURCES
(The resource may be up or down to use this function)
Enable the database.
srvctl enable database -d ORACLE
Enable the named instances.
srvctl enable instance -d ORACLE -i RAC01,RAC02
Enable the service.
srvctl enable service -d ORACLE -s ERP,CRM
Enable the service at the named instance.
srvctl enable service -d ORACLE -s CRM -i RAC03
DISABLE CRS RESOURCES
(The resource must be down to use this function)
Disable the database globally.
srvctl disable database -d ORACLE
Disable the named instances.
srvctl disable instance -d ORACLE -i RAC01,RAC02
Disable the service globally.
srvctl disable service -d ORACLE -s ERP,CRM
Disable the service at the named instance.
srvctl disable service -d ORACLE -s CRM -i RAC03,RAC04
Monday, September 14, 2009
1 Objectives
The objectives of this document are to:
- Record the setup and configuration of the 10g Oracle Standalone environment.
· Supplement the handover of the environment to ICM personnel.
1.1 Scope
This document provides an overview of Oracle Clusterware and Oracle Real Application Clusters (RAC) installation and configuration procedures.
2 System Configuration
2.1 Machine Configuration
The table below details the specifications for Server.
Node Name | rac01.-asp.com |
Purpose | RAC Node 1 |
Ip Address | 10.13.100.11 |
Manufacturer | HP |
Model | DL585 G2 |
Operating System | Linux Red Hat Advanced Server |
OS Version | 4.0 |
Update Version | Nahant Update 5 |
OS Patches | See “Check installed packages” section |
Memory | 16 GB |
Swap | 16 GB |
No Of Processors | 2 Dual-Core AMD Opteron(tm) Processor 8218 cache size 1024 KB |
No of Oracle Instances | 1 x Database |
Adapter | Target | Lun | SD# | Mpath | Size | Usage |
1 | 1 | 3 | sdj | mpath2 | 1GB | VOTE3 |
1 | 0 | 3 | sdc | mpath2 | 1GB | VOTE3 |
1 | 1 | 2 | sdi | mpath1 | 1GB | VOTE2 |
1 | 0 | 2 | sdb | mpath1 | 1GB | VOTE2 |
1 | 1 | 1 | sdh | mpath0 | 1GB | VOTE1 |
1 | 0 | 1 | sda | mpath0 | 1GB | VOTE1 |
1 | 1 | 7 | sdn | mpath6 | 400GB | ASM1 |
1 | 0 | 7 | sdg | mpath6 | 400GB | ASM1 |
1 | 1 | 6 | sdm | mpath5 | 175GB | ASM2 |
1 | 0 | 6 | sdf | mpath5 | 175GB | ASM2 |
1 | 1 | 5 | sdl | mpath4 | 5GB | OCR1 |
1 | 0 | 5 | sde | mpath4 | 5GB | OCR1 |
1 | 1 | 4 | sdk | mpath3 | 5GB | OCR2 |
1 | 0 | 4 | sdd | mpath3 | 5GB | OCR2 |
The following table details the disk configuration information used to partition the internal disks during the installation process:
File System | Storage | Size | Mount Point |
ext3 | /dev/mapper/VolGroup00-LogVol00 | 20 GB | / |
ext3 | /dev/cciss/c0d0p1 | 100 M | /boot |
ext3 | /dev/mapper/VolGroup00-LogVol02 | 2 GB | /home |
ext3 | /dev/mapper/VolGroup00-LogVol05 | 4 GB | /tmp |
ext3 | /dev/mapper/VolGroup00-LogVol03 | 1 GB | /opt |
ext3 | /dev/mapper/VolGroup00-LogVol04 | 2 GB | /var |
ext3 | /dev/mapper/VolGroup00-LogVol06 | 22 GB | /u01 |
2.2 External/Shared Storage
File System | Size | Mount Point |
ASM | 187.9 GB | +DATA |
ASM | 429.4 GB | +BACKUP |
File System | Storage | Size | Mount Point |
OCFS2 | /dev/mapper/mpath0 | 1 GB | /u02/vote1 |
OCFS2 | /dev/mapper/mpath1 | 1 GB | /u02/vote2 |
OCFS2 | /dev/mapper/mpath2 | 1 GB | /u02/vote3 |
OCFS2 | /dev/mapper/mpath4 | 5 GB | /u02/ocr1 |
OCFS2 | /dev/mapper/mpath3 | 5 GB | /u02/ocr2 |
2.3 Kernel Parameters
Kernel parameters were reconfigured to support the Oracle environment. The table below details all Kernel parameter changes.
Parameter Name | rac01 | rac02 |
SEMMNI | 4096 | 4096 |
SHMMAX | 4294967295 | 4294967295 |
SHMMNI | 4096 | 4096 |
SHMALL | 2097152 | 2097152 |
IP_LOCAL_PORT_RANGE (START) | 1024 | 1024 |
IP_LOCAL_PORT_RANGE (RANGE) | 65000 | 65000 |
RMEM_DEFAULT | 262144 | 262144 |
RMEM_MAX | 262144 | 262144 |
WMEM_DEFAULT | 262144 | 262144 |
WMEM_MAX | 262144 | 262144 |
User Process Limit | 16384 | 16384 |
User Max Files | 65536 | 65536 |
3 Oracle Software Configuration
3.1 Directory Structure
The following folders and ASM disk groups were created during the installation process for rac01 and rac02.
Description | Location |
Oracle Base Directory | /u01/app/oracle |
Oracle Inventory Directory | /u01/app/oracle/oraInventory |
CRS (ORACLE_HOME) | /u01/crs/oracle/product/10/crs |
ASM (ORACLE_HOME) | /u01/app/oracle/product/10.2.0/asm |
DB (ORACLE_HOME) | /u01/app/oracle/product/10.2.0/db_1 |
Datafiles | +DATA (ASM disk group) |
Recovery Area | +BACKUP (ASM disk group) |
3.2 Database Layout
The following table details the datafile configuration:
3.3 Redo Logs
The following tables detail the redo log configuration
RedoLogs | Directory | Datafile |
1 | +DATA/prod/onlinelog/ | group_1.261.647361523 |
1 | +BACKUP/prod/onlinelog/ | group_1.257.647361525 |
2 | +DATA/prod/onlinelog/ | group_2.262.647361525 |
2 | +BACKUP/prod/onlinelog/ | group_2.258.647361525 |
3 | +DATA/prod/onlinelog/ | group_3.266.647361665 |
3 | +BACKUP/prod/onlinelog/ | group_3.259.647361665 |
4 | +DATA/prod/onlinelog/ | group_4.267.647361667 |
4 | +BACKUP/prod/onlinelog/ | group_4.260.647361667 |
3.4 Controlfiles
The following tables detail the controlfile configurations.
Controlfiles | Location |
current.260.647361521 | +DATA/prod/controlfile/ |
current.256.647361521 | +BACKUP/prod/controlfile/ |
4 Oracle Pre-Installation tasks
This section details the tasks and checks carried out prior to the installation of Oracle 10g RAC.
4.1 Installing Redhat
Please ensure following RED HAT packages are installed before going ahead with an Oracle Install.
- Development Tools
- Compatibility Arch Development Support
- Legacy Development Support
4.2 Network Configuration
Please configure /etc/hosts as below at rac01/rac02:
Ensure that the node names are not included for the loopback address in the /etc/hosts file.
RAC01:
# Do not remove the following line, or various programs
# that require network functionality will fail.
127.0.0.1 localhost.localdomain localhost
# Public
10.13.100.11 rac01.-asp.com rac01
10.13.100.12 rac02.-asp.com rac02
#Private
172.16.2.1 orapriv01.-asp.com orapriv01
172.16.2.2 orapriv02.-asp.com orapriv02
#Virtual
10.13.100.13 oravip01.-asp.com oravip01
10.13.100.14 oravip02.-asp.com oravip02
RAC02:
127.0.0.1 localhost.localdomain localhost
# Public
10.13.100.11 rac01.-asp.com rac01
10.13.100.12 rac02.-asp.com rac02
#Private
172.16.2.1 orapriv01.-asp.com orapriv01
172.16.2.2 orapriv02.-asp.com orapriv02
#Virtual
10.13.100.13 oravip01.-asp.com oravip01
10.13.100.14 oravip02.-asp.com oravip02
4.3 Copy Oracle 10.2.0.1 software onto server
Connect to http://edelivery.oracle.com and download Oracle Database for platform “Linux Intel (64-bit)”.
Click “Oracle® Database 10g Release 2 (10.2.0.1.0) Media Pack (with Oracle® Enterprise Manager 10g Release 2 Grid Control (10.2.0.1.0) for Linux x86 and Oracle® Warehouse Builder 10g Release 2 (10.2.0.1)) for Linux x86-64”
Download part 1 of 5 to part 5 of 5
Upload zip files onto server to /u01/10gr2, and run the following commands:
cd /u01/10gr2
# -p keeps the step rerunnable if the directory already exists
mkdir -p 10gr2
cd 10gr2
# Let the shell expand the glob instead of parsing `ls` output, and quote
# the name so a path containing spaces is passed to unzip intact.
for i in ../*5.zip
do
unzip "$i"
done
4.4 Check installed packages
To interpret the package name, e.g. ocfs2-2.6.9-22.0.1.ELsmp-1.2.1-1.i686.rpm
The package name is comprised of multiple parts separated by '-'.
· ocfs2 - Package name
· 2.6.9-22.0.1.ELsmp - Kernel version and flavor
· 1.2.1 - Package version
· 1 - Package subversion
· i686 - Architecture
4.4.1 Adding packages
Adding packages can be completed by either install using Add and remove applications from gnome VNC session or install directly from CD’s. Dependency issues may require packages to be removed and reinstalled.
4.4.2 Oracle required packages
Oracle x86_64 requires these packages and versions at a minimum. This list is based upon a "default-RPMs" installation of RHEL AS/ES 4. The x86_64 packages are on the Red Hat Enterprise Linux 4 x86-64 distribution. Run the following rpm command to list the installed packages and to distinguish between 32-bit and 64-bit packages:
rpm -q --qf '%{NAME}-%{VERSION}-%{RELEASE} (%{ARCH})\n' \
binutils compat-db control-center gcc gcc-c++ glibc glibc-common \
gnome-libs libstdc++ libstdc++-devel make pdksh sysstat \
xscreensaver compat-libstdc++-33 glibc-kernheaders glibc-headers libaio \
libgcc glibc-devel ORBit xorg-x11-deprecated-libs | sort
Required packages | Installed on rac01 | Installed on rac02 |
binutils-2.15.92.0.2-18.x86_64 | binutils-2.15.92.0.2-22 (x86_64) | binutils-2.15.92.0.2-22 (x86_64) |
compat-db-4.1.25-9.i386 | compat-db-4.1.25-9 (i386) | compat-db-4.1.25-9 (i386) |
compat-db-4.1.25-9.x86_64 | compat-db-4.1.25-9 (x86_64) | compat-db-4.1.25-9 (x86_64) |
compat-libstdc++-33-3.2.3-47.3.i386 | compat-libstdc++-33-3.2.3-47.3 (i386) | compat-libstdc++-33-3.2.3-47.3 (i386) |
compat-libstdc++-33-3.2.3-47.3.x86_64 | compat-libstdc++-33-3.2.3-47.3 (x86_64) | compat-libstdc++-33-3.2.3-47.3 (x86_64) |
control-center-2.8.0-12.x86_64 | control-center-2.8.0-12.rhel4.5 (x86_64) | control-center-2.8.0-12.rhel4.5 (x86_64) |
gcc-3.4.3-22.1.x86_64 | gcc-3.4.6-8 (x86_64) | gcc-3.4.6-8 (x86_64) |
gcc-c++-3.4.3-22.1.x86_64 | gcc-c++-3.4.6-8 (x86_64) | gcc-c++-3.4.6-8 (x86_64) |
glibc-2.3.4-2.9.i686 | glibc-2.3.4-2.36 (i686) | glibc-2.3.4-2.36 (i686) |
glibc-2.3.4-2.9.x86_64 | glibc-2.3.4-2.36 (x86_64) | glibc-2.3.4-2.36 (x86_64) |
glibc-common-2.3.4-2.9.x86_64 | glibc-common-2.3.4-2.36 (x86_64) | glibc-common-2.3.4-2.36 (x86_64) |
glibc-devel-2.3.4-2.9.i386 | glibc-common-2.3.4-2 (i686) | glibc-common-2.3.4-2 (i686) |
glibc-devel-2.3.4-2.9.x86_64 | glibc-devel-2.3.4-2.36 (x86_64) | glibc-devel-2.3.4-2.36 (x86_64) |
glibc-headers-2.3.4-2.9.x86_64 | glibc-headers-2.3.4-2.36 (x86_64) | glibc-headers-2.3.4-2.36 (x86_64) |
glibc-kernheaders-2.4-9.1.87.x86_64 | glibc-kernheaders-2.4-9.1.100.EL (x86_64) | glibc-kernheaders-2.4-9.1.100.EL (x86_64) |
gnome-libs-1.4.1.2.90-44.1.x86_64 | gnome-libs-1.4.1.2.90-44.1 (x86_64) | gnome-libs-1.4.1.2.90-44.1 (x86_64) |
libaio-0.3.103-3.i386 | libaio-0.3.105-2 (i386) | libaio-0.3.105-2 (i386) |
libaio-0.3.103-3.x86_64 | libaio-0.3.105-2 (x86_64) | libaio-0.3.105-2 (x86_64) |
libgcc-3.4.3-22.1.i386 | libgcc-3.4.6-8 (i386) | libgcc-3.4.6-8 (i386) |
libgcc-3.4.6-8 (x86_64) | libgcc-3.4.6-8 (x86_64) | |
libstdc++-3.4.6-8 (i386) | libstdc++-3.4.6-8 (i386) | |
libstdc++-3.4.3-22.1.x86_64 | libstdc++-3.4.6-8 (x86_64) | libstdc++-3.4.6-8 (x86_64) |
libstdc++-devel-3.4.3-22.1.x86_64 | libstdc++-devel-3.4.5-2 (x86_64) | libstdc++-devel-3.4.5-2 (x86_64) |
make-3.80-5.x86_64 | make-3.80-5 (x86_64) | make-3.80-5 (x86_64) |
ORBit-0.5.17-14.i386 | ORBit-0.5.17-14 (x86_64) | ORBit-0.5.17-14 (x86_64) |
pdksh-5.2.14-30.x86_64 | pdksh-5.2.14-30.3 (x86_64) | pdksh-5.2.14-30.3 (x86_64) |
sysstat-5.0.5-1.x86_64 | sysstat-5.0.5-7.rhel4 (x86_64) | sysstat-5.0.5-7.rhel4 (x86_64) |
xorg-x11-deprecated-libs-6.8.2-1.EL.13.6.i386 | xorg-x11-deprecated-libs-6.8.2-1.EL.13.25.1 (i386) | xorg-x11-deprecated-libs-6.8.2-1.EL.13.25.1 (i386) |
xorg-x11-deprecated-libs-6.8.2-1.EL.13.25.1 (x86_64) | xorg-x11-deprecated-libs-6.8.2-1.EL.13.25.1 (x86_64) | |
xscreensaver-4.18-5.rhel4.2.x86_64 | xscreensaver-4.18-5.rhel4.10 (x86_64) | xscreensaver-4.18-5.rhel4.10 (x86_64) |
4.5 validate script
Oracle provides validate scripts for most platforms and Oracle releases.
Doc ID: Note:342555.1
Pre-Install checks for 10gR2 RDBMS (10.2.x) - Linux AMD64/EM64T Platforms is Oracle Note 342555.1.
To run the rules:
- Save the file as "validate.tar".
- Untar the files to a local directory. i.e tar xvf validate.tar
- Set your environment to the instance to be validated.
- Execute perl validate.pl filename.txt from the command line, as in the following examples:
[oracle@rac01 software]$ perl validate.pl 10gr2_rdbms_linuxamd64_hcve_043006.txt
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Health Check/Validation (V 01.07.00)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
"Validation Rule Engine" will be run in following environment:
HOSTNAME : rac01.-asp.com
USERNAME : oracle
ORACLE_SID : LIVE1
ORACLE_HOME : /u01/app/oracle/product/10.2.0/db_1
If this is not correct environment
Please set correct env parameters and rerun the program
Would you like to continue [Y]es/[N]o (Hit return for [Y]es) : Y
Executing Rules
~~~~~~~~~~~~~~~
Executing Rule: OS certified? - completed successfully.
Executing Rule: User in /etc/passwd? - completed successfully.
Executing Rule: Group in /etc/group? - completed successfully.
Executing Rule: Input ORACLE_HOME - user INPUT Required.
Enter value for <>
(Hit return for [$ORACLE_HOME]) :
- completed successfully.
Executing Rule: ORACLE_HOME valid? - completed successfully.
Executing Rule: O_H perms OK? - completed successfully.
Executing Rule: Umask set to 022? - completed successfully.
Executing Rule: LDLIBRARYPATH unset? - completed successfully.
Executing Rule: JAVA_HOME unset? - completed successfully.
Executing Rule: Other O_Hs in PATH? - completed successfully.
Executing Rule: oraInventory perms - completed successfully.
Executing Rule: /tmp adequate? - completed successfully.
Executing Rule: Swap (in Mb) - completed successfully.
Executing Rule: RAM (in Mb) - completed successfully.
Executing Rule: Swap OK? - completed successfully.
Executing Rule: Disk Space OK? - completed successfully.
Executing Rule: Kernel params OK? - completed successfully.
Executing Rule: Got ld,nm,ar,make? - completed successfully.
Executing Rule: ulimits OK? - completed successfully.
Executing Rule: RHEL3 rpms(Pt1) ok? - completed successfully.
Executing Rule: RHEL3 rpms(Pt2) ok? - completed successfully.
Executing Rule: RHEL4 Pt 1 rpms ok?
- completed successfully.
Executing Rule: RHEL4 Pt 2 rpms ok? - completed successfully.
Executing Rule: SuSE SLES9 rpms ok? - completed successfully.
Executing Rule: ip_local_port_range - completed successfully.
Executing Rule: Tainted Kernel? - completed successfully.
Executing Rule: other OUI up? - completed successfully.
Test "10gr2_rdbms_linuxamd64_hcve_043006" executed at Wed Feb 13 10:51:06 2008
Test Results
~~~~~~~~~~~~
ID NAME RESULT C VALUE
===== ==================== ====== = ========================================
10 OS certified? PASSED = Certified with 10gR2 RDBMS
20 User in /etc/passwd? PASSED = userOK
30 Group in /etc/group? PASSED = GroupOK
40 Input ORACLE_HOME RECORD $ORACLE_HOME
50 ORACLE_HOME valid? PASSED = OHexists
60 O_H perms OK? PASSED = CorrectPerms
70 Umask set to 022? PASSED = UmaskOK
80 LDLIBRARYPATH unset? PASSED = UnSet
90 JAVA_HOME unset? PASSED = UnSet
100 Other O_Hs in PATH? PASSED = NoneFound
110 oraInventory perms PASSED = oraInventoryNotFound
120 /tmp adequate? PASSED = TempSpaceOK
130 Swap (in Mb) PASSED > 16383
140 RAM (in Mb) PASSED > 16011
150 Swap OK? PASSED = SwapToRAMOK
160 Disk Space OK? PASSED = DiskSpaceOK
170 Kernel params OK? FAILED = SHMMAXTooSmall
180 Got ld,nm,ar,make? PASSED = ld_nm_ar_make_found
190 ulimits OK? FAILED = StackTooSmall NoFilesTooSmall Maxupro..>
204 RHEL3 rpms(Pt1) ok? PASSED = NotRHEL3
205 RHEL3 rpms(Pt2) ok? PASSED = NotRHEL3
206 RHEL4 Pt 1 rpms ok? FAILED = binutils-2.15.92.0.2-22 RHEL4rpmsPart..>
207 RHEL4 Pt 2 rpms ok? FAILED = libaio notInstalled glibc-devel (32bi..>
208 SuSE SLES9 rpms ok? PASSED = NotSuSE
209 ip_local_port_range PASSED = ip_local_port_rangeOK
210 Tainted Kernel? PASSED = NotVerifiable
220 other OUI up? PASSED = NoOtherOUI
Please see the log file for detailed results and recommendations
Log FileName: 10gr2_rdbms_linuxamd64_hcve_043006_run_4355/validate_result_10gr2_rdbms_linuxamd64_hcve_043006.log
4.6 Download ASMLib packages
Download ASMLib rpm files from:
http://www.oracle.com/technology/software/tech/linux/asmlib/rhel4.html
Search for “Drivers for kernel 2.6.9-55.EL”
The Library and Tools
· oracleasm-support-2.0.3-1.x86_64.rpm
· oracleasmlib-2.0.2-1.x86_64.rpm
· Drivers for kernel “2.6.9-55.ELsmp #1 SMP”
oracleasm-2.6.9-55.ELsmp-2.0.3-1.x86_64.rpm
4.7 Download OCFS packages
ocfs2 package from URL:-
http://oss.oracle.com/projects/ocfs2/files/RedHat/RHEL4/x86_64/old/1.2.7-1/2.6.9-55.EL/
- ocfs2-tools-1.2.7-1.el4.x86_64.rpm
- ocfs2-tools-debuginfo-1.2.7-1.el4.x86_64.rpm
- ocfs2console-1.2.7-1.el4.x86_64.rpm
4.8 Creating Required Operating System Groups and Users
Create oinstall and dba groups (not created osoper group as this is optional)
/usr/sbin/groupadd oinstall
/usr/sbin/groupadd dba
Create Oracle user
/usr/sbin/useradd -g oinstall -G dba -d /home/oracle -m oracle
This command will create user “oracle”, with group “oinstall” and supplementary group of “dba”. It will also create the home directory of “/home/oracle”. The default shell will be used - /bin/bash
Set Password
passwd oracle
4.9 Oracle required directory creation
Oracle software will be installed on the internal disk, in file system /u01.
The OCFS2 file system will be on the external disk and will be mounted as /u02.
On servers rac01 and rac02:
mkdir -p /u01/app/oracle
chown -R oracle:oinstall /u01/app/oracle
chmod -R 775 /u01/app/oracle
mkdir /u02
chown oracle:dba /u02
chmod 775 /u02
4.10 Verifying That the User nobody Exists
1. To determine if the user exists, enter the following command:
id nobody
If this command displays information about the nobody user then do not create that user.
2. If the nobody user does not exist, then enter the following command to create it:
/usr/sbin/useradd nobody
4.11 Configuring SSH on Cluster Member Nodes For oracle
On both nodes:
mkdir -p ~/.ssh
chmod 700 ~/.ssh
/usr/bin/ssh-keygen -t rsa
/usr/bin/ssh-keygen -t dsa
On node 1:
ssh rac01 cat /home/oracle/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
ssh rac01 cat /home/oracle/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
ssh rac02 cat /home/oracle/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
ssh rac02 cat /home/oracle/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
scp ~/.ssh/authorized_keys rac02:/home/oracle/.ssh/
chmod 600 ~/.ssh/authorized_keys
exec /usr/bin/ssh-agent $SHELL
/usr/bin/ssh-add
On node 2:
chmod 700 ~/.ssh
ssh rac02 cat /home/oracle/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
ssh rac02 cat /home/oracle/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
ssh rac01 cat /home/oracle/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
ssh rac01 cat /home/oracle/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
scp ~/.ssh/authorized_keys rac01:/home/oracle/.ssh/
chmod 600 ~/.ssh/authorized_keys
Test ssh by logging onto node 1 and executing: ssh rac02. Then log onto node 2 and execute: ssh rac01
4.12 Configuring SSH on Cluster Member Nodes for root
kdir ~/.ssh
chmod 700 ~/.ssh
/usr/bin/ssh-keygen -t rsa
/usr/bin/ssh-keygen -t dsa
touch ~/.ssh/authorized_keys
On node 1:
ssh rac01 cat /root/.ssh/id_rsa.pub >> authorized_keys
Password:
ssh rac01 cat /root/.ssh/id_dsa.pub >> authorized_keys
Password:
ssh rac02 cat /root/.ssh/id_rsa.pub >> authorized_keys
Password:
ssh rac02 cat /root/.ssh/id_dsa.pub >> authorized_keys
Password:
scp authorized_keys rac02:/root/.ssh/
chmod 600 ~/.ssh/authorized_keys
On node 2:
ssh rac02 cat /root/.ssh/id_rsa.pub >> authorized_keys
Password:
ssh rac02 cat /root/.ssh/id_dsa.pub >> authorized_keys
Password:
ssh rac01 cat /root/.ssh/id_rsa.pub >> authorized_keys
Password:
ssh rac01 cat /root/.ssh/id_dsa.pub >> authorized_keys
Password:
scp authorized_keys rac01:/root/.ssh/
chmod 600 ~/.ssh/authorized_keys
exec /usr/bin/ssh-agent $SHELL
/usr/bin/ssh-add
Test ssh by logging onto node1 and executing following command:
ssh rac02
Then log onto node 2 and execute: ssh rac01
4.13 VNC setup
Start VNC services and login as oracle user
Connect to root
Start shell and execute “xhost +”
The xhost program is used to add and delete host names or user names to the list allowed to make connections to the X server.
Fire Xcalc to see it is working
Start new shell and login as oracle
Execute “xhost +”
Fire Xcalc to see it is working
4.14 Kernel parameters
Edit file /etc/sysctl.conf on both servers and add these lines:
kernel.shmall = 2097152
kernel.shmmax = 4294967296
kernel.shmmni = 4096
kernel.sem = 250 32000 100 128
fs.file-max = 65536
net.ipv4.ip_local_port_range = 1024 65000
net.core.rmem_default=262144
net.core.wmem_default=262144
net.core.rmem_max=262144
net.core.wmem_max=262144
Reboot both servers
init 6 or reboot
4.15 Verifying Hangcheck-timer Module on Kernel 2.6
As root run the following command to check if hangcheck loaded:
/sbin/lsmod | grep hang
If the hangcheck-timer module is not listed for any node, then enter a command similar to the following to start the module located in the directories of the current kernel version:
insmod /lib/modules/2.6.9-55.ELsmp/kernel/drivers/char/hangcheck-timer.ko hangcheck_tick=30 hangcheck_margin=180
lsmod | grep hang
The output should be similar to the following:
hangcheck_timer 5337 0
Now add "/sbin/modprobe hangcheck-timer" to /etc/rc.local
[root@rac02 ~]# echo "/sbin/modprobe hangcheck-timer" >> /etc/rc.local
4.16 Oracle user limits
Add the following lines to /etc/security/limits.conf file:
* soft nproc 2047
* hard nproc 16384
* soft nofile 1024
* hard nofile 65536
4.17 Installing the cvuqdisk Package for linux
Please download and install the operating system package cvuqdisk.
Check cvuqdisk is installed
rpm -qi cvuqdisk
It’s not installed if the command returns “package cvuqdisk is not installed”
If there is an existing version, remove it with
rpm -e cvuqdisk
Install package
cd /u01/10gr2/10gr2/clusterware/rpm
rpm -iv cvuqdisk-1.0.1-1.rpm
Once installed make sure following details returned for “rpm -qi cvuqdisk”
[root@rac01 ~]# rpm -qi cvuqdisk
Name : cvuqdisk Relocations: (not relocatable)
Version : 1.0.1 Vendor: Oracle Corp.
Release : 1 Build Date: Thu 02 Jun 2005 23:21:38 BST
Install Date: Tue 12 Feb 2008 14:00:36 GMT Build Host: stacs27.us.oracle.com
Group : none Source RPM: cvuqdisk-1.0.1-1.src.rpm
Size : 4168 License: Oracle Corp.
Signature : (none)
Summary : RPM file for cvuqdisk
Description :
This package contains the cvuqdisk program required by CVU.
cvuqdisk is a binary that assists CVU in finding scsi disks.
4.18 Disk Partitioning
fdisk -l
Disk /dev/cciss/c0d0: 73.3 GB, 73372631040 bytes
255 heads, 63 sectors/track, 8920 cylinders
Units = cylinders of 16065 * 512 = 8225280 bytes
Device Boot Start End Blocks Id System
/dev/cciss/c0d0p1 * 1 13 104391 83 Linux
/dev/cciss/c0d0p2 14 8920 71545477+ 8e Linux LVM
Disk /dev/dm-7: 187.9 GB, 187904819200 bytes
255 heads, 63 sectors/track, 22844 cylinders
Units = cylinders of 16065 * 512 = 8225280 bytes
Disk /dev/dm-7 doesn't contain a valid partition table
Disk /dev/dm-8: 429.4 GB, 429496729600 bytes
255 heads, 63 sectors/track, 52216 cylinders
Units = cylinders of 16065 * 512 = 8225280 bytes
Disk /dev/dm-8 doesn't contain a valid partition table
Disk /dev/dm-9: 1073 MB, 1073741824 bytes
34 heads, 61 sectors/track, 1011 cylinders
Units = cylinders of 2074 * 512 = 1061888 bytes
Disk /dev/dm-9 doesn't contain a valid partition table
Disk /dev/dm-10: 1073 MB, 1073741824 bytes
34 heads, 61 sectors/track, 1011 cylinders
Units = cylinders of 2074 * 512 = 1061888 bytes
Disk /dev/dm-10 doesn't contain a valid partition table
Disk /dev/dm-11: 1073 MB, 1073741824 bytes
34 heads, 61 sectors/track, 1011 cylinders
Units = cylinders of 2074 * 512 = 1061888 bytes
Disk /dev/dm-11 doesn't contain a valid partition table
Disk /dev/dm-12: 5368 MB, 5368709120 bytes
166 heads, 62 sectors/track, 1018 cylinders
Units = cylinders of 10292 * 512 = 5269504 bytes
Disk /dev/dm-12 doesn't contain a valid partition table
Disk /dev/dm-13: 5368 MB, 5368709120 bytes
166 heads, 62 sectors/track, 1018 cylinders
Units = cylinders of 10292 * 512 = 5269504 bytes
Disk /dev/dm-13 doesn't contain a valid partition table
4.19 Checking the Network Setup with CVU
[oracle@rac01 software]$ /u01/software/clusterware/cluvfy/runcluvfy.sh comp nodereach -n rac01,rac02 -verbose
Verifying node reachability
Checking node reachability...
Check: Node reachability from node "rac01"
Destination Node Reachable?
------------------------------------ ------------------------
rac02 yes
rac01 yes
Result: Node reachability check passed from node "rac01".
Verification of node reachability was successful.
4.20 Checking the Hardware and Operating System Setup with CVU
Checking the Operating System Requirements Setup with CVU
Connect to oracle VNC and in terminal run the following command:
exec /usr/bin/ssh-agent $SHELL
/usr/bin/ssh-add
[oracle@rac01 ~]$ /u01/software/clusterware/cluvfy/runcluvfy.sh stage -post hwos -n rac01,rac02
Performing post-checks for hardware and operating system setup
Checking node reachability...
Node reachability check passed from node "rac01".
Checking user equivalence...
User equivalence check failed for user "oracle".
Check failed on nodes:
rac01
WARNING:
User equivalence is not set for nodes:
rac01
Verification will proceed with nodes:
rac02
Checking node connectivity...
Node connectivity check passed for subnet "10.13.100.0" with node(s) rac02.
Node connectivity check passed for subnet "192.168.100.0" with node(s) rac02.
Node connectivity check passed for subnet "172.16.2.0" with node(s) rac02.
Suitable interfaces for the private interconnect on subnet "10.13.100.0":
rac02 bond0:10.13.100.12
Suitable interfaces for the private interconnect on subnet "192.168.100.0":
rac02 bond1:192.168.100.12
Suitable interfaces for the private interconnect on subnet "172.16.2.0":
rac02 bond2:172.16.2.2
ERROR:
Could not find a suitable set of interfaces for VIPs.
Node connectivity check failed.
Checking shared storage accessibility...
Disk Sharing Nodes (1 in count)
------------------------------------ ------------------------
/dev/sda rac02
/dev/sdb rac02
/dev/sdc rac02
/dev/sdd rac02
/dev/sde rac02
/dev/sdf rac02
/dev/sdg rac02
Disk Sharing Nodes (1 in count)
------------------------------------ ------------------------
/dev/sdh rac02
/dev/sdi rac02
/dev/sdj rac02
/dev/sdk rac02
/dev/sdl rac02
/dev/sdm rac02
/dev/sdn rac02
Shared storage check was successful on nodes "rac02".
Post-check for hardware and operating system setup was unsuccessful on all the nodes.
VIP failed - Ignore - see Checking the Network Setup with CVU
Shared storage failed - See Verifying Shared Storage
VIP failed - Ignore - see Checking the Network Setup with CVU
Shared storage failed - See Verifying Shared Storage
4.21 Checking the Operating System Requirements with CVU
[oracle@rac01 ~]$ /u01/software/clusterware/cluvfy/runcluvfy.sh comp sys -n rac01,rac02 -p crs
Verifying system requirement
Checking system requirements for 'crs'...
Total memory check passed.
Free disk space check passed.
Swap space check passed.
System architecture check passed.
Kernel version check passed.
Package existence check passed for "binutils-2.15.92.0.2-13".
Group existence check passed for "dba".
Group existence check passed for "oinstall".
User existence check passed for "nobody".
System requirement passed for 'crs'
Verification of system requirement was successful.
/u01/software/clusterware/cluvfy/runcluvfy.sh comp ssa -n rac01,rac02 -s /dev/mapper/mpath0 -verbose
4.23 Verifying the Clusterware Requirements with CVU
cd /u01/10gr2/10gr2/clusterware/cluvfy
./runcluvfy.sh stage -pre crsinst -n rac01,rac02
Performing pre-checks for cluster services setup
Checking node reachability...
Node reachability check passed from node "rac01".
Checking user equivalence...
User equivalence check passed for user "oracle".
Checking administrative privileges...
User existence check passed for "oracle".
Group existence check passed for "oinstall".
Membership check for user "oracle" in group "oinstall" [as Primary] passed.
Administrative privileges check passed.
Checking node connectivity...
WARNING:
Make sure IP address "192.168.6.2" is up and is a valid IP address on node "rac01".
Node connectivity check failed for subnet "192.168.6.0".
Node connectivity check passed for subnet "192.168.5.0" with node(s) rac02,rac01.
Suitable interfaces for the private interconnect on subnet "192.168.5.0":
rac02 eth2:192.168.5.3
rac01 eth2:192.168.5.2
ERROR:
Could not find a suitable set of interfaces for VIPs.
Node connectivity check failed.
Checking system requirements for 'crs'...
Total memory check passed.
Free disk space check passed.
Swap space check passed.
System architecture check passed.
Kernel version check passed.
Package existence check passed for "binutils-2.15.92.0.2-13".
Group existence check passed for "dba".
Group existence check passed for "oinstall".
User existence check passed for "nobody".
System requirement passed for 'crs'
Pre-check for cluster services setup was unsuccessful on all the nodes.
VIP failed - Ignore - see Checking the Network Setup with CVU
4.24 ASM package install - Done
Install the 3 ASM packages on both servers using the following command:
[root@rac02 software]# rpm -Uvh oracleasm-support-2.0.3-1.x86_64.rpm \
oracleasmlib-2.0.2-1.x86_64.rpm \
oracleasm-2.6.9-55.ELsmp-2.0.3-1.x86_64.rpm
Preparing... ########################################### [100%]
1:oracleasm-support ########################################### [ 33%]
2:oracleasm-2.6.9-55.ELsm########################################### [ 67%]
3:oracleasmlib ########################################### [100%]
On both servers
[root@rac01 software]# /etc/init.d/oracleasm configure
Configuring the Oracle ASM library driver.
This will configure the on-boot properties of the Oracle ASM library
driver. The following questions will determine whether the driver is
loaded on boot and what permissions it will have. The current values
will be shown in brackets ('[]'). Hitting <ENTER> without typing an
answer will keep that current value. Ctrl-C will abort.
Default user to own the driver interface []: oracle
Default group to own the driver interface []: dba
Start Oracle ASM library driver on boot (y/n) [n]: y
Fix permissions of Oracle ASM disks on boot (y/n) [y]: y
Writing Oracle ASM library driver configuration: [ OK ]
Creating /dev/oracleasm mount point: [ OK ]
Loading module "oracleasm": [ OK ]
Mounting ASMlib driver filesystem: [ OK ]
Scanning system for ASM disks: [ OK ]
Disk Matrix at RAC01, RAC02
Mpath | Size | Usage |
mpath2 | 1GB | VOTE3 |
mpath1 | 1GB | VOTE2 |
mpath0 | 1GB | VOTE1 |
mpath6 | 400GB | ASM1 |
mpath5 | 175GB | ASM2 |
mpath4 | 5GB | OCR1 |
mpath3 | 5GB | OCR2 |
On RAC01
[root@rac01 software]# /etc/init.d/oracleasm createdisk VOL1 /dev/mapper/mpath5
Marking disk "/dev/mapper/mpath5" as an ASM disk: [ OK ]
[root@rac01 software]# /etc/init.d/oracleasm createdisk VOL1 /dev/mapper/mpath6
[root@rac01 software]# /etc/init.d/oracleasm createdisk VOL2 /dev/mapper/mpath6
Marking disk "/dev/mapper/mpath6" as an ASM disk: [ OK ]
On RAC02
[root@rac02 ~]# /etc/init.d/oracleasm scandisks
Scanning system for ASM disks: [ OK ]
4.24.1 Configuring the Scan Order
The Oracle ASMLib configuration file is located at /etc/sysconfig/oracleasm.
The configuration file contains many configuration variables. The ORACLEASM_SCANORDER variable specifies disks to be scanned first. The ORACLEASM_SCANEXCLUDE variable specifies the disks that are to be ignored.
Multipath Disks First
Edit the ORACLEASM_SCANORDER variable to configure ASMLib to scan the multipath disks first:
ORACLEASM_SCANORDER="multipath sd"
4.25 OCFS package install
On both rac01 and rac02, run the following command to install the 5 packages:
[root@rac01 software]# rpm -Uvh ocfs2-2.6.9-55.ELsmp-1.2.8-2.el4.x86_64.rpm \
ocfs2-2.6.9-55.EL-debuginfo-1.2.8-2.el4.x86_64.rpm \
ocfs2console-1.2.7-1.el4.x86_64.rpm \
ocfs2-tools-1.2.7-1.el4.x86_64.rpm \
ocfs2-tools-debuginfo-1.2.7-1.el4.x86_64.rpm
Preparing... ########################################### [100%]
1:ocfs2-tools ########################################### [ 20%]
2:ocfs2-2.6.9-55.ELsmp ########################################### [ 40%]
3:ocfs2-2.6.9-55.EL-debug########################################### [ 60%]
4:ocfs2console ########################################### [ 80%]
5:ocfs2-tools-debuginfo ########################################### [100%]
On both servers
Disable SELinux, run the "Security Level Configuration" GUI utility:
# /usr/bin/system-config-securitylevel &
or from console / Applications / System Settings / Security Level
Now, click the SELinux tab and uncheck the "Enabled" checkbox. After clicking on [OK], you will be presented with a warning dialog. Simply acknowledge this warning by clicking "Yes". Your screen should now look like the following after disabling the SELinux option:
Reboot both servers
init 6 or reboot
4.27 OCFS2 Configuration
OCFS2 is the file system used for the voting disks and the OCR (Oracle Cluster Registry). Oracle provides a tool, ocfs2console, to set up and configure the file system.
Perform these tasks at only node RAC01
Connect to root VNC and run the following commands:
exec /usr/bin/ssh-agent $SHELL
/usr/bin/ssh-add
ocfs2console
On both servers as root
[root@rac01 ~]# /etc/init.d/o2cb configure
Configuring the O2CB driver.
This will configure the on-boot properties of the O2CB driver.
The following questions will determine whether the driver is loaded on
boot. The current values will be shown in brackets ('[]'). Hitting
<ENTER> without typing an answer will keep that current value. Ctrl-C
will abort.
Load O2CB driver on boot (y/n) [y]: y
Cluster to start on boot (Enter "none" to clear) [ocfs2]:
Specify heartbeat dead threshold (>=7) [31]:
Specify network idle timeout in ms (>=5000) [30000]:
Specify network keepalive delay in ms (>=1000) [2000]:
Specify network reconnect delay in ms (>=2000) [2000]:
Writing O2CB configuration: OK
O2CB cluster ocfs2 already online
4.27.1 OCFS2 commands
To check the status of the cluster, do:
[root@rac01 software]# /etc/init.d/o2cb status
Module "configfs": Loaded
Filesystem "configfs": Mounted
Module "ocfs2_nodemanager": Loaded
Module "ocfs2_dlm": Loaded
Module "ocfs2_dlmfs": Loaded
Filesystem "ocfs2_dlmfs": Mounted
Checking O2CB cluster ocfs2: Online
Heartbeat dead threshold: 31
Network idle timeout: 30000
Network keepalive delay: 2000
Network reconnect delay: 2000
Checking O2CB heartbeat: Not active
To load the modules, do:
/etc/init.d/o2cb load
To online cluster ocfs2, do:
/etc/init.d/o2cb online ocfs2
Starting cluster ocfs2: OK
To offline cluster ocfs2, do:
/etc/init.d/o2cb offline ocfs2
Cleaning heartbeat on ocfs2: OK
If the cluster is set up to load on boot, one can start and stop cluster ocfs2 as follows:
/etc/init.d/o2cb start
Loading module "configfs": OK
Mounting configfs filesystem at /config: OK
Loading module "ocfs2_nodemanager": OK
Loading module "ocfs2_dlm": OK
Loading module "ocfs2_dlmfs": OK
Mounting ocfs2_dlmfs filesystem at /dlm: OK
Starting cluster ocfs2: OK
/etc/init.d/o2cb stop
Cleaning heartbeat on ocfs2: OK
Stopping cluster ocfs2: OK
Unmounting ocfs2_dlmfs filesystem: OK
Unloading module "ocfs2_dlmfs": OK
Unmounting configfs filesystem: OK
Unloading module "configfs": OK
4.28 OCFS2 File system format
Format command needs O2CB cluster started and online as it needs to check the volume is not mounted on some node in the cluster.
Connect to root VNC and run the following commands:
ocfs2console
If the list of devices presented is wrong, use the following commands to format the OCFS2 volumes manually.
Create 3 Voting Disk volumes as below
# mkfs.ocfs2 -L "vote1" /dev/mapper/mpath0
# mkfs.ocfs2 -L "vote2" /dev/mapper/mpath1
# mkfs.ocfs2 -L "vote3" /dev/mapper/mpath2
Create 2 OCR volumes as below
# mkfs.ocfs2 -L "ocr1" /dev/mapper/mpath4
# mkfs.ocfs2 -L "ocr2" /dev/mapper/mpath3
http://oss.oracle.com/projects/ocfs2/dist/documentation/ocfs2_faq.html#CONFIGURE
[root@rac01 software]# mkfs.ocfs2 -L "vote1" /dev/mapper/mpath0
mkfs.ocfs2 1.2.7
Filesystem label=vote1
Block size=4096 (bits=12)
Cluster size=4096 (bits=12)
Volume size=1073741824 (262144 clusters) (262144 blocks)
9 cluster groups (tail covers 4096 clusters, rest cover 32256 clusters)
Journal size=67108864
Initial number of node slots: 4
Creating bitmaps: done
Initializing superblock: done
Writing system files: done
Writing superblock: done
Writing backup superblock: 0 block(s)
Formatting Journals: done
Writing lost+found: done
mkfs.ocfs2 successful
[root@rac01 software]# mkfs.ocfs2 -L "vote2" /dev/mapper/mpath1
mkfs.ocfs2 1.2.7
Filesystem label=vote2
Block size=4096 (bits=12)
Cluster size=4096 (bits=12)
Volume size=1073741824 (262144 clusters) (262144 blocks)
9 cluster groups (tail covers 4096 clusters, rest cover 32256 clusters)
Journal size=67108864
Initial number of node slots: 4
Creating bitmaps: done
Initializing superblock: done
Writing system files: done
Writing superblock: done
Writing backup superblock: 0 block(s)
Formatting Journals: done
Writing lost+found: done
mkfs.ocfs2 successful
[root@rac01 software]# mkfs.ocfs2 -L "vote3" /dev/mapper/mpath2
mkfs.ocfs2 1.2.7
Filesystem label=vote3
Block size=4096 (bits=12)
Cluster size=4096 (bits=12)
Volume size=1073741824 (262144 clusters) (262144 blocks)
9 cluster groups (tail covers 4096 clusters, rest cover 32256 clusters)
Journal size=67108864
Initial number of node slots: 4
Creating bitmaps: done
Initializing superblock: done
Writing system files: done
Writing superblock: done
Writing backup superblock: 0 block(s)
Formatting Journals: done
Writing lost+found: done
mkfs.ocfs2 successful
[root@rac01 software]# mkfs.ocfs2 -L "ocr1" /dev/mapper/mpath4
mkfs.ocfs2 1.2.7
Filesystem label=ocr1
Block size=4096 (bits=12)
Cluster size=4096 (bits=12)
Volume size=5368709120 (1310720 clusters) (1310720 blocks)
41 cluster groups (tail covers 20480 clusters, rest cover 32256 clusters)
Journal size=67108864
Initial number of node slots: 4
Creating bitmaps: done
Initializing superblock: done
Writing system files: done
Writing superblock: done
Writing backup superblock: 2 block(s)
Formatting Journals: done
Writing lost+found: done
mkfs.ocfs2 successful
[root@rac01 software]# mkfs.ocfs2 -L "ocr2" /dev/mapper/mpath3
mkfs.ocfs2 1.2.7
Filesystem label=ocr2
Block size=4096 (bits=12)
Cluster size=4096 (bits=12)
Volume size=5368709120 (1310720 clusters) (1310720 blocks)
41 cluster groups (tail covers 20480 clusters, rest cover 32256 clusters)
Journal size=67108864
Initial number of node slots: 4
Creating bitmaps: done
Initializing superblock: done
Writing system files: done
Writing superblock: done
Writing backup superblock: 2 block(s)
Formatting Journals: done
Writing lost+found: done
mkfs.ocfs2 successful
4.29 OCFS2 File system mount
[root@rac01 software]# mkdir /u02
[root@rac01 software]# mkdir -p /u02/vote1
[root@rac01 software]# mkdir -p /u02/vote2
[root@rac01 software]# mkdir -p /u02/vote3
[root@rac01 software]# mkdir -p /u02/ocr1
[root@rac01 software]# mkdir -p /u02/ocr2
[root@rac01 software]# chown -R oracle:oinstall /u02/vote1 /u02/vote2 /u02/vote3 /u02/ocr1 /u02/ocr2
[root@rac01 software]# chmod -R 775 /u02/vote1 /u02/vote2 /u02/vote3 /u02/ocr1 /u02/ocr2
Mpath Size Usage
mpath0 1GB vote1
mpath1 1GB vote2
mpath2 1GB vote3
mpath4 5GB ocr1
mpath3 5GB ocr2
To mount run the following command as root on both servers:
mount -t ocfs2 -o datavolume,nointr /dev/mapper/mpath0 /u02/vote1
mount -t ocfs2 -o datavolume,nointr /dev/mapper/mpath1 /u02/vote2
mount -t ocfs2 -o datavolume,nointr /dev/mapper/mpath2 /u02/vote3
mount -t ocfs2 -o datavolume,nointr /dev/mapper/mpath3 /u02/ocr2
mount -t ocfs2 -o datavolume,nointr /dev/mapper/mpath4 /u02/ocr1
To unmount run the following command as root:
umount /u02
[root@rac01 /]# df -k
Filesystem 1K-blocks Used Available Use% Mounted on
/dev/mapper/VolGroup00-LogVol00
20642428 2847104 16746748 15% /
/dev/cciss/c0d0p1 101086 21753 74114 23% /boot
none 8197640 0 8197640 0% /dev/shm
/dev/mapper/VolGroup00-LogVol02
2064208 36060 1923292 2% /home
/dev/mapper/VolGroup00-LogVol03
1032088 171424 808236 18% /opt
/dev/mapper/VolGroup00-LogVol05
4128448 2798528 1120208 72% /tmp
/dev/mapper/VolGroup00-LogVol06
23932052 1206840 21509520 6% /u01
/dev/mapper/VolGroup00-LogVol04
2064208 154664 1804688 8% /var
/dev/mapper/mpath0 1048576 268156 780420 26% /u02/vote1
/dev/mapper/mpath1 1048576 268156 780420 26% /u02/vote2
/dev/mapper/mpath2 1048576 268156 780420 26% /u02/vote3
/dev/mapper/mpath3 5242880 268292 4974588 6% /u02/ocr2
/dev/mapper/mpath4 5242880 268292 4974588 6% /u02/ocr1
[root@rac01 /]# umount /u02/vote1
[root@rac01 /]# umount /u02/vote2
[root@rac01 /]# umount /u02/vote3
[root@rac01 /]# umount /u02/ocr1
[root@rac01 /]# umount /u02/ocr2
Edit /etc/fstab on both servers and add the following lines:
/dev/mapper/mpath0 /u02/vote1 ocfs2 _netdev,datavolume,nointr 0 0
/dev/mapper/mpath1 /u02/vote2 ocfs2 _netdev,datavolume,nointr 0 0
/dev/mapper/mpath2 /u02/vote3 ocfs2 _netdev,datavolume,nointr 0 0
/dev/mapper/mpath3 /u02/ocr2 ocfs2 _netdev,datavolume,nointr 0 0
/dev/mapper/mpath4 /u02/ocr1 ocfs2 _netdev,datavolume,nointr 0 0
The first field (/dev/hdc) is the physical device/remote filesystem which is to be described.
The second field (/mnt/cdrom) specifies the mount point where the filesystem will be mounted.
The third field (iso9660) is the type of filesystem on the device from the first field.
The fourth field (noauto,ro,user) is a (default) list of options which mount should use when mounting the filesystem.
The fifth field (0) is used by dump (a backup utility) to decide if a filesystem should be backed up. If zero then dump will ignore that filesystem. The sixth field (0) is used by fsck (the filesystem check utility) to determine the order in which filesystems should be checked.
Check server configuration for startup has OCFS2 and O2CB as “on” for runlevels 3 and 5:
chkconfig --list o2cb
o2cb 0:off 1:off 2:on 3:on 4:on 5:on 6:off
chkconfig --list ocfs2
ocfs2 0:off 1:off 2:on 3:on 4:on 5:on 6:off
Reboot and confirm /u02 has mounted.
5 Installation
Installation will be progressed in 3 parts,
- CRS install at separate base directory /u01/crs
- ASM install in a separate $ASM_HOME from database home ($ORACLE_HOME)
- DB install at $ORACLE_HOME
5.1 CRS install
exec /usr/bin/ssh-agent $SHELL
/usr/bin/ssh-add
cd /u01/10gr2/10gr2/clusterware
./runInstaller
Has 'rootpre.sh' been run by root? [y/n] (n)
Y
change Inventory Directory to
/u01/app/oracle/oraInventory
and group to
dba
Next
/u01/crs/oracle/product/10/app
Next
All OK
Edit rac01 private and virtual names so priv-{node} and vip-{node}.
The reason for the change is there is less chance of selecting incorrect connection name.
Add rac02,
with Private node name of orapriv02
and virtual host name of oravip02
Select bond0 for public ,bond2 for Private interface.
Please run these scripts as “root” and click OK when done.
5.1.1 VIPCA
exec /usr/bin/ssh-agent $SHELL
/usr/bin/ssh-add
Click IP Alias Name for rac01 and enter oravip01, the rest is filled in when you tab
Ifconfig should return a new VIP address configured
[root@rac01 ~]# ifconfig
bond0 Link encap:Ethernet HWaddr 00:1B:78:95:0E:3A
inet addr:10.13.100.11 Bcast:10.13.100.255 Mask:255.255.255.0
inet6 addr: fe80::200:ff:fe00:0/64 Scope:Link
UP BROADCAST RUNNING MASTER MULTICAST MTU:1500 Metric:1
RX packets:1999421 errors:0 dropped:0 overruns:0 frame:0
TX packets:3637692 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:0
RX bytes:140532768 (134.0 MiB) TX bytes:1230836921 (1.1 GiB)
bond0:1 Link encap:Ethernet HWaddr 00:1B:78:95:0E:3A
inet addr:10.13.100.13 Bcast:10.13.100.255 Mask:255.255.255.0
UP BROADCAST RUNNING MASTER MULTICAST MTU:1500 Metric:1
5.2 ASM Install
With Oracle Database 10g Release 2 (10.2), Automatic Storage Management should be installed in a separate ASM home directory.
While carrying out the install, Oracle returned the following error:
ASM1 instance could start on node 1 but not on node 2(RAC02)
SR Number 18563922.6 raised and solution suggested was adding following ASM string at ASM1,ASM2 init files.
asm_diskstring = '/dev/oracleasm/disks/*'
LSNRCTL> start LISTENER_RAC02
Starting /u01/app/oracle/product/10.2.0/asm/bin/tnslsnr: please wait...
TNSLSNR for Linux: Version 10.2.0.1.0 - Production
System parameter file is /u01/app/oracle/product/10.2.0/asm/network/admin/listener.ora
Log messages written to /u01/app/oracle/product/10.2.0/asm/network/log/listener_rac02.log
Listening on: (DESCRIPTION=(ADDRESS=(PROTOCOL=ipc)(KEY=EXTPROC1)))
Listening on: (DESCRIPTION=(ADDRESS=(PROTOCOL=tcp)(HOST=10.13.100.14)(PORT=1521)))
Listening on: (DESCRIPTION=(ADDRESS=(PROTOCOL=tcp)(HOST=10.13.100.12)(PORT=1521)))
Connecting to (DESCRIPTION=(ADDRESS=(PROTOCOL=IPC)(KEY=EXTPROC1)))
STATUS of the LISTENER
------------------------
Alias LISTENER_RAC02
Version TNSLSNR for Linux: Version 10.2.0.1.0 - Production
Start Date 22-FEB-2008 13:00:11
Uptime 0 days 0 hr. 0 min. 0 sec
Trace Level off
Security ON: Local OS Authentication
SNMP ON
Listener Parameter File /u01/app/oracle/product/10.2.0/asm/network/admin/listener.ora
Listener Log File /u01/app/oracle/product/10.2.0/asm/network/log/listener_rac02.log
Listening Endpoints Summary...
(DESCRIPTION=(ADDRESS=(PROTOCOL=ipc)(KEY=EXTPROC1)))
(DESCRIPTION=(ADDRESS=(PROTOCOL=tcp)(HOST=10.13.100.14)(PORT=1521)))
(DESCRIPTION=(ADDRESS=(PROTOCOL=tcp)(HOST=10.13.100.12)(PORT=1521)))
Services Summary...
Service "PLSExtProc" has 1 instance(s).
Instance "PLSExtProc", status UNKNOWN, has 1 handler(s) for this service...
The command completed successfully
5.3 Install Database Software
Connect as oracle to rac01 VNC session & start Database Configuration Assistant
5.4 Create RAC Database
Confirm Cluster Installation with both rac01 and rac02 selected, Next
Change processes from 150 to 500, 8192 Bytes Block size (default)
Default for Connection Mode (dedicated Server Mode)
Click OK
Click exit - will take about 5 minutes to finish and return to unix prompt.
6 Scripts and profile files
5.4 .bash_profile rac01
$HOME/.bash_profile at Rac01
export PATH
unset USERNAME
CRS_HOME=/u01/crs/oracle/product/10/crs ; export CRS_HOME
ASM_HOME=/u01/app/oracle/product/10.2.0/asm ; export ASM_HOME
RDBMS_HOME=/u01/app/oracle/product/10.2.0/db_1 ; export RDBMS_HOME
# Now set ORACLE_HOME to RDBMS
ORACLE_HOME=$RDBMS_HOME ; export ORACLE_HOME
ORACLE_SID=prod1 ; export ORACLE_SID
# set LD_LIBRARY_PATH (as oraenv)
case "$LD_LIBRARY_PATH" in
*$OLDHOME/lib*) LD_LIBRARY_PATH=`echo $LD_LIBRARY_PATH | \
sed "s;$OLDHOME/lib;$ORACLE_HOME/lib;g"` ;;
*$ORACLE_HOME/lib*) ;;
"") LD_LIBRARY_PATH=$ORACLE_HOME/lib ;;
*) LD_LIBRARY_PATH=$ORACLE_HOME/lib:$LD_LIBRARY_PATH ;;
esac
PATH=$PATH:/usr/local/bin:$ORACLE_HOME/bin:$CRS_HOME/bin:$ASM_HOME/bin.
export PATH
unset USERNAME
5.5 .bash_profile rac02
# Oracle environment for the oracle user on node rac02 (instance prod2).
# Exports the CRS, ASM and RDBMS home locations, points ORACLE_HOME at the
# RDBMS home, and adds the Oracle library and binary directories to
# LD_LIBRARY_PATH and PATH.
export PATH
unset USERNAME
CRS_HOME=/u01/crs/oracle/product/10/crs ; export CRS_HOME
ASM_HOME=/u01/app/oracle/product/10.2.0/asm ; export ASM_HOME
RDBMS_HOME=/u01/app/oracle/product/10.2.0/db_1 ; export RDBMS_HOME
# Remember any ORACLE_HOME inherited from the environment before overwriting
# it, so its lib directory can be swapped out of LD_LIBRARY_PATH below.
# Without this OLDHOME is empty and the "/lib" pattern matches almost anything.
OLDHOME=${ORACLE_HOME:-}
# Now set ORACLE_HOME to RDBMS
ORACLE_HOME=$RDBMS_HOME ; export ORACLE_HOME
ORACLE_SID=prod2 ; export ORACLE_SID
# set LD_LIBRARY_PATH (as oraenv)
# Step 1: only when a different, non-empty previous home exists, replace its
# lib directory with the new one.
if [ -n "$OLDHOME" ] && [ "$OLDHOME" != "$ORACLE_HOME" ]; then
    case "${LD_LIBRARY_PATH:-}" in
        *"$OLDHOME/lib"*)
            LD_LIBRARY_PATH=`echo "$LD_LIBRARY_PATH" | \
                sed "s;$OLDHOME/lib;$ORACLE_HOME/lib;g"` ;;
    esac
fi
# Step 2: make sure $ORACLE_HOME/lib is present exactly once.
case "${LD_LIBRARY_PATH:-}" in
    *"$ORACLE_HOME/lib"*) ;;
    "") LD_LIBRARY_PATH=$ORACLE_HOME/lib ;;
    *) LD_LIBRARY_PATH=$ORACLE_HOME/lib:$LD_LIBRARY_PATH ;;
esac
# Export so child processes (sqlplus, srvctl, ...) inherit the value even when
# LD_LIBRARY_PATH was not already in the environment.
export LD_LIBRARY_PATH
# No trailing "." after the last entry: "$ASM_HOME/bin." is a different,
# non-existent directory name.
PATH=$PATH:/usr/local/bin:$ORACLE_HOME/bin:$CRS_HOME/bin:$ASM_HOME/bin
export PATH
unset USERNAME
6 RAC Infrastructure Testing : Following tests were carried out on one RAC node at a time
6.1 RAC Voting Disk Test :
Test ID | Category | Cause of Failure | Method of Test | Database Crash |
V1 | Software\Disk | Failure of SAN storage system | Disconnect SAN HBA Switch to Server RAC01 or RAC02, one at a Time | NO |
6.2 RAC Cluster Registry Test
Test ID | Category | Cause of Failure | Method of Test | Database Crash |
C1 | Software\Disk | Failure of SAN storage system | Disconnect SAN HBA Switch to Server RAC01 or RAC02, one at a Time | NO |
6.3 RAC ASM Tests
Test ID | Category | Cause of Failure | Method of Test | Database Crash |
A1 | Software\Disk | Failure of SAN storage system | Disconnect SAN HBA Switch to Server RAC01 or RAC02, one at a Time | NO |
6.4 RAC Interconnect Tests
Test ID | Category | Cause of Failure | Method of Test | Server Crash |
N1 | Private Network | Loss of Private network | Pull PRIVATE port cable from RAC01 | NO CRASH. RAC moved database connections to RAC01. |
N2 | Public Network | Loss of Public network | Pull PUBLIC port cable from RAC01 | NO CRASH. RAC moved database connections from node 1 to Node 2 |
N3 | Private Network | Loss of Private network | Pull PRIVATE port cable from RAC02. | NO CRASH. RAC moved database connections to RAC02 |
N4 | Public Network | Loss of Public network | Pull PUBLIC port cable from RAC02 | NO CRASH. RAC moved database connections from node 2 to Node 1 |
Appendix
Errors and warnings encountered and cause and required actions
1. OCR/Voting disk volumes were accessible by rac02 server
After restart, Oracle lost 2 volumes - /dev/mapper/mpath1 & /dev/mapper/mpath3
[root@rac01 ~]# mount -t ocfs2 -o datavolume,nointr /dev/mapper/mpath1 /u02/vote2
mount.ocfs2: Device name specified was not found while opening device /dev/mapper/mpath1
[root@rac01 ~]# mount -t ocfs2 -o datavolume,nointr /dev/mapper/mpath2 /u02/vote3
[root@rac01 ~]# mount -t ocfs2 -o datavolume,nointr /dev/mapper/mpath3 /u02/ocr2
mount.ocfs2: Device name specified was not found while opening device /dev/mapper/mpath3
[root@rac01 ~]# mount -t ocfs2 -o datavolume,nointr /dev/mapper/mpath4 /u02/ocr1
[root@rac01 mapper]# ls -lrt | grep mpath
brw-rw---- 1 root disk 253, 13 Feb 13 18:07 mpath6
brw-rw---- 1 root disk 253, 12 Feb 13 18:07 mpath5
brw-rw---- 1 root disk 253, 11 Feb 13 18:07 mpath4
brw-rw---- 1 root disk 253, 9 Feb 13 18:07 mpath2
brw-rw---- 1 root disk 253, 7 Feb 13 18:07 mpath0
This was a device mapper issue at SAN level.
2. The entire RAC cluster went down while carrying out the public network test
Public network disabled at rac01
- Existing connections were migrated from prod1 to prod2
- Something happened and RAC2 also rebooted, lost entire RAC database.
SQL> select instance_name from v$instance;
INSTANCE_NAME
----------------
prod1
SQL> select instance_name from v$instance;
select instance_name from v$instance
ERROR at line 1:
ORA-03135: connection lost contact
SQL> select instance_name from v$instance;
INSTANCE_NAME
----------------
prod2
Investigation: OCFS2 requires the nodes to be alive on the network and sends regular keep-alive packets to verify that they are. When one node disappears from the network, this leads to node self-fencing; hence, when the public network was disabled on the first node, OCFS2 sent a shutdown signal to the second node.
Solution :
-Use interconnect IP address to configure OCFS2 in /etc/ocfs2/cluster.conf instead of using public IP address
-reboot Nodes and do the test again
Testing : To change the heartbeat IP for OCFS2, make the changes on both nodes & reboot both servers. Once OCFS2 is on the private interconnect IP address, everything should work fine.
OLD /etc/ocfs2/cluster.conf FILE with Public IP Addresses
node:
ip_port = 7777
ip_address = 10.13.100.11
number = 0
name = rac01
cluster = ocfs2
node:
ip_port = 7777
ip_address = 10.13.100.12
number = 1
name = rac02
cluster = ocfs2
cluster:
node_count = 2
name = ocfs2
New /etc/ocfs2/cluster.conf with Private Interconnect IP Addresses
node:
ip_port = 7777
ip_address = 172.16.2.1
number = 0
name = rac01
cluster = ocfs2
node:
ip_port = 7777
ip_address = 172.16.2.2
number = 1
name = rac02
cluster = ocfs2
cluster:
node_count = 2
name = ocfs2